Fixing copyright issues and code contributors
[alexxy/gromacs.git] / src / kernel / repl_ex.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #include <math.h>
43 #include "repl_ex.h"
44 #include "network.h"
45 #include "random.h"
46 #include "smalloc.h"
47 #include "physics.h"
48 #include "copyrite.h"
49 #include "macros.h"
50 #include "vec.h"
51 #include "names.h"
52 #include "mvdata.h"
53 #include "domdec.h"
54 #include "partdec.h"
55
56 #define PROBABILITYCUTOFF 100
57 /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
58
59 enum { ereTEMP, ereLAMBDA, ereENDSINGLE ,ereTL, ereNR };
60 const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
61 /* end_single_marker merely notes the end of single variable replica exchange. All types higher than
62    it are multiple replica exchange methods */
63 /* Eventually, should add 'pressure', 'temperature and pressure', 'lambda_and_pressure', 'temperature_lambda_pressure'?;
64    Let's wait until we feel better about the pressure control methods giving exact ensembles.  Right now, we assume constant pressure  */
65
66 typedef struct gmx_repl_ex
67 {
68     int  repl;
69     int  nrepl;
70     real temp;
71     int  type;
72     real **q;
73     gmx_bool bNPT;
74     real *pres;
75     int  *ind;
76     int *allswaps;
77     int  nst;
78     int nex;
79     int  seed;
80     int  nattempt[2];
81     real *prob_sum;
82     int  **nmoves;
83     int  *nexchange;
84
85     /* these are helper arrays for replica exchange; allocated here so they
86        don't have to be allocated each time */
87     int *destinations;
88     int **cyclic;
89     int **order;
90     int *tmpswap;
91     gmx_bool *incycle;
92     gmx_bool *bEx;
93
94     /* helper arrays to hold the quantities that are exchanged */
95     real *prob;
96     real *Epot;
97     real *beta;
98     real *Vol;
99     real **de;
100
101 } t_gmx_repl_ex;
102
103 static gmx_bool repl_quantity(FILE *fplog,const gmx_multisim_t *ms,
104                               struct gmx_repl_ex *re,int ere,real q)
105 {
106     real *qall;
107     gmx_bool bDiff;
108     int  i,s;
109
110     snew(qall,ms->nsim);
111     qall[re->repl] = q;
112     gmx_sum_sim(ms->nsim,qall,ms);
113
114     bDiff = FALSE;
115     for (s=1; s<ms->nsim; s++)
116     {
117         if (qall[s] != qall[0])
118         {
119             bDiff = TRUE;   
120         }
121     }
122
123     if (bDiff)
124     {
125         /* Set the replica exchange type and quantities */
126         re->type = ere;
127
128         snew(re->q[ere],re->nrepl);
129         for(s=0; s<ms->nsim; s++)
130         {
131             re->q[ere][s] = qall[s];
132         }
133     }
134     sfree(qall);
135     return bDiff;
136 }
137
138 gmx_repl_ex_t init_replica_exchange(FILE *fplog,
139                                     const gmx_multisim_t *ms,
140                                     const t_state *state,
141                                     const t_inputrec *ir,
142                                     int nst, int nex, int init_seed)
143 {
144     real temp,pres;
145     int  i,j,k;
146     struct gmx_repl_ex *re;
147     gmx_bool bTemp;
148     gmx_bool bLambda=FALSE;
149
150     fprintf(fplog,"\nInitializing Replica Exchange\n");
151
152     if (ms == NULL || ms->nsim == 1)
153     {
154         gmx_fatal(FARGS,"Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?");
155     }
156
157     snew(re,1);
158
159     re->repl     = ms->sim;
160     re->nrepl    = ms->nsim;
161     snew(re->q,ereENDSINGLE);
162
163     fprintf(fplog,"Repl  There are %d replicas:\n",re->nrepl);
164
165     check_multi_int(fplog,ms,state->natoms,"the number of atoms",FALSE);
166     check_multi_int(fplog,ms,ir->eI,"the integrator",FALSE);
167     check_multi_large_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps",FALSE);
168     check_multi_large_int(fplog,ms,(ir->init_step+nst-1)/nst,
169                           "first exchange step: init_step/-replex",FALSE);
170     check_multi_int(fplog,ms,ir->etc,"the temperature coupling",FALSE);
171     check_multi_int(fplog,ms,ir->opts.ngtc,
172                     "the number of temperature coupling groups",FALSE);
173     check_multi_int(fplog,ms,ir->epc,"the pressure coupling",FALSE);
174     check_multi_int(fplog,ms,ir->efep,"free energy",FALSE);
175     check_multi_int(fplog,ms,ir->fepvals->n_lambda,"number of lambda states",FALSE);
176
177     re->temp = ir->opts.ref_t[0];
178     for(i=1; (i<ir->opts.ngtc); i++)
179     {
180         if (ir->opts.ref_t[i] != re->temp)
181         {
182             fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
183             fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
184         }
185     }
186
187     re->type = -1;
188     bTemp = repl_quantity(fplog,ms,re,ereTEMP,re->temp);
189     if (ir->efep != efepNO)
190     {
191         bLambda = repl_quantity(fplog,ms,re,ereLAMBDA,(real)ir->fepvals->init_fep_state);
192     }
193     if (re->type == -1)  /* nothing was assigned */
194     {
195         gmx_fatal(FARGS,"The properties of the %d systems are all the same, there is nothing to exchange",re->nrepl);
196     }
197     if (bLambda && bTemp) {
198         re->type = ereTL;
199     }
200
201     if (bTemp)
202     {
203         please_cite(fplog,"Sugita1999a");
204         if (ir->epc != epcNO)
205         {
206             re->bNPT = TRUE;
207             fprintf(fplog,"Repl  Using Constant Pressure REMD.\n");
208             please_cite(fplog,"Okabe2001a");
209         }
210         if (ir->etc == etcBERENDSEN)
211         {
212             gmx_fatal(FARGS,"REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead",
213                       ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
214         }
215     }
216     if (bLambda) {
217         if (ir->fepvals->delta_lambda != 0)   /* check this? */
218         {
219             gmx_fatal(FARGS,"delta_lambda is not zero");
220         }
221     }
222     if (re->bNPT)
223     {
224         snew(re->pres,re->nrepl);
225         if (ir->epct == epctSURFACETENSION)
226         {
227             pres = ir->ref_p[ZZ][ZZ];
228         }
229         else
230         {
231             pres = 0;
232             j = 0;
233             for(i=0; i<DIM; i++)
234             {
235                 if (ir->compress[i][i] != 0)
236                 {
237                     pres += ir->ref_p[i][i];
238                     j++;
239                 }
240             }
241             pres /= j;
242         }
243         re->pres[re->repl] = pres;
244         gmx_sum_sim(re->nrepl,re->pres,ms);
245     }
246
247     /* Make an index for increasing replica order */
248     /* only makes sense if one or the other is varying, not both!
249        if both are varying, we trust the order the person gave. */
250     snew(re->ind,re->nrepl);
251     for(i=0; i<re->nrepl; i++)
252     {
253         re->ind[i] = i;
254     }
255
256     if (re->type<ereENDSINGLE) {
257
258         for(i=0; i<re->nrepl; i++)
259         {
260             for(j=i+1; j<re->nrepl; j++)
261             {
262                 if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
263                 {
264                     k = re->ind[i];
265                     re->ind[i] = re->ind[j];
266                     re->ind[j] = k;
267                 }
268                 else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
269                 {
270                     gmx_fatal(FARGS,"Two replicas have identical %ss",erename[re->type]);
271                 }
272             }
273         }
274     }
275
276     /* keep track of all the swaps, starting with the initial placement. */
277     snew(re->allswaps,re->nrepl);
278     for(i=0; i<re->nrepl; i++)
279     {
280         re->allswaps[i] = re->ind[i];
281     }
282
283     switch (re->type)
284     {
285     case ereTEMP:
286         fprintf(fplog,"\nReplica exchange in temperature\n");
287         for(i=0; i<re->nrepl; i++)
288         {
289             fprintf(fplog," %5.1f",re->q[re->type][re->ind[i]]);
290         }
291         fprintf(fplog,"\n");
292         break;
293     case ereLAMBDA:
294         fprintf(fplog,"\nReplica exchange in lambda\n");
295         for(i=0; i<re->nrepl; i++)
296         {
297             fprintf(fplog," %3d",(int)re->q[re->type][re->ind[i]]);
298         }
299         fprintf(fplog,"\n");
300         break;
301     case ereTL:
302         fprintf(fplog,"\nReplica exchange in temperature and lambda state\n");
303         for(i=0; i<re->nrepl; i++)
304         {
305             fprintf(fplog," %5.1f",re->q[ereTEMP][re->ind[i]]);
306         }
307         fprintf(fplog,"\n");
308         for(i=0; i<re->nrepl; i++)
309         {
310             fprintf(fplog," %5d",(int)re->q[ereLAMBDA][re->ind[i]]);
311         }
312         fprintf(fplog,"\n");
313         break;
314     default:
315         gmx_incons("Unknown replica exchange quantity");
316     }
317     if (re->bNPT)
318     {
319         fprintf(fplog,"\nRepl  p");
320         for(i=0; i<re->nrepl; i++)
321         {
322             fprintf(fplog," %5.2f",re->pres[re->ind[i]]);
323         }
324
325         for(i=0; i<re->nrepl; i++)
326         {
327             if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]]))
328             {
329                 fprintf(fplog,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
330                 fprintf(stderr,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
331             }
332         }
333     }
334     re->nst = nst;
335     if (init_seed == -1)
336     {
337         if (MASTERSIM(ms))
338         {
339             re->seed = make_seed();
340         }
341         else
342         {
343             re->seed = 0;
344         }
345         gmx_sumi_sim(1,&(re->seed),ms);
346     }
347     else
348     {
349         re->seed = init_seed;
350     }
351     fprintf(fplog,"\nReplica exchange interval: %d\n",re->nst);
352     fprintf(fplog,"\nReplica random seed: %d\n",re->seed);
353
354     re->nattempt[0] = 0;
355     re->nattempt[1] = 0;
356
357     snew(re->prob_sum,re->nrepl);
358     snew(re->nexchange,re->nrepl);
359     snew(re->nmoves,re->nrepl);
360     for (i=0;i<re->nrepl;i++) 
361     {
362         snew(re->nmoves[i],re->nrepl);
363     }
364     fprintf(fplog,"Replica exchange information below: x=exchange, pr=probability\n");
365
366     /* generate space for the helper functions so we don't have to snew each time */
367
368     snew(re->destinations,re->nrepl);
369     snew(re->incycle,re->nrepl);
370     snew(re->tmpswap,re->nrepl);
371     snew(re->cyclic,re->nrepl);
372     snew(re->order,re->nrepl);
373     for (i=0;i<re->nrepl;i++)
374     {
375         snew(re->cyclic[i],re->nrepl);
376         snew(re->order[i],re->nrepl);
377     }
378     /* allocate space for the functions storing the data for the replicas */
379     /* not all of these arrays needed in all cases, but they don't take
380        up much space, since the max size is nrepl**2 */
381     snew(re->prob,re->nrepl);
382     snew(re->bEx,re->nrepl);
383     snew(re->beta,re->nrepl);
384     snew(re->Vol,re->nrepl);
385     snew(re->Epot,re->nrepl);
386     snew(re->de,re->nrepl);
387     for (i=0;i<re->nrepl;i++)
388     {
389         snew(re->de[i],re->nrepl);
390     }
391     re->nex = nex;
392     return re;
393 }
394
395 static void exchange_reals(const gmx_multisim_t *ms,int b,real *v,int n)
396 {
397     real *buf;
398     int  i;
399
400     if (v)
401     {
402         snew(buf,n);
403 #ifdef GMX_MPI
404         /*
405           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
406           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
407           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
408         */
409         {
410             MPI_Request mpi_req;
411
412             MPI_Isend(v,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
413                       ms->mpi_comm_masters,&mpi_req);
414             MPI_Recv(buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
415                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
416             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
417         }
418 #endif
419         for(i=0; i<n; i++)
420         {
421             v[i] = buf[i];
422         }
423         sfree(buf);
424     }
425 }
426
427
428 static void exchange_ints(const gmx_multisim_t *ms,int b,int *v,int n)
429 {
430   int *buf;
431   int  i;
432
433   if (v) {
434     snew(buf,n);
435 #ifdef GMX_MPI
436     /*
437     MPI_Sendrecv(v,  n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
438                  buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
439                  ms->mpi_comm_masters,MPI_STATUS_IGNORE);
440     */
441     {
442       MPI_Request mpi_req;
443
444       MPI_Isend(v,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
445                 ms->mpi_comm_masters,&mpi_req);
446       MPI_Recv(buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
447                ms->mpi_comm_masters,MPI_STATUS_IGNORE);
448       MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
449     }
450 #endif
451     for(i=0; i<n; i++) 
452     {
453         v[i] = buf[i];
454     }
455     sfree(buf);
456   }
457 }
458
459 static void exchange_doubles(const gmx_multisim_t *ms,int b,double *v,int n)
460 {
461     double *buf;
462     int  i;
463
464     if (v)
465     {
466         snew(buf,n);
467 #ifdef GMX_MPI
468         /*
469           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
470           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
471           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
472         */
473         {
474             MPI_Request mpi_req;
475
476             MPI_Isend(v,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
477                       ms->mpi_comm_masters,&mpi_req);
478             MPI_Recv(buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
479                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
480             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
481         }
482 #endif
483         for(i=0; i<n; i++)
484         {
485             v[i] = buf[i];
486         }
487         sfree(buf);
488     }
489 }
490
491 static void exchange_rvecs(const gmx_multisim_t *ms,int b,rvec *v,int n)
492 {
493     rvec *buf;
494     int  i;
495   
496     if (v)
497     {
498         snew(buf,n);
499 #ifdef GMX_MPI
500         /*
501           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
502           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
503           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
504         */
505         {
506             MPI_Request mpi_req;
507
508             MPI_Isend(v[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
509                       ms->mpi_comm_masters,&mpi_req);
510             MPI_Recv(buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
511                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
512             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
513         }
514 #endif
515         for(i=0; i<n; i++)
516         {
517             copy_rvec(buf[i],v[i]);
518         }
519         sfree(buf);
520     }
521 }
522
523 static void exchange_state(const gmx_multisim_t *ms,int b,t_state *state)
524 {
525     /* When t_state changes, this code should be updated. */
526     int ngtc,nnhpres;
527     ngtc = state->ngtc * state->nhchainlength;
528     nnhpres = state->nnhpres* state->nhchainlength;
529     exchange_rvecs(ms,b,state->box,DIM);
530     exchange_rvecs(ms,b,state->box_rel,DIM);
531     exchange_rvecs(ms,b,state->boxv,DIM);
532     exchange_reals(ms,b,&(state->veta),1);
533     exchange_reals(ms,b,&(state->vol0),1);
534     exchange_rvecs(ms,b,state->svir_prev,DIM);
535     exchange_rvecs(ms,b,state->fvir_prev,DIM);
536     exchange_rvecs(ms,b,state->pres_prev,DIM);
537     exchange_doubles(ms,b,state->nosehoover_xi,ngtc);
538     exchange_doubles(ms,b,state->nosehoover_vxi,ngtc);
539     exchange_doubles(ms,b,state->nhpres_xi,nnhpres);
540     exchange_doubles(ms,b,state->nhpres_vxi,nnhpres);
541     exchange_doubles(ms,b,state->therm_integral,state->ngtc);
542     exchange_rvecs(ms,b,state->x,state->natoms);
543     exchange_rvecs(ms,b,state->v,state->natoms);
544     exchange_rvecs(ms,b,state->sd_X,state->natoms);
545 }
546
547 static void copy_rvecs(rvec *s,rvec *d,int n)
548 {
549     int i;
550
551     if (d != NULL)
552     {
553         for(i=0; i<n; i++)
554         {
555             copy_rvec(s[i],d[i]);
556         }
557     }
558 }
559
560 static void copy_doubles(const double *s,double *d,int n)
561 {
562     int i;
563
564     if (d != NULL)
565     {
566         for(i=0; i<n; i++)
567         {
568             d[i] = s[i];
569         }
570     }
571 }
572
573 static void copy_reals(const real *s,real *d,int n)
574 {
575     int i;
576
577     if (d != NULL)
578     {
579         for(i=0; i<n; i++)
580         {
581             d[i] = s[i];
582         }
583     }
584 }
585
586 static void copy_ints(const int *s,int *d,int n)
587 {
588     int i;
589
590     if (d != NULL)
591     {
592         for(i=0; i<n; i++)
593         {
594             d[i] = s[i];
595         }
596     }
597 }
598
599 #define scopy_rvecs(v,n)   copy_rvecs(state->v,state_local->v,n);
600 #define scopy_doubles(v,n) copy_doubles(state->v,state_local->v,n);
601 #define scopy_reals(v,n) copy_reals(state->v,state_local->v,n);
602 #define scopy_ints(v,n)   copy_ints(state->v,state_local->v,n);
603
604 static void copy_state_nonatomdata(t_state *state,t_state *state_local)
605 {
606     /* When t_state changes, this code should be updated. */
607     int ngtc,nnhpres;
608     ngtc = state->ngtc * state->nhchainlength;
609     nnhpres = state->nnhpres* state->nhchainlength;
610     scopy_rvecs(box,DIM);
611     scopy_rvecs(box_rel,DIM);
612     scopy_rvecs(boxv,DIM);
613     state_local->veta = state->veta;
614     state_local->vol0 = state->vol0;
615     scopy_rvecs(svir_prev,DIM);
616     scopy_rvecs(fvir_prev,DIM);
617     scopy_rvecs(pres_prev,DIM);
618     scopy_doubles(nosehoover_xi,ngtc);
619     scopy_doubles(nosehoover_vxi,ngtc);
620     scopy_doubles(nhpres_xi,nnhpres);
621     scopy_doubles(nhpres_vxi,nnhpres);
622     scopy_doubles(therm_integral,state->ngtc);
623     scopy_rvecs(x,state->natoms);
624     scopy_rvecs(v,state->natoms);
625     scopy_rvecs(sd_X,state->natoms);
626     copy_ints(&(state->fep_state),&(state_local->fep_state),1);
627     scopy_reals(lambda,efptNR);
628 }
629
630 static void scale_velocities(t_state *state,real fac)
631 {
632     int i;
633
634     if (state->v)
635     {
636         for(i=0; i<state->natoms; i++)
637         {
638             svmul(fac,state->v[i],state->v[i]);
639         }
640     }
641 }
642
643 static void pd_collect_state(const t_commrec *cr,t_state *state)
644 {
645     int shift;
646   
647     if (debug)
648     {
649         fprintf(debug,"Collecting state before exchange\n");
650     }
651     shift = cr->nnodes - cr->npmenodes - 1;
652     move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->x,NULL,shift,NULL);
653     if (state->v)
654     {
655         move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->v,NULL,shift,NULL);
656     }
657     if (state->sd_X)
658     {
659         move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->sd_X,NULL,shift,NULL);
660     }
661 }
662
663 static void print_transition_matrix(FILE *fplog,const char *leg,int n,int **nmoves, int *nattempt)
664 {
665     int i,j,ntot;
666     float Tprint;
667
668     ntot = nattempt[0] + nattempt[1];
669     fprintf(fplog,"\n");
670     fprintf(fplog,"Repl");
671     for (i=0;i<n;i++)
672     {
673         fprintf(fplog,"    ");  /* put the title closer to the center */
674     }
675     fprintf(fplog,"Empirical Transition Matrix\n");
676
677     fprintf(fplog,"Repl");
678     for (i=0;i<n;i++)
679     {
680         fprintf(fplog,"%8d",(i+1));
681     }
682     fprintf(fplog,"\n");
683
684     for (i=0;i<n;i++)
685     {
686         fprintf(fplog,"Repl");
687         for (j=0;j<n;j++)
688         {
689             Tprint = 0.0;
690             if (nmoves[i][j] > 0)
691             {
692                 Tprint = nmoves[i][j]/(2.0*ntot);
693             }
694             fprintf(fplog,"%8.4f",Tprint);
695         }
696         fprintf(fplog,"%3d\n",i);
697     }
698 }
699
700 static void print_ind(FILE *fplog,const char *leg,int n,int *ind,gmx_bool *bEx)
701 {
702     int i;
703
704     fprintf(fplog,"Repl %2s %2d",leg,ind[0]);
705     for(i=1; i<n; i++)
706     {
707         fprintf(fplog," %c %2d",(bEx!=0 && bEx[i]) ? 'x' : ' ',ind[i]);
708     }
709     fprintf(fplog,"\n");
710 }
711
712 static void print_allswitchind(FILE *fplog,int n,int *ind,int *pind, int *allswaps, int *tmpswap)
713 {
714     int i;
715
716     for (i=0;i<n;i++)
717     {
718         tmpswap[i] = allswaps[i];
719     }
720     for (i=0;i<n;i++)
721     {
722         allswaps[i] = tmpswap[pind[i]];
723     }
724
725     fprintf(fplog,"\nAccepted Exchanges:   ");
726     for (i=0;i<n;i++)
727     {
728         fprintf(fplog,"%d ",pind[i]);
729     }
730     fprintf(fplog,"\n");
731
732     /* the "Order After Exchange" is the state label corresponding to the configuration that
733        started in state listed in order, i.e.
734
735        3 0 1 2
736
737        means that the:
738        configuration starting in simulation 3 is now in simulation 0,
739        configuration starting in simulation 0 is now in simulation 1,
740        configuration starting in simulation 1 is now in simulation 2,
741        configuration starting in simulation 2 is now in simulation 3
742      */
743     fprintf(fplog,"Order After Exchange: ");
744     for (i=0;i<n;i++)
745     {
746         fprintf(fplog,"%d ",allswaps[i]);
747     }
748     fprintf(fplog,"\n\n");
749 }
750
751 static void print_prob(FILE *fplog,const char *leg,int n,real *prob)
752 {
753     int  i;
754     char buf[8];
755   
756     fprintf(fplog,"Repl %2s ",leg);
757     for(i=1; i<n; i++)
758     {
759         if (prob[i] >= 0)
760         {
761             sprintf(buf,"%4.2f",prob[i]);
762             fprintf(fplog,"  %3s",buf[0]=='1' ? "1.0" : buf+1);
763         }
764         else
765         {
766             fprintf(fplog,"     ");
767         }
768     }
769     fprintf(fplog,"\n");
770 }
771
772 static void print_count(FILE *fplog,const char *leg,int n,int *count)
773 {
774     int i;
775
776     fprintf(fplog,"Repl %2s ",leg);
777     for(i=1; i<n; i++)
778     {
779         fprintf(fplog," %4d",count[i]);
780     }
781     fprintf(fplog,"\n");
782 }
783
784 static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, int a, int b, int ap, int bp) {
785
786     real ediff,dpV,delta=0;
787     real *Epot = re->Epot;
788     real *Vol = re->Vol;
789     real **de = re->de;
790     real *beta = re->beta;
791
792     /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
793        to the non permuted case */
794
795     switch (re->type)
796     {
797     case ereTEMP:
798         /*
799          * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
800          */
801         ediff = Epot[b] - Epot[a];
802         delta = -(beta[bp] - beta[ap])*ediff;
803         break;
804     case ereLAMBDA:
805         /* two cases:  when we are permuted, and not.  */
806         /* non-permuted:
807            ediff =  E_new - E_old
808                  =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
809                  =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
810                  =  de[b][a] + de[a][b] */
811
812         /* permuted:
813            ediff =  E_new - E_old
814                  =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
815                  =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
816                  =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
817                  =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
818                  =  (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b])    */
819         /* but, in the current code implementation, we flip configurations, not indices . . .
820            So let's examine that.
821                  =  [H_b(x_ap) - H_a(x_a)] - [H_a(x_ap) - H_a(x_a)] + [H_a(x_bp) - H_b(x_b)] - H_b(x_bp) - H_b(x_b)]
822                  =  [H_b(x_ap) - H_a(x_ap)]  + [H_a(x_bp) - H_b(x_pb)]
823                  = (de[b][ap] - de[a][ap]) + (de[a][bp] - de[b][bp]
824                  So, if we exchange b<=> bp and a<=> ap, we return to the same result.
825                  So the simple solution is to flip the
826                  position of perturbed and original indices in the tests.
827         */
828
829         ediff = (de[bp][a] - de[ap][a]) + (de[ap][b] - de[bp][b]);
830         delta = ediff*beta[a]; /* assume all same temperature in this case */
831         break;
832     case ereTL:
833         /* not permuted:  */
834         /* delta =  reduced E_new - reduced E_old
835                  =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
836                  =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
837                  =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
838                     [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
839                  =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
840                     beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
841                  =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
842         /* delta = beta[b]*de[b][a] + beta[a]*de[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
843         /* permuted (big breath!) */
844         /*   delta =  reduced E_new - reduced E_old
845                  =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
846                  =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
847                  =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
848                     - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
849                     - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
850                  =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
851                     [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
852                     + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
853                  =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
854                     [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
855                     + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
856                  =  ([beta_bp de[bp][a] - beta_ap de[ap][a]) + beta_ap de[ap][b]  - beta_bp de[bp][b])
857                  + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
858         delta = beta[bp]*(de[bp][a] - de[bp][b]) + beta[ap]*(de[ap][b] - de[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]);
859         break;
860     default:
861         gmx_incons("Unknown replica exchange quantity");
862     }
863     if (bPrint)
864     {
865         fprintf(fplog,"Repl %d <-> %d  dE_term = %10.3e (kT)\n",a,b,delta);
866     }
867     if (re->bNPT)
868     {
869         /* revist the calculation for 5.0.  Might be some improvements. */
870         dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC;
871         if (bPrint) 
872         {
873             fprintf(fplog,"  dpV = %10.3e  d = %10.3e\nb",dpV,delta + dpV);
874         }
875         delta += dpV;
876     }
877     return delta;
878 }
879
880 static void
881 test_for_replica_exchange(FILE *fplog,
882                           const gmx_multisim_t *ms,
883                           struct gmx_repl_ex *re,
884                           gmx_enerdata_t *enerd,
885                           real vol,
886                           gmx_large_int_t step,
887                           real time)
888 {
889     int  m,i,j,a,b,ap,bp,i0,i1,tmp;
890     real ediff=0,delta=0,dpV=0;
891     gmx_bool bPrint,bMultiEx;
892     gmx_bool *bEx = re->bEx;
893     real *prob = re->prob;
894     int *pind = re->destinations; /* permuted index */
895     gmx_bool bEpot=FALSE;
896     gmx_bool bDLambda=FALSE;
897     gmx_bool bVol=FALSE;
898
899     bMultiEx = (re->nex > 1);  /* multiple exchanges at each state */
900     fprintf(fplog,"Replica exchange at step " gmx_large_int_pfmt " time %g\n",step,time);
901
902     if (re->bNPT)
903     {
904         for (i=0;i<re->nrepl;i++)
905         {
906             re->Vol[i] = 0;
907         }
908         bVol = TRUE;
909         re->Vol[re->repl]  = vol;
910     }
911     if ((re->type == ereTEMP || re->type == ereTL))
912     {
913         for (i=0;i<re->nrepl;i++)
914         {
915             re->Epot[i] = 0;
916         }
917         bEpot = TRUE;
918         re->Epot[re->repl] = enerd->term[F_EPOT];
919         /* temperatures of different states*/
920         for (i=0;i<re->nrepl;i++)
921         {
922             re->beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
923         }
924     }
925     else
926     {
927         for (i=0;i<re->nrepl;i++)
928         {
929             re->beta[i] = 1.0/(re->temp*BOLTZ);  /* we have a single temperature */
930         }
931     }
932     if (re->type == ereLAMBDA || re->type == ereTL)
933     {
934         bDLambda = TRUE;
935         /* lambda differences. */
936         /* de[i][j] is the energy of the jth simulation in the ith Hamiltonian
937            minus the energy of the jth simulation in the jth Hamiltonian */
938         for (i=0;i<re->nrepl;i++)
939         {
940             for (j=0;j<re->nrepl;j++)
941             {
942                 re->de[i][j] = 0;
943             }
944         }
945         for (i=0;i<re->nrepl;i++)
946         {
947             re->de[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
948         }
949     }
950
951     /* now actually do the communication */
952     if (bVol)
953     {
954         gmx_sum_sim(re->nrepl,re->Vol,ms);
955     }
956     if (bEpot)
957     {
958         gmx_sum_sim(re->nrepl,re->Epot,ms);
959     }
960     if (bDLambda)
961     {
962         for (i=0;i<re->nrepl;i++)
963         {
964             gmx_sum_sim(re->nrepl,re->de[i],ms);
965         }
966     }
967
968     /* make a duplicate set of indices for shuffling */
969     for(i=0;i<re->nrepl;i++)
970     {
971         pind[i] = re->ind[i];
972     }
973
974     if (bMultiEx)
975     {
976         /* multiple random switch exchange */
977         for (i=0;i<re->nex;i++)
978         {
979             /* randomly select a pair  */
980             /* in theory, could reduce this by identifying only which switches had a nonneglibible
981                probability of occurring (log p > -100) and only operate on those switches */
982             /* find out which state it is from, and what label that state currently has. Likely
983                more work that useful. */
984             i0 = (int)(re->nrepl*rando(&(re->seed)));
985             i1 = (int)(re->nrepl*rando(&(re->seed)));
986             if (i0==i1)
987             {
988                 i--;
989                 continue;  /* self-exchange, back up and do it again */
990             }
991
992             a = re->ind[i0]; /* what are the indices of these states? */
993             b = re->ind[i1];
994             ap = pind[i0];
995             bp = pind[i1];
996
997             bPrint = FALSE; /* too noisy */
998             /* calculate the energy difference */
999             /* if the code changes to flip the STATES, rather than the configurations,
1000                use the commented version of the code */
1001             /* delta = calc_delta(fplog,bPrint,re,a,b,ap,bp); */
1002             delta = calc_delta(fplog,bPrint,re,ap,bp,a,b);
1003
1004             /* we actually only use the first space in the prob and bEx array,
1005                since there are actually many switches between pairs. */
1006
1007             if (delta <= 0)
1008             {
1009                 /* accepted */
1010                 prob[0] = 1;
1011                 bEx[0] = TRUE;
1012             }
1013             else
1014             {
1015                 if (delta > PROBABILITYCUTOFF)
1016                 {
1017                     prob[0] = 0;
1018                 }
1019                 else
1020                 {
1021                     prob[0] = exp(-delta);
1022                 }
1023                 /* roll a number to determine if accepted */
1024                 bEx[0] = (rando(&(re->seed)) < prob[0]);
1025             }
1026             re->prob_sum[0] += prob[0];
1027
1028             if (bEx[0])
1029             {
1030                 /* swap the states */
1031                 tmp = pind[i0];
1032                 pind[i0] = pind[i1];
1033                 pind[i1] = tmp;
1034             }
1035         }
1036         re->nattempt[0]++;  /* keep track of total permutation trials here */
1037         print_allswitchind(fplog,re->nrepl,re->ind,pind,re->allswaps,re->tmpswap);
1038     }
1039     else
1040     {
1041         /* standard nearest neighbor replica exchange */
1042         m = (step / re->nst) % 2;
1043         for(i=1; i<re->nrepl; i++)
1044         {
1045             a = re->ind[i-1];
1046             b = re->ind[i];
1047             
1048             bPrint = (re->repl==a || re->repl==b);
1049             if (i % 2 == m)
1050             {
1051                 delta = calc_delta(fplog,bPrint,re,a,b,a,b);
1052                 if (delta <= 0) {
1053                     /* accepted */
1054                     prob[i] = 1;
1055                     bEx[i] = TRUE;
1056                 }
1057                 else
1058                 {
1059                     if (delta > PROBABILITYCUTOFF)
1060                     {
1061                         prob[i] = 0;
1062                     }
1063                     else
1064                     {
1065                         prob[i] = exp(-delta);
1066                     }
1067                     /* roll a number to determine if accepted */
1068                     bEx[i] = (rando(&(re->seed)) < prob[i]);
1069                 }
1070                 re->prob_sum[i] += prob[i];
1071
1072                 if (bEx[i])
1073                 {
1074                     /* swap these two */
1075                     tmp = pind[i-1];
1076                     pind[i-1] = pind[i];
1077                     pind[i] = tmp;
1078                     re->nexchange[i]++;  /* statistics for back compatibility */
1079                 }
1080             }
1081             else
1082             {
1083                 prob[i] = -1;
1084                 bEx[i] = FALSE;
1085             }
1086         }
1087         /* print some statistics */
1088         print_ind(fplog,"ex",re->nrepl,re->ind,bEx);
1089         print_prob(fplog,"pr",re->nrepl,prob);
1090         fprintf(fplog,"\n");
1091         re->nattempt[m]++;
1092     }
1093
1094     /* record which moves were made and accepted */
1095     for (i=0;i<re->nrepl;i++)
1096     {
1097         re->nmoves[re->ind[i]][pind[i]] +=1;
1098         re->nmoves[pind[i]][re->ind[i]] +=1;
1099     }
1100     fflush(fplog); /* make sure we can see what the last exchange was */
1101 }
1102
1103 static void write_debug_x(t_state *state)
1104 {
1105     int i;
1106
1107     if (debug)
1108     {
1109         for(i=0; i<state->natoms; i+=10)
1110         {
1111             fprintf(debug,"dx %5d %10.5f %10.5f %10.5f\n",i,state->x[i][XX],state->x[i][YY],state->x[i][ZZ]);
1112         }
1113     }
1114 }
1115
1116 static void
1117 cyclic_decomposition(FILE *fplog,
1118                      const int *destinations,
1119                      int **cyclic,
1120                      gmx_bool *incycle,
1121                      const int nrepl,
1122                      int *nswap)
1123 {
1124
1125     int i,j,c,p;
1126     int maxlen = 1;
1127     for (i=0;i<nrepl;i++)
1128     {
1129         incycle[i] = FALSE;
1130     }
1131     for (i=0;i<nrepl;i++)  /* one cycle for each replica */
1132     {
1133         if (incycle[i])
1134         {
1135             cyclic[i][0] = -1;
1136             continue;
1137         }
1138         cyclic[i][0] = i;
1139         incycle[i] = TRUE;
1140         c = 1;
1141         p = i;
1142         for (j=0;j<nrepl;j++)  /* potentially all cycles are part, but we will break first */
1143         {
1144             p = destinations[p]; /* start permuting */
1145             if (p==i)
1146             {
1147                 cyclic[i][c] = -1;
1148                 if (c > maxlen)
1149                 {
1150                     maxlen = c;
1151                 }
1152                 break; /* we've reached the original element, the cycle is complete, and we marked the end. */
1153             }
1154             else
1155             {
1156                 cyclic[i][c] = p;  /* each permutation gives a new member of the cycle */
1157                 incycle[p] = TRUE;
1158                 c++;
1159             }
1160         }
1161     }
1162     *nswap = maxlen - 1;
1163
1164     if (debug)
1165     {
1166         for (i=0;i<nrepl;i++)
1167         {
1168             fprintf(debug,"Cycle %d:",i);
1169             for (j=0;j<nrepl;j++)
1170             {
1171                 if (cyclic[i][j] < 0)
1172                 {
1173                     break;
1174                 }
1175                 fprintf(debug,"%2d",cyclic[i][j]);
1176             }
1177             fprintf(debug,"\n");
1178         }
1179         fflush(debug);
1180     }
1181 }
1182
1183 static void
1184 compute_exchange_order(FILE *fplog,
1185                        int **cyclic,
1186                        int **order,
1187                        const int nrepl,
1188                        const int maxswap)
1189 {
1190     int i,j;
1191
1192     for (j=0;j<maxswap;j++)
1193     {
1194         for (i=0;i<nrepl;i++)
1195         {
1196             if (cyclic[i][j+1] >= 0)
1197             {
1198                 order[cyclic[i][j+1]][j] = cyclic[i][j];
1199                 order[cyclic[i][j]][j] = cyclic[i][j+1];
1200             }
1201         }
1202         for (i=0;i<nrepl;i++)
1203         {
1204             if (order[i][j] < 0)
1205             {
1206                 order[i][j] = i; /* if it's not exchanging, it should stay this round*/
1207             }
1208         }
1209     }
1210
1211     if (debug)
1212     {
1213         fprintf(fplog,"Replica Exchange Order\n");
1214         for (i=0;i<nrepl;i++)
1215         {
1216             fprintf(fplog,"Replica %d:",i);
1217             for (j=0;j<maxswap;j++)
1218             {
1219                 if (order[i][j] < 0) break;
1220                 fprintf(debug,"%2d",order[i][j]);
1221             }
1222             fprintf(fplog,"\n");
1223         }
1224         fflush(fplog);
1225     }
1226 }
1227
1228 static void
1229 prepare_to_do_exchange(FILE *fplog,
1230                        const int *destinations,
1231                        const int replica_id,
1232                        const int nrepl,
1233                        int *maxswap,
1234                        int **order,
1235                        int **cyclic,
1236                        int *incycle,
1237                        gmx_bool *bThisReplicaExchanged)
1238 {
1239     int i,j;
1240     /* Hold the cyclic decomposition of the (multiple) replica
1241      * exchange. */
1242     gmx_bool bAnyReplicaExchanged = FALSE;
1243     *bThisReplicaExchanged = FALSE;
1244
1245     for (i = 0; i < nrepl; i++)
1246     {
1247         if (destinations[i] != i) {
1248             /* only mark as exchanged if the index has been shuffled */
1249             bAnyReplicaExchanged = TRUE;
1250             break;
1251         }
1252     }
1253     if (bAnyReplicaExchanged)
1254     {
1255         /* reinitialize the placeholder arrays */
1256         for (i = 0; i < nrepl; i++)
1257         {
1258             for (j = 0; j < nrepl; j++)
1259             {
1260                 cyclic[i][j] = -1;
1261                 order[i][j] = -1;
1262             }
1263         }
1264
1265         /* Identify the cyclic decomposition of the permutation (very
1266          * fast if neighbor replica exchange). */
1267         cyclic_decomposition(fplog,destinations,cyclic,incycle,nrepl,maxswap);
1268
1269         /* Now translate the decomposition into a replica exchange
1270          * order at each step. */
1271         compute_exchange_order(fplog,cyclic,order,nrepl,*maxswap);
1272
1273         /* Did this replica do any exchange at any point? */
1274         for (j = 0; j < *maxswap; j++)
1275         {
1276             if (replica_id != order[replica_id][j])
1277             {
1278                 *bThisReplicaExchanged = TRUE;
1279                 break;
1280             }
1281         }
1282     }
1283 }
1284
1285 gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re,
1286                           t_state *state,gmx_enerdata_t *enerd,
1287                           t_state *state_local,gmx_large_int_t step,real time)
1288 {
1289     int i,j;
1290     int replica_id = 0;
1291     int exchange_partner;
1292     int maxswap = 0;
1293     /* Number of rounds of exchanges needed to deal with any multiple
1294      * exchanges. */
1295     /* Where each replica ends up after the exchange attempt(s). */
1296     /* The order in which multiple exchanges will occur. */
1297     gmx_bool bThisReplicaExchanged = FALSE;
1298
1299     if (MASTER(cr))
1300     {
1301         replica_id  = re->repl;
1302         test_for_replica_exchange(fplog,cr->ms,re,enerd,det(state_local->box),step,time);
1303         prepare_to_do_exchange(fplog,re->destinations,replica_id,re->nrepl,&maxswap,
1304                                re->order,re->cyclic,re->incycle,&bThisReplicaExchanged);
1305     }
1306     /* Do intra-simulation broadcast so all processors belonging to
1307      * each simulation know whether they need to participate in
1308      * collecting the state. Otherwise, they might as well get on with
1309      * the next thing to do. */
1310     if (PAR(cr))
1311     {
1312 #ifdef GMX_MPI
1313         MPI_Bcast(&bThisReplicaExchanged,sizeof(gmx_bool),MPI_BYTE,MASTERRANK(cr),
1314                   cr->mpi_comm_mygroup);
1315 #endif
1316     }
1317
1318     if (bThisReplicaExchanged)
1319     {
1320         /* Exchange the states */
1321
1322         if (PAR(cr))
1323         {
1324             /* Collect the global state on the master node */
1325             if (DOMAINDECOMP(cr))
1326             {
1327                 dd_collect_state(cr->dd,state_local,state);
1328             }
1329             else
1330             {
1331                 pd_collect_state(cr,state);
1332             }
1333         }
1334         
1335         if (MASTER(cr))
1336         {
1337             /* There will be only one swap cycle with standard replica
1338              * exchange, but there may be multiple swap cycles if we
1339              * allow multiple swaps. */
1340
1341             for (j = 0; j < maxswap; j++)
1342             {
1343                 exchange_partner = re->order[replica_id][j];
1344
1345                 if (exchange_partner != replica_id)
1346                 {
1347                     /* Exchange the global states between the master nodes */
1348                     if (debug)
1349                     {
1350                         fprintf(debug,"Exchanging %d with %d\n",replica_id,exchange_partner);
1351                     }
1352                     exchange_state(cr->ms,exchange_partner,state);
1353                 }
1354             }
1355             /* For temperature-type replica exchange, we need to scale
1356              * the velocities. */
1357             if (re->type == ereTEMP || re->type == ereTL)
1358             {
1359                 scale_velocities(state,sqrt(re->q[ereTEMP][replica_id]/re->q[ereTEMP][re->destinations[replica_id]]));
1360             }
1361
1362         }
1363
1364         /* With domain decomposition the global state is distributed later */
1365         if (!DOMAINDECOMP(cr))
1366         {
1367             /* Copy the global state to the local state data structure */
1368             copy_state_nonatomdata(state,state_local);
1369             
1370             if (PAR(cr))
1371             {
1372                 bcast_state(cr,state,FALSE);
1373             }
1374         }
1375     }
1376
1377     return bThisReplicaExchanged;
1378 }
1379
1380 void print_replica_exchange_statistics(FILE *fplog,struct gmx_repl_ex *re)
1381 {
1382     int  i;
1383
1384     fprintf(fplog,"\nReplica exchange statistics\n");
1385
1386     if (re->nex == 0)
1387     {
1388         fprintf(fplog,"Repl  %d attempts, %d odd, %d even\n",
1389                 re->nattempt[0]+re->nattempt[1],re->nattempt[1],re->nattempt[0]);
1390
1391         fprintf(fplog,"Repl  average probabilities:\n");
1392         for(i=1; i<re->nrepl; i++)
1393         {
1394             if (re->nattempt[i%2] == 0)
1395             {
1396                 re->prob[i] = 0;
1397             }
1398             else
1399             {
1400                 re->prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
1401             }
1402         }
1403         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1404         print_prob(fplog,"",re->nrepl,re->prob);
1405
1406         fprintf(fplog,"Repl  number of exchanges:\n");
1407         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1408         print_count(fplog,"",re->nrepl,re->nexchange);
1409
1410         fprintf(fplog,"Repl  average number of exchanges:\n");
1411         for(i=1; i<re->nrepl; i++) 
1412         {
1413             if (re->nattempt[i%2] == 0)
1414             {
1415                 re->prob[i] = 0;
1416             }
1417             else
1418             {
1419                 re->prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
1420             }
1421         }
1422         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1423         print_prob(fplog,"",re->nrepl,re->prob);
1424
1425         fprintf(fplog,"\n");
1426     }
1427     /* print the transition matrix */
1428     print_transition_matrix(fplog,"",re->nrepl,re->nmoves,re->nattempt);
1429 }