Merge release-4-5-patches into release-4-6
[alexxy/gromacs.git] / src / kernel / repl_ex.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #include <math.h>
41 #include "repl_ex.h"
42 #include "network.h"
43 #include "random.h"
44 #include "smalloc.h"
45 #include "physics.h"
46 #include "copyrite.h"
47 #include "macros.h"
48 #include "vec.h"
49 #include "names.h"
50 #include "mvdata.h"
51 #include "domdec.h"
52 #include "partdec.h"
53
54 #define PROBABILITYCUTOFF 100
55 /* we don't bother evaluating if events are more rare than exp(-100) = 3.7x10^-44 */
56
57 enum { ereTEMP, ereLAMBDA, ereENDSINGLE ,ereTL, ereNR };
58 const char *erename[ereNR] = { "temperature", "lambda", "end_single_marker", "temperature and lambda"};
59
60 typedef struct gmx_repl_ex
61 {
62     int  repl;
63     int  nrepl;
64     real temp;
65     int  type;
66     real **q;
67     gmx_bool bNPT;
68     real *pres;
69     int  *ind;
70     int *allswaps;
71     int  nst;
72     int nex;
73     int  seed;
74     int  nattempt[2];
75     real *prob_sum;
76     int  **nmoves;
77     int  *nexchange;
78 } t_gmx_repl_ex;
79
80 static gmx_bool repl_quantity(FILE *fplog,const gmx_multisim_t *ms,
81                               struct gmx_repl_ex *re,int ere,real q)
82 {
83     real *qall;
84     gmx_bool bDiff;
85     int  i,s;
86
87     snew(qall,ms->nsim);
88     qall[re->repl] = q;
89     gmx_sum_sim(ms->nsim,qall,ms);
90
91     bDiff = FALSE;
92     for (s=1; s<ms->nsim; s++)
93     {
94         if (qall[s] != qall[0])
95         {
96             bDiff = TRUE;   
97         }
98     }
99
100     if (bDiff)
101     {
102         /* Set the replica exchange type and quantities */
103         re->type = ere;
104
105         snew(re->q[ere],re->nrepl);
106         for(s=0; s<ms->nsim; s++)
107         {
108             re->q[ere][s] = qall[s];
109         }
110     }
111     sfree(qall);
112     return bDiff;
113 }
114
115 gmx_repl_ex_t init_replica_exchange(FILE *fplog,
116                                     const gmx_multisim_t *ms,
117                                     const t_state *state,
118                                     const t_inputrec *ir,
119                                     int nst, int nex, int init_seed)
120 {
121     real temp,pres;
122     int  i,j,k;
123     struct gmx_repl_ex *re;
124     gmx_bool bTemp;
125     gmx_bool bLambda=FALSE;
126
127     fprintf(fplog,"\nInitializing Replica Exchange\n");
128
129     if (ms == NULL || ms->nsim == 1)
130     {
131         gmx_fatal(FARGS,"Nothing to exchange with only one replica, maybe you forgot to set the -multi option of mdrun?");
132     }
133
134     snew(re,1);
135
136     re->repl     = ms->sim;
137     re->nrepl    = ms->nsim;
138     snew(re->q,ereENDSINGLE);
139
140     fprintf(fplog,"Repl  There are %d replicas:\n",re->nrepl);
141
142     check_multi_int(fplog,ms,state->natoms,"the number of atoms");
143     check_multi_int(fplog,ms,ir->eI,"the integrator");
144     check_multi_large_int(fplog,ms,ir->init_step+ir->nsteps,"init_step+nsteps");
145     check_multi_large_int(fplog,ms,(ir->init_step+nst-1)/nst,
146                           "first exchange step: init_step/-replex");
147     check_multi_int(fplog,ms,ir->etc,"the temperature coupling");
148     check_multi_int(fplog,ms,ir->opts.ngtc,
149                     "the number of temperature coupling groups");
150     check_multi_int(fplog,ms,ir->epc,"the pressure coupling");
151     check_multi_int(fplog,ms,ir->efep,"free energy");
152     check_multi_int(fplog,ms,ir->fepvals->n_lambda,"number of lambda states");
153
154     re->temp = ir->opts.ref_t[0];
155     for(i=1; (i<ir->opts.ngtc); i++)
156     {
157         if (ir->opts.ref_t[i] != re->temp)
158         {
159             fprintf(fplog,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
160             fprintf(stderr,"\nWARNING: The temperatures of the different temperature coupling groups are not identical\n\n");
161         }
162     }
163
164     re->type = -1;
165     bTemp = repl_quantity(fplog,ms,re,ereTEMP,re->temp);
166     if (ir->efep != efepNO)
167     {
168         bLambda = repl_quantity(fplog,ms,re,ereLAMBDA,(real)ir->fepvals->init_fep_state);
169     }
170     if (re->type == -1)  /* nothing was assigned */
171     {
172         gmx_fatal(FARGS,"The properties of the %d systems are all the same, there is nothing to exchange",re->nrepl);
173     }
174     if (bLambda && bTemp) {
175         re->type = ereTL;
176     }
177
178     if (bTemp)
179     {
180         please_cite(fplog,"Sugita1999a");
181         if (ir->epc != epcNO)
182         {
183             re->bNPT = TRUE;
184             fprintf(fplog,"Repl  Using Constant Pressure REMD.\n");
185             please_cite(fplog,"Okabe2001a");
186         }
187         if (ir->etc == etcBERENDSEN)
188         {
189             gmx_fatal(FARGS,"REMD with the %s thermostat does not produce correct potential energy distributions, consider using the %s thermostat instead",
190                       ETCOUPLTYPE(ir->etc),ETCOUPLTYPE(etcVRESCALE));
191         }
192     }
193     if (bLambda) {
194         if (ir->fepvals->delta_lambda != 0)   /* check this? */
195         {
196             gmx_fatal(FARGS,"delta_lambda is not zero");
197         }
198     }
199     if (re->bNPT)
200     {
201         snew(re->pres,re->nrepl);
202         if (ir->epct == epctSURFACETENSION)
203         {
204             pres = ir->ref_p[ZZ][ZZ];
205         }
206         else
207         {
208             pres = 0;
209             j = 0;
210             for(i=0; i<DIM; i++)
211             {
212                 if (ir->compress[i][i] != 0)
213                 {
214                     pres += ir->ref_p[i][i];
215                     j++;
216                 }
217             }
218             pres /= j;
219         }
220         re->pres[re->repl] = pres;
221         gmx_sum_sim(re->nrepl,re->pres,ms);
222     }
223
224     /* Make an index for increasing replica order */
225     /* only makes sense if one or the other is varying, not both!
226        if both are varying, we trust the order the person gave. */
227     snew(re->ind,re->nrepl);
228     for(i=0; i<re->nrepl; i++)
229     {
230         re->ind[i] = i;
231     }
232
233     if (re->type<ereENDSINGLE) {
234
235         for(i=0; i<re->nrepl; i++)
236         {
237             for(j=i+1; j<re->nrepl; j++)
238             {
239                 if (re->q[re->type][re->ind[j]] < re->q[re->type][re->ind[i]])
240                 {
241                     k = re->ind[i];
242                     re->ind[i] = re->ind[j];
243                     re->ind[j] = k;
244                 }
245                 else if (re->q[re->type][re->ind[j]] == re->q[re->type][re->ind[i]])
246                 {
247                     gmx_fatal(FARGS,"Two replicas have identical %ss",erename[re->type]);
248                 }
249             }
250         }
251     }
252
253     /* keep track of all the swaps, starting with the initial placement. */
254     snew(re->allswaps,re->nrepl);
255     for(i=0; i<re->nrepl; i++)
256     {
257         re->allswaps[i] = re->ind[i];
258     }
259
260     switch (re->type)
261     {
262     case ereTEMP:
263         fprintf(fplog,"\nReplica exchange in temperature\n");
264         for(i=0; i<re->nrepl; i++)
265         {
266             fprintf(fplog," %5.1f",re->q[re->type][re->ind[i]]);
267         }
268         fprintf(fplog,"\n");
269         break;
270     case ereLAMBDA:
271         fprintf(fplog,"\nReplica exchange in lambda\n");
272         for(i=0; i<re->nrepl; i++)
273         {
274             fprintf(fplog," %3d",(int)re->q[re->type][re->ind[i]]);
275         }
276         fprintf(fplog,"\n");
277         break;
278     case ereTL:
279         fprintf(fplog,"\nReplica exchange in temperature and lambda state\n");
280         for(i=0; i<re->nrepl; i++)
281         {
282             fprintf(fplog," %5.1f",re->q[ereTEMP][re->ind[i]]);
283         }
284         fprintf(fplog,"\n");
285         for(i=0; i<re->nrepl; i++)
286         {
287             fprintf(fplog," %5d",(int)re->q[ereLAMBDA][re->ind[i]]);
288         }
289         fprintf(fplog,"\n");
290         break;
291     default:
292         gmx_incons("Unknown replica exchange quantity");
293     }
294     if (re->bNPT)
295     {
296         fprintf(fplog,"\nRepl  p");
297         for(i=0; i<re->nrepl; i++)
298         {
299             fprintf(fplog," %5.2f",re->pres[re->ind[i]]);
300         }
301
302         for(i=0; i<re->nrepl; i++)
303         {
304             if ((i > 0) && (re->pres[re->ind[i]] < re->pres[re->ind[i-1]]))
305             {
306                 fprintf(fplog,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
307                 fprintf(stderr,"\nWARNING: The reference pressures decrease with increasing temperatures\n\n");
308             }
309         }
310     }
311     re->nst = nst;
312     if (init_seed == -1)
313     {
314         if (MASTERSIM(ms))
315         {
316             re->seed = make_seed();
317         }
318         else
319         {
320             re->seed = 0;
321         }
322         gmx_sumi_sim(1,&(re->seed),ms);
323     }
324     else
325     {
326         re->seed = init_seed;
327     }
328     fprintf(fplog,"\nReplica exchange interval: %d\n",re->nst);
329     fprintf(fplog,"\nReplica random seed: %d\n",re->seed);
330
331     re->nattempt[0] = 0;
332     re->nattempt[1] = 0;
333
334     snew(re->prob_sum,re->nrepl);
335     snew(re->nexchange,re->nrepl);
336     snew(re->nmoves,re->nrepl);
337     for (i=0;i<re->nrepl;i++) 
338     {
339         snew(re->nmoves[i],re->nrepl);
340     }
341     fprintf(fplog,"Replica exchange information below: x=exchange, pr=probability\n");
342
343     re->nex = nex;
344     return re;
345 }
346
347 static void exchange_reals(const gmx_multisim_t *ms,int b,real *v,int n)
348 {
349     real *buf;
350     int  i;
351
352     if (v)
353     {
354         snew(buf,n);
355 #ifdef GMX_MPI
356         /*
357           MPI_Sendrecv(v,  n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
358           buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
359           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
360         */
361         {
362             MPI_Request mpi_req;
363
364             MPI_Isend(v,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
365                       ms->mpi_comm_masters,&mpi_req);
366             MPI_Recv(buf,n*sizeof(real),MPI_BYTE,MSRANK(ms,b),0,
367                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
368             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
369         }
370 #endif
371         for(i=0; i<n; i++)
372         {
373             v[i] = buf[i];
374         }
375         sfree(buf);
376     }
377 }
378
379
380 static void exchange_ints(const gmx_multisim_t *ms,int b,int *v,int n)
381 {
382   int *buf;
383   int  i;
384
385   if (v) {
386     snew(buf,n);
387 #ifdef GMX_MPI
388     /*
389     MPI_Sendrecv(v,  n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
390                  buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
391                  ms->mpi_comm_masters,MPI_STATUS_IGNORE);
392     */
393     {
394       MPI_Request mpi_req;
395
396       MPI_Isend(v,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
397                 ms->mpi_comm_masters,&mpi_req);
398       MPI_Recv(buf,n*sizeof(int),MPI_BYTE,MSRANK(ms,b),0,
399                ms->mpi_comm_masters,MPI_STATUS_IGNORE);
400       MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
401     }
402 #endif
403     for(i=0; i<n; i++) 
404     {
405         v[i] = buf[i];
406     }
407     sfree(buf);
408   }
409 }
410
411 static void exchange_doubles(const gmx_multisim_t *ms,int b,double *v,int n)
412 {
413     double *buf;
414     int  i;
415
416     if (v)
417     {
418         snew(buf,n);
419 #ifdef GMX_MPI
420         /*
421           MPI_Sendrecv(v,  n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
422           buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
423           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
424         */
425         {
426             MPI_Request mpi_req;
427
428             MPI_Isend(v,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
429                       ms->mpi_comm_masters,&mpi_req);
430             MPI_Recv(buf,n*sizeof(double),MPI_BYTE,MSRANK(ms,b),0,
431                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
432             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
433         }
434 #endif
435         for(i=0; i<n; i++)
436         {
437             v[i] = buf[i];
438         }
439         sfree(buf);
440     }
441 }
442
443 static void exchange_rvecs(const gmx_multisim_t *ms,int b,rvec *v,int n)
444 {
445     rvec *buf;
446     int  i;
447   
448     if (v)
449     {
450         snew(buf,n);
451 #ifdef GMX_MPI
452         /*
453           MPI_Sendrecv(v[0],  n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
454           buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
455           ms->mpi_comm_masters,MPI_STATUS_IGNORE);
456         */
457         {
458             MPI_Request mpi_req;
459
460             MPI_Isend(v[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
461                       ms->mpi_comm_masters,&mpi_req);
462             MPI_Recv(buf[0],n*sizeof(rvec),MPI_BYTE,MSRANK(ms,b),0,
463                      ms->mpi_comm_masters,MPI_STATUS_IGNORE);
464             MPI_Wait(&mpi_req,MPI_STATUS_IGNORE);
465         }
466 #endif
467         for(i=0; i<n; i++)
468         {
469             copy_rvec(buf[i],v[i]);
470         }
471         sfree(buf);
472     }
473 }
474
475 static void exchange_state(const gmx_multisim_t *ms,int b,t_state *state)
476 {
477     /* When t_state changes, this code should be updated. */
478     int ngtc,nnhpres;
479     ngtc = state->ngtc * state->nhchainlength;
480     nnhpres = state->nnhpres* state->nhchainlength;
481     exchange_rvecs(ms,b,state->box,DIM);
482     exchange_rvecs(ms,b,state->box_rel,DIM);
483     exchange_rvecs(ms,b,state->boxv,DIM);
484     exchange_reals(ms,b,&(state->veta),1);
485     exchange_reals(ms,b,&(state->vol0),1);
486     exchange_rvecs(ms,b,state->svir_prev,DIM);
487     exchange_rvecs(ms,b,state->fvir_prev,DIM);
488     exchange_rvecs(ms,b,state->pres_prev,DIM);
489     exchange_doubles(ms,b,state->nosehoover_xi,ngtc);
490     exchange_doubles(ms,b,state->nosehoover_vxi,ngtc);
491     exchange_doubles(ms,b,state->nhpres_xi,nnhpres);
492     exchange_doubles(ms,b,state->nhpres_vxi,nnhpres);
493     exchange_doubles(ms,b,state->therm_integral,state->ngtc);
494     exchange_rvecs(ms,b,state->x,state->natoms);
495     exchange_rvecs(ms,b,state->v,state->natoms);
496     exchange_rvecs(ms,b,state->sd_X,state->natoms);
497 }
498
499 static void copy_rvecs(rvec *s,rvec *d,int n)
500 {
501     int i;
502
503     if (d != NULL)
504     {
505         for(i=0; i<n; i++)
506         {
507             copy_rvec(s[i],d[i]);
508         }
509     }
510 }
511
512 static void copy_doubles(const double *s,double *d,int n)
513 {
514     int i;
515
516     if (d != NULL)
517     {
518         for(i=0; i<n; i++)
519         {
520             d[i] = s[i];
521         }
522     }
523 }
524
525 static void copy_reals(const real *s,real *d,int n)
526 {
527     int i;
528
529     if (d != NULL)
530     {
531         for(i=0; i<n; i++)
532         {
533             d[i] = s[i];
534         }
535     }
536 }
537
538 static void copy_ints(const int *s,int *d,int n)
539 {
540     int i;
541
542     if (d != NULL)
543     {
544         for(i=0; i<n; i++)
545         {
546             d[i] = s[i];
547         }
548     }
549 }
550
551 #define scopy_rvecs(v,n)   copy_rvecs(state->v,state_local->v,n);
552 #define scopy_doubles(v,n) copy_doubles(state->v,state_local->v,n);
553 #define scopy_reals(v,n) copy_reals(state->v,state_local->v,n);
554 #define scopy_ints(v,n)   copy_ints(state->v,state_local->v,n);
555
556 static void copy_state_nonatomdata(t_state *state,t_state *state_local)
557 {
558     /* When t_state changes, this code should be updated. */
559     int ngtc,nnhpres;
560     ngtc = state->ngtc * state->nhchainlength;
561     nnhpres = state->nnhpres* state->nhchainlength;
562     scopy_rvecs(box,DIM);
563     scopy_rvecs(box_rel,DIM);
564     scopy_rvecs(boxv,DIM);
565     state_local->veta = state->veta;
566     state_local->vol0 = state->vol0;
567     scopy_rvecs(svir_prev,DIM);
568     scopy_rvecs(fvir_prev,DIM);
569     scopy_rvecs(pres_prev,DIM);
570     scopy_doubles(nosehoover_xi,ngtc);
571     scopy_doubles(nosehoover_vxi,ngtc);
572     scopy_doubles(nhpres_xi,nnhpres);
573     scopy_doubles(nhpres_vxi,nnhpres);
574     scopy_doubles(therm_integral,state->ngtc);
575     scopy_rvecs(x,state->natoms);
576     scopy_rvecs(v,state->natoms);
577     scopy_rvecs(sd_X,state->natoms);
578     copy_ints(&(state->fep_state),&(state_local->fep_state),1);
579     scopy_reals(lambda,efptNR);
580 }
581
582 static void scale_velocities(t_state *state,real fac)
583 {
584     int i;
585
586     if (state->v)
587     {
588         for(i=0; i<state->natoms; i++)
589         {
590             svmul(fac,state->v[i],state->v[i]);
591         }
592     }
593 }
594
595 static void pd_collect_state(const t_commrec *cr,t_state *state)
596 {
597     int shift;
598   
599     if (debug)
600     {
601         fprintf(debug,"Collecting state before exchange\n");
602     }
603     shift = cr->nnodes - cr->npmenodes - 1;
604     move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->x,NULL,shift,NULL);
605     if (state->v)
606     {
607         move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->v,NULL,shift,NULL);
608     }
609     if (state->sd_X)
610     {
611         move_rvecs(cr,FALSE,FALSE,GMX_LEFT,GMX_RIGHT,state->sd_X,NULL,shift,NULL);
612     }
613 }
614
615 static void print_matrix(FILE *fplog,const char *leg,int n,int **nmoves, int *nattempt)
616 {
617     int i,j,ntot;
618     float Tprint;
619
620     ntot = nattempt[0] + nattempt[1];
621
622     fprintf(fplog,"                  Empirical Transition Matrix\n");
623     for (i=0;i<n;i++)
624     {
625         fprintf(fplog,"%8d",(i+1));
626     }
627     fprintf(fplog,"\n");
628     for (i=0;i<n;i++)
629     {
630         for (j=0;j<n;j++)
631         {
632             Tprint = 0.0;
633             if (nmoves[i][j] > 0)
634             {
635                 Tprint = nmoves[i][j]/(2.0*ntot);
636             }
637             fprintf(fplog,"%8.4f",Tprint);
638         }
639         fprintf(fplog,"%3d\n",i);
640     }
641 }
642
643 static void print_ind(FILE *fplog,const char *leg,int n,int *ind,gmx_bool *bEx)
644 {
645     int i;
646
647     fprintf(fplog,"Repl %2s %2d",leg,ind[0]);
648     for(i=1; i<n; i++)
649     {
650         fprintf(fplog," %c %2d",(bEx!=0 && bEx[i]) ? 'x' : ' ',ind[i]);
651     }
652     fprintf(fplog,"\n");
653 }
654
655 static void print_allswitchind(FILE *fplog,int n,int *ind,int *pind, int *allswaps)
656 {
657     int i;
658     int *tmpswap;
659
660     snew(tmpswap,n); /* need to save the data */
661     for (i=0;i<n;i++)
662     {
663         tmpswap[i] = allswaps[i];
664     }
665     for (i=0;i<n;i++)
666     {
667         allswaps[i] = tmpswap[pind[i]];
668     }
669
670     fprintf(fplog,"\nAccepted Exchanges:   ");
671     for (i=0;i<n;i++)
672     {
673         fprintf(fplog,"%d ",pind[i]);
674     }
675     fprintf(fplog,"\n");
676
677     fprintf(fplog,"Order After Exchange: ");
678     for (i=0;i<n;i++)
679     {
680         fprintf(fplog,"%d ",allswaps[i]);
681     }
682     fprintf(fplog,"\n\n");
683
684     sfree(tmpswap);
685 }
686
687 static void print_prob(FILE *fplog,const char *leg,int n,real *prob)
688 {
689     int  i;
690     char buf[8];
691   
692     fprintf(fplog,"Repl %2s ",leg);
693     for(i=1; i<n; i++)
694     {
695         if (prob[i] >= 0)
696         {
697             sprintf(buf,"%4.2f",prob[i]);
698             fprintf(fplog,"  %3s",buf[0]=='1' ? "1.0" : buf+1);
699         }
700         else
701         {
702             fprintf(fplog,"     ");
703         }
704     }
705     fprintf(fplog,"\n");
706 }
707
708 static void print_count(FILE *fplog,const char *leg,int n,int *count)
709 {
710     int i;
711
712     fprintf(fplog,"Repl %2s ",leg);
713     for(i=1; i<n; i++)
714     {
715         fprintf(fplog," %4d",count[i]);
716     }
717     fprintf(fplog,"\n");
718 }
719
720 static real calc_delta(FILE *fplog, gmx_bool bPrint, struct gmx_repl_ex *re, real *Epot, real **df, real* Vol, real *beta, int a, int b, int ap, int bp) {
721
722     real ediff,dpV,delta=0;
723
724     /* Two cases; we are permuted and not.  In all cases, setting ap = a and bp = b will reduce
725        to the non permuted case */
726
727     switch (re->type)
728     {
729     case ereTEMP:
730         /*
731          * Okabe et. al. Chem. Phys. Lett. 335 (2001) 435-439
732          */
733         ediff = Epot[b] - Epot[a];
734         delta = -(beta[bp] - beta[ap])*ediff;
735         break;
736     case ereLAMBDA:
737         /* two cases:  when we are permuted, and not.  */
738         /* non-permuted:
739            ediff =  E_new - E_old
740                  =  [H_b(x_a) + H_a(x_b)] - [H_b(x_b) + H_a(x_a)]
741                  =  [H_b(x_a) - H_a(x_a)] + [H_a(x_b) - H_b(x_b)]
742                  =  df[b][a] + df[a][b] */
743         /* permuted:
744            ediff =  E_new - E_old
745                  =  [H_bp(x_a) + H_ap(x_b)] - [H_bp(x_b) + H_ap(x_a)]
746                  =  [H_bp(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_bp(x_b)]
747                  =  [H_bp(x_a) - H_a(x_a) + H_a(x_a) - H_ap(x_a)] + [H_ap(x_b) - H_b(x_b) + H_b(x_b) - H_bp(x_b)]
748                  =  [H_bp(x_a) - H_a(x_a)] - [H_ap(x_a) - H_a(x_a)] + [H_ap(x_b) - H_b(x_b)] - H_bp(x_b) - H_b(x_b)]
749                  =  (df[bp][a] - df[ap][a]) + (df[ap][b] - df[bp][b])    */
750         ediff = (df[bp][a] - df[ap][a]) + (df[ap][b] - df[bp][b]);
751         delta = ediff*beta[a]; /* assume all same temperature in this case */
752         break;
753     case ereTL:
754         /* not permuted:  */
755         /* delta =  reduced E_new - reduced E_old
756                  =  [beta_b H_b(x_a) + beta_a H_a(x_b)] - [beta_b H_b(x_b) + beta_a H_a(x_a)]
757                  =  [beta_b H_b(x_a) - beta_a H_a(x_a)] + [beta_a H_a(x_b) - beta_b H_b(x_b)]
758                  =  [beta_b dH_b(x_a) + beta_b H_a(x_a) - beta_a H_a(x_a)] +
759                     [beta_a dH_a(x_b) + beta_a H_b(x_b) - beta_b H_b(x_b)]
760                  =  [beta_b dH_b(x_a) + [beta_a dH_a(x_b) +
761                     beta_b (H_a(x_a) - H_b(x_b)]) - beta_a (H_a(x_a) - H_b(x_b))
762                  =  beta_b dH_b(x_a) + beta_a dH_a(x_b) - (beta_b - beta_a)(H_b(x_b) - H_a(x_a) */
763         /* delta = beta[b]*df[b][a] + beta[a]*df[a][b] - (beta[b] - beta[a])*(Epot[b] - Epot[a]; */
764         /* permuted (big breath!) */
765         /*   delta =  reduced E_new - reduced E_old
766                  =  [beta_bp H_bp(x_a) + beta_ap H_ap(x_b)] - [beta_bp H_bp(x_b) + beta_ap H_ap(x_a)]
767                  =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
768                  =  [beta_bp H_bp(x_a) - beta_ap H_ap(x_a)] + [beta_ap H_ap(x_b) - beta_bp H_bp(x_b)]
769                     - beta_pb H_a(x_a) + beta_ap H_a(x_a) + beta_pb H_a(x_a) - beta_ap H_a(x_a)
770                     - beta_ap H_b(x_b) + beta_bp H_b(x_b) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
771                  =  [(beta_bp H_bp(x_a) - beta_bp H_a(x_a)) - (beta_ap H_ap(x_a) - beta_ap H_a(x_a))] +
772                     [(beta_ap H_ap(x_b)  - beta_ap H_b(x_b)) - (beta_bp H_bp(x_b) - beta_bp H_b(x_b))]
773                     + beta_pb H_a(x_a) - beta_ap H_a(x_a) + beta_ap H_b(x_b) - beta_bp H_b(x_b)
774                  =  [beta_bp (H_bp(x_a) - H_a(x_a)) - beta_ap (H_ap(x_a) - H_a(x_a))] +
775                     [beta_ap (H_ap(x_b) - H_b(x_b)) - beta_bp (H_bp(x_b) - H_b(x_b))]
776                     + beta_pb (H_a(x_a) - H_b(x_b))  - beta_ap (H_a(x_a) - H_b(x_b))
777                  =  ([beta_bp df[bp][a] - beta_ap df[ap][a]) + beta_ap df[ap][b]  - beta_bp df[bp][b])
778                  + (beta_pb-beta_ap)(H_a(x_a) - H_b(x_b))  */
779         delta = beta[bp]*(df[bp][a] - df[bp][b]) + beta[ap]*(df[ap][b] - df[ap][a]) - (beta[bp]-beta[ap])*(Epot[b]-Epot[a]);
780         break;
781     default:
782         gmx_incons("Unknown replica exchange quantity");
783     }
784     if (bPrint)
785     {
786         fprintf(fplog,"Repl %d <-> %d  dE_term = %10.3e (kT)\n",a,b,delta);
787     }
788     if (re->bNPT)
789     {
790         /* revist the calculation for 5.0.  Might be some improvements. */
791         dpV = (beta[ap]*re->pres[ap]-beta[bp]*re->pres[bp])*(Vol[b]-Vol[a])/PRESFAC;
792         if (bPrint) 
793         {
794             fprintf(fplog,"  dpV = %10.3e  d = %10.3e\nb",dpV,delta + dpV);
795         }
796         delta += dpV;
797     }
798     return delta;
799 }
800
801 static void get_replica_exchange(FILE *fplog,const gmx_multisim_t *ms,
802                                  struct gmx_repl_ex *re,gmx_enerdata_t *enerd,real vol,
803                                  gmx_large_int_t step,real time,int *pind)
804 {
805     int  m,i,a,b,ap,bp,i0,i1,tmp;
806     real *Epot=NULL,*Vol=NULL,**flambda=NULL,*beta=NULL,*prob;
807     real ediff=0,delta=0,dpV=0;
808     gmx_bool *bEx,bPrint,bMultiEx;
809     gmx_bool bEpot=FALSE;
810     gmx_bool bFLambda=FALSE;
811     gmx_bool bVol=FALSE;
812
813     bMultiEx = (re->nex > 1);  /* multiple exchanges at each state */
814     fprintf(fplog,"Replica exchange at step " gmx_large_int_pfmt " time %g\n",step,time);
815
816     snew(beta,re->nrepl);
817     if (re->bNPT)
818     {
819         bVol = TRUE;
820         snew(Vol,re->nrepl);
821         Vol[re->repl]  = vol;
822     }
823
824     if ((re->type == ereTEMP || re->type == ereTL))
825     {
826         bEpot = TRUE;
827         snew(Epot,re->nrepl);
828         Epot[re->repl] = enerd->term[F_EPOT];
829         /* temperatures of different states*/
830         for (i=0;i<re->nrepl;i++)
831         {
832             beta[i] = 1.0/(re->q[ereTEMP][i]*BOLTZ);
833         }
834     }
835     else
836     {
837         for (i=0;i<re->nrepl;i++)
838         {
839             beta[i] = 1.0/(re->temp*BOLTZ);  /* we have a single temperature */
840         }
841     }
842     if (re->type == ereLAMBDA || re->type == ereTL)
843     {
844         bFLambda = TRUE;
845         /* lambda differences. */
846         /* flambda[i][j] is the energy of the jth simulation in the ith Hamiltonian
847            minus the energy of the jth simulation in the jth Hamiltonian */
848         snew(flambda,re->nrepl);
849         for (i=0;i<re->nrepl;i++)
850         {
851             snew(flambda[i],re->nrepl);
852             flambda[i][re->repl] = (enerd->enerpart_lambda[(int)re->q[ereLAMBDA][i]+1]-enerd->enerpart_lambda[0]);
853         }
854     }
855
856     /* now actually do the communication */
857     if (bVol)
858     {
859         gmx_sum_sim(re->nrepl,Vol,ms);
860     }
861     if (bEpot)
862     {
863         gmx_sum_sim(re->nrepl,Epot,ms);
864     }
865     if (bFLambda)
866     {
867         for (i=0;i<re->nrepl;i++)
868         {
869             gmx_sum_sim(re->nrepl,flambda[i],ms);
870         }
871     }
872     snew(bEx,re->nrepl);
873     snew(prob,re->nrepl);
874
875     /* make a duplicate set of indices for shuffling */
876     for(i=0;i<re->nrepl;i++)
877     {
878         pind[i] = re->ind[i];
879     }
880
881     if (bMultiEx)
882     {
883         /* multiple random switch exchange */
884         for (i=0;i<re->nex;i++)
885         {
886             /* randomly select a pair  */
887             /* find out which state it is from, and what label that state currently has */
888             i0 = (int)(re->nrepl*rando(&(re->seed)));
889             i1 = (int)(re->nrepl*rando(&(re->seed)));
890             if (i0==i1)
891             {
892                 i--;
893                 continue;  /* got the same pair, back up and do it again */
894             }
895
896             a = re->ind[i0];
897             b = re->ind[i1];
898             ap = pind[i0];
899             bp = pind[i1];
900
901             bPrint = FALSE; /* too noisy */
902             delta = calc_delta(fplog,bPrint,re,Epot,flambda,Vol,beta,a,b,ap,bp); /* calculate the energy difference */
903
904             /* we actually only use the first space, since there are actually many switches between pairs. */
905
906             if (delta <= 0)
907             {
908                 /* accepted */
909                 prob[0] = 1;
910                 bEx[0] = TRUE;
911             }
912             else
913             {
914                 if (delta > PROBABILITYCUTOFF)
915                 {
916                     prob[0] = 0;
917                 }
918                 else
919                 {
920                     prob[0] = exp(-delta);
921                 }
922                 /* roll a number to determine if accepted */
923                 bEx[0] = (rando(&(re->seed)) < prob[0]);
924             }
925             re->prob_sum[0] += prob[0];
926
927             if (bEx[0])
928             {
929                 /* swap the states */
930                 tmp = pind[i0];
931                 pind[i0] = pind[i1];
932                 pind[i1] = tmp;
933             }
934         }
935         re->nattempt[0]++;  /* keep track of total permutation trials here */
936         print_allswitchind(fplog,re->nrepl,re->ind,pind,re->allswaps);
937     }
938     else
939     {
940         /* standard nearest neighbor replica exchange */
941         m = (step / re->nst) % 2;
942         for(i=1; i<re->nrepl; i++)
943         {
944             a = re->ind[i-1];
945             b = re->ind[i];
946             
947             bPrint = (re->repl==a || re->repl==b);
948             if (i % 2 == m)
949             {
950                 delta = calc_delta(fplog,bPrint,re,Epot,flambda,Vol,beta,a,b,a,b);
951                 if (delta <= 0) {
952                     /* accepted */
953                     prob[i] = 1;
954                     bEx[i] = TRUE;
955                 }
956                 else
957                 {
958                     if (delta > PROBABILITYCUTOFF)
959                     {
960                         prob[i] = 0;
961                     }
962                     else
963                     {
964                         prob[i] = exp(-delta);
965                     }
966                     /* roll a number to determine if accepted */
967                     bEx[i] = (rando(&(re->seed)) < prob[i]);
968                 }
969                 re->prob_sum[i] += prob[i];
970
971                 if (bEx[i])
972                 {
973                     /* swap these two */
974                     tmp = pind[i-1];
975                     pind[i-1] = pind[i];
976                     pind[i] = tmp;
977                 }
978             }
979             else
980             {
981                 prob[i] = -1;
982                 bEx[i] = FALSE;
983             }
984         }
985         /* print some statistics */
986         print_ind(fplog,"ex",re->nrepl,re->ind,bEx);
987         print_prob(fplog,"pr",re->nrepl,prob);
988         fprintf(fplog,"\n");
989         re->nattempt[m]++;
990     }
991
992     /* record which moves were made and accepted */
993     for (i=0;i<re->nrepl;i++)
994     {
995         re->nmoves[re->ind[i]][pind[i]] +=1;
996         re->nmoves[pind[i]][re->ind[i]] +=1;
997     }
998     /* free up data */
999     sfree(bEx);
1000     sfree(prob);
1001     sfree(beta);
1002     if (re->bNPT)
1003     {
1004         sfree(Vol);
1005     }
1006     if ((re->type == ereTEMP || re->type == ereTL))
1007     {
1008         sfree(Epot);
1009     }
1010     if ((re->type == ereLAMBDA || re->type == ereTL))
1011     {
1012         for (i=0;i<re->nrepl;i++)
1013         {
1014             sfree(flambda[i]);
1015         }
1016         sfree(flambda);
1017     }
1018 }
1019
1020 static void write_debug_x(t_state *state)
1021 {
1022     int i;
1023
1024     if (debug)
1025     {
1026         for(i=0; i<state->natoms; i+=10)
1027         {
1028             fprintf(debug,"dx %5d %10.5f %10.5f %10.5f\n",i,state->x[i][XX],state->x[i][YY],state->x[i][ZZ]);
1029         }
1030     }
1031 }
1032
1033 static void cyclic_decomposition(FILE *fplog, int *pind, int **cyclic, int n, int *nswap)
1034 {
1035
1036     int i,j,c,p;
1037     int *incycle;
1038     int maxlen = 1;
1039     snew(incycle,n);
1040
1041     /* compute cyclic decompositions */
1042     for (i=0;i<n;i++)
1043     {
1044         snew(cyclic[i],n);
1045         for (j=0;j<n;j++)
1046         {
1047             cyclic[i][j] = -1;
1048         }
1049     }
1050
1051     for (i=0;i<n;i++)  /* one cycle for each replica */
1052     {
1053         if (incycle[i])
1054         {
1055             cyclic[i][0] = -1;
1056             continue;
1057         }
1058         cyclic[i][0] = i;
1059         incycle[i] = TRUE;
1060         c = 1;
1061         p = i;
1062         for (j=0;j<n;j++)  /* potentially all cycles are part, but we will break first */
1063         {
1064             p = pind[p]; /* start permuting */
1065             if (p==i)
1066             {
1067                 cyclic[i][c] = -1;
1068                 if (c > maxlen)
1069                 {
1070                     maxlen = c;
1071                 }
1072                 break; /* we've reached the original element, the cycle is complete, and we marked the end. */
1073             }
1074             else
1075             {
1076                 cyclic[i][c] = p;  /* each permutation gives a new member of the cycle */
1077                 incycle[p] = TRUE;
1078                 c++;
1079             }
1080         }
1081     }
1082     *nswap = maxlen - 1;
1083
1084     if (debug)
1085     {
1086         for (i=0;i<n;i++)
1087         {
1088             fprintf(fplog,"Cycle %d:",i);
1089             for (j=0;j<n;j++)
1090             {
1091                 if (cyclic[i][j] < 0)
1092                 {
1093                     break;
1094                 }
1095                 fprintf(fplog,"%2d",cyclic[i][j]);
1096             }
1097             fprintf(fplog,"\n");
1098         }
1099         fflush(fplog);
1100     }
1101 }
1102
1103 static void compute_exchange_order(FILE *fplog, int **cyclic,int **order, int n, int maxswap)
1104 {
1105     int i,j;
1106
1107     for (i=0;i<n;i++)
1108     {
1109         snew(order[i],maxswap);
1110         for (j=0;j<maxswap;j++)
1111         {
1112             order[i][j] = -1;
1113         }
1114     }
1115     for (j=0;j<maxswap;j++)
1116     {
1117         for (i=0;i<n;i++)
1118         {
1119             if (cyclic[i][j+1] >= 0)
1120             {
1121                 order[cyclic[i][j+1]][j] = cyclic[i][j];
1122                 order[cyclic[i][j]][j] = cyclic[i][j+1];
1123             }
1124         }
1125         for (i=0;i<n;i++)
1126         {
1127             if (order[i][j] < 0)
1128             {
1129                 order[i][j] = i; /* if it's not exchanging, it should stay this round*/
1130             }
1131         }
1132     }
1133     if (debug)
1134     {
1135         fprintf(fplog,"Replica Exchange Order\n");
1136         for (i=0;i<n;i++)
1137         {
1138             fprintf(fplog,"Replica %d:",i);
1139             for (j=0;j<maxswap;j++)
1140             {
1141                 if (order[i][j] < 0) break;
1142                 fprintf(fplog,"%2d",order[i][j]);
1143             }
1144             fprintf(fplog,"\n");
1145         }
1146         fflush(fplog);
1147     }
1148 }
1149
1150 gmx_bool replica_exchange(FILE *fplog,const t_commrec *cr,struct gmx_repl_ex *re,
1151                           t_state *state,gmx_enerdata_t *enerd,
1152                           t_state *state_local,gmx_large_int_t step,real time)
1153 {
1154     gmx_multisim_t *ms;
1155     int exchange=-1,shift;
1156     int i,j,maxswap=0;
1157     int *exchanges=NULL;
1158     int **cyclic=NULL;
1159     int **order=NULL;
1160     gmx_bool bExchanged=FALSE;
1161
1162     ms = cr->ms;
1163     if (MASTER(cr))
1164     {
1165         snew(exchanges,re->nrepl);
1166         get_replica_exchange(fplog,ms,re,enerd,det(state->box),step,time,exchanges);
1167         bExchanged = (exchanges[re->repl] != re->nrepl);  /* only mark as exchanged if it has a shuffled index */
1168         snew(cyclic,re->nrepl);
1169         snew(order,re->nrepl);
1170
1171         /* identify the cyclic decomposition of the permutation (very easy if neighbor replica exchange) */
1172         cyclic_decomposition(fplog,exchanges,cyclic,re->nrepl,&maxswap); 
1173
1174         /* now translate the decompsition into a replica exchange order at each step */
1175         compute_exchange_order(fplog,cyclic,order,re->nrepl,maxswap);
1176
1177         sfree(cyclic); /* don't need this anymore */
1178     }
1179     if (PAR(cr))
1180     {
1181 #ifdef GMX_MPI
1182         MPI_Bcast(&bExchanged,sizeof(gmx_bool),MPI_BYTE,MASTERRANK(cr),
1183                   cr->mpi_comm_mygroup);
1184 #endif
1185     }
1186     if (bExchanged)
1187     {
1188         /* Exchange the states */
1189
1190         if (PAR(cr))
1191         {
1192             /* Collect the global state on the master node */
1193             if (DOMAINDECOMP(cr))
1194             {
1195                 dd_collect_state(cr->dd,state_local,state);
1196             }
1197             else
1198             {
1199                 pd_collect_state(cr,state);
1200             }
1201         }
1202         
1203         if (MASTER(cr))
1204         {
1205             for (i=0;i<maxswap;i++) /* there will be only one swap cycle with standard replica exchange */
1206             {
1207                 exchange = order[ms->sim][i];
1208
1209                 if (exchange != ms->sim)
1210                 {
1211                     /* Exchange the global states between the master nodes */
1212                     if (debug)
1213                     {
1214                         fprintf(debug,"Exchanging %d with %d\n",ms->sim,exchange);
1215                     }
1216                     exchange_state(ms,exchange,state);
1217                 }
1218             }
1219             if (re->type == ereTEMP || re->type == ereTL)
1220             {
1221                 scale_velocities(state,sqrt(re->q[ereTEMP][ms->sim]/re->q[ereTEMP][exchanges[ms->sim]]));
1222             }
1223             sfree(order);
1224         }
1225
1226         /* With domain decomposition the global state is distributed later */
1227         if (!DOMAINDECOMP(cr))
1228         {
1229             /* Copy the global state to the local state data structure */
1230             copy_state_nonatomdata(state,state_local);
1231             
1232             if (PAR(cr))
1233             {
1234                 bcast_state(cr,state,FALSE);
1235             }
1236         }
1237     }
1238     return bExchanged;
1239 }
1240
1241 void print_replica_exchange_statistics(FILE *fplog,struct gmx_repl_ex *re)
1242 {
1243     real *prob;
1244     int  i;
1245
1246     fprintf(fplog,"\nReplica exchange statistics\n");
1247
1248     if (re->nex == 0)
1249     {
1250         fprintf(fplog,"Repl  %d attempts, %d odd, %d even\n",
1251                 re->nattempt[0]+re->nattempt[1],re->nattempt[1],re->nattempt[0]);
1252
1253         snew(prob,re->nrepl);
1254         for(i=1; i<re->nrepl; i++)
1255         {
1256             if (re->nattempt[i%2] == 0)
1257             {
1258                 prob[i] = 0;
1259             }
1260             else
1261             {
1262                 prob[i] =  re->prob_sum[i]/re->nattempt[i%2];
1263             }
1264         }
1265         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1266         print_prob(fplog,"",re->nrepl,prob);
1267
1268         fprintf(fplog,"Repl  number of exchanges:\n");
1269         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1270         print_count(fplog,"",re->nrepl,re->nexchange);
1271
1272         fprintf(fplog,"Repl  average number of exchanges:\n");
1273         for(i=1; i<re->nrepl; i++) 
1274         {
1275             if (re->nattempt[i%2] == 0)
1276             {
1277                 prob[i] = 0;
1278             }
1279             else
1280             {
1281                 prob[i] =  ((real)re->nexchange[i])/re->nattempt[i%2];
1282             }
1283         }
1284         print_ind(fplog,"",re->nrepl,re->ind,NULL);
1285         print_prob(fplog,"",re->nrepl,prob);
1286         sfree(prob);
1287         fprintf(fplog,"\n");
1288     }
1289     /* print the transition matrix */
1290     print_matrix(fplog,"",re->nrepl,re->nmoves,re->nattempt);
1291 }