Merge branch 'release-4-5-patches' into release-4-6

[alexxy/gromacs.git] / src / mdlib / minimize.c
diff --git a/src/mdlib/minimize.c b/src/mdlib/minimize.c

index eee7bee5351260606ed92b345540854b291d065e..259133005b02778b175f3e16b033fac0dfccc6ac 100644 (file)
--- a/src/mdlib/minimize.c
+++ b/src/mdlib/minimize.c
@@ -1,37 +1,39 @@
-/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
+/*
+ * This file is part of the GROMACS molecular simulation package.
   *
- * 
- *                This source code is part of
- * 
- *                 G   R   O   M   A   C   S
- * 
- *          GROningen MAchine for Chemical Simulations
- * 
- *                        VERSION 3.2.0
- * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2004, The GROMACS development team,
   * check out http://www.gromacs.org for more information.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
+ * Copyright (c) 2012, by the GROMACS development team, led by
+ * David van der Spoel, Berk Hess, Erik Lindahl, and including many
+ * others, as listed in the AUTHORS file in the top-level source
+ * directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
   * of the License, or (at your option) any later version.
- * 
- * If you want to redistribute modifications, please consider that
- * scientific software is very special. Version control is crucial -
- * bugs must be traceable. We will be happy to consider code for
- * inclusion in the official distribution, but derived work must not
- * be called official GROMACS. Details are found in the README & COPYING
- * files - if they are missing, get the official version at www.gromacs.org.
- * 
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
   * To help us fund GROMACS development, we humbly ask that you cite
- * the papers on the package - you can find them in the top README file.
- * 
- * For more info, check our website at http://www.gromacs.org
- * 
- * And Hey:
- * GROwing Monsters And Cloning Shrimps
+ * the research papers on the package. Check out http://www.gromacs.org.
   */
  #ifdef HAVE_CONFIG_H
  #include <config.h>
@@ -64,6 +66,7 @@
  #include "vsite.h"
  #include "force.h"
  #include "mdrun.h"
+#include "md_support.h"
  #include "domdec.h"
  #include "partdec.h"
  #include "trnio.h"
@@ -75,6 +78,9 @@
  #include "mtop_util.h"
  #include "gmxfio.h"
  #include "pme.h"
+#include "bondf.h"
+#include "gmx_omp_nthreads.h"
+
  
  typedef struct {
    t_state s;
@@ -88,9 +94,12 @@ typedef struct {
  static em_state_t *init_em_state()
  {
    em_state_t *ems;
-  
+
    snew(ems,1);
  
+  /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
+  snew(ems->s.lambda,efptNR);
+
    return ems;
  }
  
@@ -125,26 +134,37 @@ static void sp_header(FILE *out,const char *minimizer,real ftol,int nsteps)
  
  static void warn_step(FILE *fp,real ftol,gmx_bool bLastStep,gmx_bool bConstrain)
  {
+    char buffer[2048];
      if (bLastStep)
      {
-        fprintf(fp,"\nReached the maximum number of steps before reaching Fmax < %g\n",ftol);
+        sprintf(buffer,
+                "\nEnergy minimization reached the maximum number"
+                "of steps before the forces reached the requested"
+                "precision Fmax < %g.\n",ftol);
      }
      else
      {
-        fprintf(fp,"\nStepsize too small, or no change in energy.\n"
-                "Converged to machine precision,\n"
-                "but not to the requested precision Fmax < %g\n",
-                ftol);
-        if (sizeof(real)<sizeof(double))
-        {
-            fprintf(fp,"\nDouble precision normally gives you higher accuracy.\n");
-        }
-        if (bConstrain)
-        {
-            fprintf(fp,"You might need to increase your constraint accuracy, or turn\n"
-                    "off constraints alltogether (set constraints = none in mdp file)\n");
-        }
-    }
+        sprintf(buffer,
+                "\nEnergy minimization has stopped, but the forces have"
+                "not converged to the requested precision Fmax < %g (which"
+                "may not be possible for your system). It stopped"
+                "because the algorithm tried to make a new step whose size"
+                "was too small, or there was no change in the energy since"
+                "last step. Either way, we regard the minimization as"
+                "converged to within the available machine precision,"
+                "given your starting configuration and EM parameters.\n%s%s",
+                ftol,
+                sizeof(real)<sizeof(double) ?
+                "\nDouble precision normally gives you higher accuracy, but"
+                "this is often not needed for preparing to run molecular"
+                "dynamics.\n" :
+                "",
+                bConstrain ?
+                "You might need to increase your constraint accuracy, or turn\n"
+                "off constraints altogether (set constraints = none in mdp file)\n" :
+                "");
+    }
+    fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
  }
  
  
@@ -157,23 +177,23 @@ static void print_converged(FILE *fp,const char *alg,real ftol,
  
    if (bDone)
      fprintf(fp,"\n%s converged to Fmax < %g in %s steps\n",
-           alg,ftol,gmx_step_str(count,buf)); 
+           alg,ftol,gmx_step_str(count,buf));
    else if(count<nsteps)
      fprintf(fp,"\n%s converged to machine precision in %s steps,\n"
                 "but did not reach the requested Fmax < %g.\n",
             alg,gmx_step_str(count,buf),ftol);
-  else 
+  else
      fprintf(fp,"\n%s did not converge to Fmax < %g in %s steps.\n",
             alg,ftol,gmx_step_str(count,buf));
  
  #ifdef GMX_DOUBLE
-  fprintf(fp,"Potential Energy  = %21.14e\n",epot); 
-  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1); 
-  fprintf(fp,"Norm of force     = %21.14e\n",fnorm); 
+  fprintf(fp,"Potential Energy  = %21.14e\n",epot);
+  fprintf(fp,"Maximum force     = %21.14e on atom %d\n",fmax,nfmax+1);
+  fprintf(fp,"Norm of force     = %21.14e\n",fnorm);
  #else
-  fprintf(fp,"Potential Energy  = %14.7e\n",epot); 
-  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1); 
-  fprintf(fp,"Norm of force     = %14.7e\n",fnorm); 
+  fprintf(fp,"Potential Energy  = %14.7e\n",epot);
+  fprintf(fp,"Maximum force     = %14.7e on atom %d\n",fmax,nfmax+1);
+  fprintf(fp,"Norm of force     = %14.7e\n",fnorm);
  #endif
  }
  
@@ -269,34 +289,27 @@ void init_em(FILE *fplog,const char *title,
  {
      int  start,homenr,i;
      real dvdlambda;
-    
+
      if (fplog)
      {
          fprintf(fplog,"Initiating %s\n",title);
      }
-    
+
      state_global->ngtc = 0;
-    
-    /* Initiate some variables */
-    if (ir->efep != efepNO)
-    {
-        state_global->lambda = ir->init_lambda;
-    }
-    else 
-    {
-        state_global->lambda = 0.0;
-    }
-    
+
+    /* Initialize lambda variables */
+    initialize_lambdas(fplog,ir,&(state_global->fep_state),state_global->lambda,NULL);
+
      init_nrnb(nrnb);
-    
+
      if (DOMAINDECOMP(cr))
      {
          *top = dd_init_local_top(top_global);
-        
+
          dd_init_local_state(cr->dd,state_global,&ems->s);
  
          *f = NULL;
-        
+
          /* Distribute the charge groups over the nodes from the master node */
          dd_partition_system(fplog,ir->init_step,cr,TRUE,1,
                              state_global,top_global,ir,
@@ -304,7 +317,7 @@ void init_em(FILE *fplog,const char *title,
                              fr,vsite,NULL,constr,
                              nrnb,NULL,FALSE);
          dd_store_state(cr->dd,&ems->s);
-        
+
          if (ir->nstfout)
          {
              snew(*f_global,top_global->natoms);
@@ -328,12 +341,12 @@ void init_em(FILE *fplog,const char *title,
              copy_rvec(state_global->x[i],ems->s.x[i]);
          }
          copy_mat(state_global->box,ems->s.box);
-        
+
          if (PAR(cr) && ir->eI != eiNM)
          {
              /* Initialize the particle decomposition and split the topology */
              *top = split_system(fplog,top_global,ir,cr);
-            
+
              pd_cg_range(cr,&fr->cg0,&fr->hcg);
          }
          else
@@ -341,8 +354,12 @@ void init_em(FILE *fplog,const char *title,
              *top = gmx_mtop_generate_local_top(top_global,ir);
          }
          *f_global = *f;
+
+        forcerec_set_excl_load(fr,*top,cr);
+
+        init_bonded_thread_force_reduction(fr,&(*top)->idef);      
          
-        if (ir->ePBC != epbcNONE && !ir->bPeriodicMols)
+        if (ir->ePBC != epbcNONE && !fr->bMolPBC)
          {
              *graph = mk_graph(fplog,&((*top)->idef),0,top_global->natoms,FALSE,FALSE);
          }
@@ -362,14 +379,14 @@ void init_em(FILE *fplog,const char *title,
              homenr = top_global->natoms;
          }
          atoms2md(top_global,ir,0,NULL,start,homenr,mdatoms);
-        update_mdatoms(mdatoms,state_global->lambda);
-    
+        update_mdatoms(mdatoms,state_global->lambda[efptFEP]);
+
          if (vsite)
          {
              set_vsite_top(vsite,*top,mdatoms,cr);
          }
      }
-    
+
      if (constr)
      {
          if (ir->eConstrAlg == econtSHAKE &&
@@ -378,7 +395,7 @@ void init_em(FILE *fplog,const char *title,
              gmx_fatal(FARGS,"Can not do energy minimization with %s, use %s\n",
                        econstr_names[econtSHAKE],econstr_names[econtLINCS]);
          }
-        
+
          if (!DOMAINDECOMP(cr))
          {
              set_constraints(constr,*top,ir,mdatoms,cr);
@@ -390,26 +407,27 @@ void init_em(FILE *fplog,const char *title,
              dvdlambda=0;
              constrain(PAR(cr) ? NULL : fplog,TRUE,TRUE,constr,&(*top)->idef,
                        ir,NULL,cr,-1,0,mdatoms,
-                      ems->s.x,ems->s.x,NULL,ems->s.box,
-                      ems->s.lambda,&dvdlambda,
+                      ems->s.x,ems->s.x,NULL,fr->bMolPBC,ems->s.box,
+                      ems->s.lambda[efptFEP],&dvdlambda,
                        NULL,NULL,nrnb,econqCoord,FALSE,0,0);
          }
      }
-    
+
      if (PAR(cr))
      {
          *gstat = global_stat_init(ir);
      }
-    
+
      *outf = init_mdoutf(nfile,fnm,0,cr,ir,NULL);
  
      snew(*enerd,1);
-    init_enerdata(top_global->groups.grps[egcENER].nr,ir->n_flambda,*enerd);
+    init_enerdata(top_global->groups.grps[egcENER].nr,ir->fepvals->n_lambda,
+                  *enerd);
  
      if (mdebin != NULL)
      {
          /* Init bin for energy stuff */
-        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL); 
+        *mdebin = init_mdebin((*outf)->fp_ene,top_global,ir,NULL);
      }
  
      clear_rvec(mu_tot);
@@ -421,7 +439,7 @@ static void finish_em(FILE *fplog,t_commrec *cr,gmx_mdoutf_t *outf,
  {
    if (!(cr->duty & DUTY_PME)) {
      /* Tell the PME only node to finish */
-    gmx_pme_finish(cr);
+    gmx_pme_send_finish(cr);
    }
  
    done_mdoutf(outf);
@@ -463,14 +481,14 @@ static void write_em_traj(FILE *fplog,t_commrec *cr,
          copy_em_coords(state,state_global);
          f_global = state->f;
      }
-    
+
      mdof_flags = 0;
      if (bX) { mdof_flags |= MDOF_X; }
      if (bF) { mdof_flags |= MDOF_F; }
      write_traj(fplog,cr,outf,mdof_flags,
                 top_global,step,(double)step,
                 &state->s,state_global,state->f,f_global,NULL,NULL);
-    
+
      if (confout != NULL && MASTER(cr))
      {
          if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
@@ -487,127 +505,123 @@ static void write_em_traj(FILE *fplog,t_commrec *cr,
  }
  
  static void do_em_step(t_commrec *cr,t_inputrec *ir,t_mdatoms *md,
-                      em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
-                      gmx_constr_t constr,gmx_localtop_t *top,
-                      t_nrnb *nrnb,gmx_wallcycle_t wcycle,
-                      gmx_large_int_t count)
+                       gmx_bool bMolPBC,
+                       em_state_t *ems1,real a,rvec *f,em_state_t *ems2,
+                       gmx_constr_t constr,gmx_localtop_t *top,
+                       t_nrnb *nrnb,gmx_wallcycle_t wcycle,
+                       gmx_large_int_t count)
  
  {
-  t_state *s1,*s2;
-  int  start,end,gf,i,m;
-  rvec *x1,*x2;
-  real dvdlambda;
+    t_state *s1,*s2;
+    int  i;
+    int  start,end;
+    rvec *x1,*x2;
+    real dvdlambda;
  
-  s1 = &ems1->s;
-  s2 = &ems2->s;
+    s1 = &ems1->s;
+    s2 = &ems2->s;
  
-  if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
-    gmx_incons("state mismatch in do_em_step");
+    if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
+    {
+        gmx_incons("state mismatch in do_em_step");
+    }
  
-  s2->flags = s1->flags;
+    s2->flags = s1->flags;
  
-  if (s2->nalloc != s1->nalloc) {
-    s2->nalloc = s1->nalloc;
-    srenew(s2->x,s1->nalloc);
-    srenew(ems2->f,  s1->nalloc);
-    if (s2->flags & (1<<estCGP))
-      srenew(s2->cg_p,  s1->nalloc);
-  }
+    if (s2->nalloc != s1->nalloc)
+    {
+        s2->nalloc = s1->nalloc;
+        srenew(s2->x,s1->nalloc);
+        srenew(ems2->f,  s1->nalloc);
+        if (s2->flags & (1<<estCGP))
+        {
+            srenew(s2->cg_p,  s1->nalloc);
+        }
+    }
    
-  s2->natoms = s1->natoms;
-  s2->lambda = s1->lambda;
-  copy_mat(s1->box,s2->box);
-
-  start = md->start;
-  end   = md->start + md->homenr;
-
-  x1 = s1->x;
-  x2 = s2->x;
-  gf = 0;
-  for(i=start; i<end; i++) {
-    if (md->cFREEZE)
-      gf = md->cFREEZE[i];
-    for(m=0; m<DIM; m++) {
-      if (ir->opts.nFreeze[gf][m])
-       x2[i][m] = x1[i][m];
-      else
-       x2[i][m] = x1[i][m] + a*f[i][m];
+    s2->natoms = s1->natoms;
+    copy_mat(s1->box,s2->box);
+    /* Copy free energy state */
+    for (i=0;i<efptNR;i++)
+    {
+        s2->lambda[i] = s1->lambda[i];
      }
-  }
-
-  if (s2->flags & (1<<estCGP)) {
-    /* Copy the CG p vector */
-    x1 = s1->cg_p;
-    x2 = s2->cg_p;
-    for(i=start; i<end; i++)
-      copy_rvec(x1[i],x2[i]);
-  }
+    copy_mat(s1->box,s2->box);
  
-  if (DOMAINDECOMP(cr)) {
-    s2->ddp_count = s1->ddp_count;
-    if (s2->cg_gl_nalloc < s1->cg_gl_nalloc) {
-      s2->cg_gl_nalloc = s1->cg_gl_nalloc;
-      srenew(s2->cg_gl,s2->cg_gl_nalloc);
-    }
-    s2->ncg_gl = s1->ncg_gl;
-    for(i=0; i<s2->ncg_gl; i++)
-      s2->cg_gl[i] = s1->cg_gl[i];
-    s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
-  }
-
-  if (constr) {
-    wallcycle_start(wcycle,ewcCONSTR);
-    dvdlambda = 0;
-    constrain(NULL,TRUE,TRUE,constr,&top->idef,        
-              ir,NULL,cr,count,0,md,
-              s1->x,s2->x,NULL,s2->box,s2->lambda,
-              &dvdlambda,NULL,NULL,nrnb,econqCoord,FALSE,0,0);
-    wallcycle_stop(wcycle,ewcCONSTR);
-  }
-}
+    start = md->start;
+    end   = md->start + md->homenr;
  
-static void do_x_step(t_commrec *cr,int n,rvec *x1,real a,rvec *f,rvec *x2)
+    x1 = s1->x;
+    x2 = s2->x;
  
-{
-  int  start,end,i,m;
+#pragma omp parallel num_threads(gmx_omp_nthreads_get(emntUpdate))
+    {
+        int gf,i,m;
  
-  if (DOMAINDECOMP(cr)) {
-    start = 0;
-    end   = cr->dd->nat_home;
-  } else if (PARTDECOMP(cr)) {
-    pd_at_range(cr,&start,&end);
-  } else {
-    start = 0;
-    end   = n;
-  }
+        gf = 0;
+#pragma omp for schedule(static) nowait
+        for(i=start; i<end; i++)
+        {
+            if (md->cFREEZE)
+            {
+                gf = md->cFREEZE[i];
+            }
+            for(m=0; m<DIM; m++)
+            {
+                if (ir->opts.nFreeze[gf][m])
+                {
+                    x2[i][m] = x1[i][m];
+                }
+                else
+                {
+                    x2[i][m] = x1[i][m] + a*f[i][m];
+                }
+            }
+        }
  
-  for(i=start; i<end; i++) {
-    for(m=0; m<DIM; m++) {
-      x2[i][m] = x1[i][m] + a*f[i][m];
+        if (s2->flags & (1<<estCGP))
+        {
+            /* Copy the CG p vector */
+            x1 = s1->cg_p;
+            x2 = s2->cg_p;
+#pragma omp for schedule(static) nowait
+            for(i=start; i<end; i++)
+            {
+                copy_rvec(x1[i],x2[i]);
+            }
+        }
+        
+        if (DOMAINDECOMP(cr))
+        {
+            s2->ddp_count = s1->ddp_count;
+            if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
+            {
+#pragma omp barrier
+                s2->cg_gl_nalloc = s1->cg_gl_nalloc;
+                srenew(s2->cg_gl,s2->cg_gl_nalloc);
+#pragma omp barrier
+            }
+            s2->ncg_gl = s1->ncg_gl;
+#pragma omp for schedule(static) nowait
+            for(i=0; i<s2->ncg_gl; i++)
+            {
+                s2->cg_gl[i] = s1->cg_gl[i];
+            }
+            s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
+        }
      }
-  }
-}
-
-static void do_x_sub(t_commrec *cr,int n,rvec *x1,rvec *x2,real a,rvec *f)
-
-{
-  int  start,end,i,m;
-
-  if (DOMAINDECOMP(cr)) {
-    start = 0;
-    end   = cr->dd->nat_home;
-  } else if (PARTDECOMP(cr)) {
-    pd_at_range(cr,&start,&end);
-  } else {
-    start = 0;
-    end   = n;
-  }
-
-  for(i=start; i<end; i++) {
-    for(m=0; m<DIM; m++) {
-      f[i][m] = (x1[i][m] - x2[i][m])*a;
+    
+    if (constr)
+    {
+        wallcycle_start(wcycle,ewcCONSTR);
+        dvdlambda = 0;
+        constrain(NULL,TRUE,TRUE,constr,&top->idef,    
+                  ir,NULL,cr,count,0,md,
+                  s1->x,s2->x,NULL,bMolPBC,s2->box,
+                  s2->lambda[efptBONDED],&dvdlambda,
+                  NULL,NULL,nrnb,econqCoord,FALSE,0,0);
+        wallcycle_stop(wcycle,ewcCONSTR);
      }
-  }
  }
  
  static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
@@ -627,7 +641,7 @@ static void em_dd_partition_system(FILE *fplog,int step,t_commrec *cr,
      dd_store_state(cr->dd,&ems->s);
      wallcycle_stop(wcycle,ewcDOMDEC);
  }
-    
+
  static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
                              t_state *state_global,gmx_mtop_t *top_global,
                              em_state_t *ems,gmx_localtop_t *top,
@@ -645,9 +659,9 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
    gmx_bool bNS;
    int  nabnsb;
    tensor force_vir,shake_vir,ekin;
-  real dvdl,prescorr,enercorr,dvdlcorr;
+  real dvdlambda,prescorr,enercorr,dvdlcorr;
    real terminate=0;
-  
+
    /* Set the time to the initial time, the time does not change during EM */
    t = inputrec->init_t;
  
@@ -680,7 +694,7 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
                              nrnb,wcycle);
      }
    }
-      
+
      /* Calc force & energy on new trial position  */
      /* do_force always puts the charge groups in the box and shifts again
       * We do not unshift, so molecules are always whole in congrad.c
@@ -690,12 +704,13 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
               ems->s.box,ems->s.x,&ems->s.hist,
               ems->f,force_vir,mdatoms,enerd,fcd,
               ems->s.lambda,graph,fr,vsite,mu_tot,t,NULL,NULL,TRUE,
-             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES | GMX_FORCE_VIRIAL |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DOLR : 0));
-       
-  /* Clear the unused shake virial and pressure */
-  clear_mat(shake_vir);
-  clear_mat(pres);
+             GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
+             GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
+             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
+
+    /* Clear the unused shake virial and pressure */
+    clear_mat(shake_vir);
+    clear_mat(pres);
  
      /* Communicate stuff when parallel */
      if (PAR(cr) && inputrec->eI != eiNM)
@@ -705,16 +720,16 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
          global_stat(fplog,gstat,cr,enerd,force_vir,shake_vir,mu_tot,
                      inputrec,NULL,NULL,NULL,1,&terminate,
                      top_global,&ems->s,FALSE,
-                    CGLO_ENERGY | 
-                    CGLO_PRESSURE | 
-                    CGLO_CONSTRAINT | 
+                    CGLO_ENERGY |
+                    CGLO_PRESSURE |
+                    CGLO_CONSTRAINT |
                      CGLO_FIRSTITERATE);
  
          wallcycle_stop(wcycle,ewcMoveE);
      }
  
      /* Calculate long range corrections to pressure and energy */
-    calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda,
+    calc_dispcorr(fplog,inputrec,fr,count,top_global->natoms,ems->s.box,ems->s.lambda[efptVDW],
                    pres,force_vir,&prescorr,&enercorr,&dvdlcorr);
      enerd->term[F_DISPCORR] = enercorr;
      enerd->term[F_EPOT] += enercorr;
@@ -722,18 +737,19 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
      enerd->term[F_DVDL] += dvdlcorr;
  
    ems->epot = enerd->term[F_EPOT];
-  
+
    if (constr) {
      /* Project out the constraint components of the force */
      wallcycle_start(wcycle,ewcCONSTR);
-    dvdl = 0;
+    dvdlambda = 0;
      constrain(NULL,FALSE,FALSE,constr,&top->idef,
                inputrec,NULL,cr,count,0,mdatoms,
-              ems->s.x,ems->f,ems->f,ems->s.box,ems->s.lambda,&dvdl,
+              ems->s.x,ems->f,ems->f,fr->bMolPBC,ems->s.box,
+              ems->s.lambda[efptBONDED],&dvdlambda,
                NULL,&shake_vir,nrnb,econqForceDispl,FALSE,0,0);
      if (fr->bSepDVDL && fplog)
-      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdl);
-    enerd->term[F_DHDL_CON] += dvdl;
+      fprintf(fplog,sepdvdlformat,"Constraints",t,dvdlambda);
+    enerd->term[F_DVDL_BONDED] += dvdlambda;
      m_add(force_vir,shake_vir,vir);
      wallcycle_stop(wcycle,ewcCONSTR);
    } else {
@@ -742,10 +758,9 @@ static void evaluate_energy(FILE *fplog,gmx_bool bVerbose,t_commrec *cr,
  
    clear_mat(ekin);
    enerd->term[F_PRES] =
-    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres,
-             (fr->eeltype==eelPPPM)?enerd->term[F_COUL_RECIP]:0.0);
+    calc_pres(fr->ePBC,inputrec->nwall,ems->s.box,ekin,vir,pres);
  
-  sum_dhdl(enerd,ems->s.lambda,inputrec);
+  sum_dhdl(enerd,ems->s.lambda,inputrec->fepvals);
  
      if (EI_ENERGY_MINIMIZATION(inputrec->eI))
      {
@@ -777,7 +792,7 @@ static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
     * but to fully optimize this a much more complicated algorithm is required.
     */
    snew(fmg,mtop->natoms);
-  
+
    ncg   = s_min->s.ncg_gl;
    cg_gl = s_min->s.cg_gl;
    i = 0;
@@ -815,7 +830,7 @@ static double reorder_partsum(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
        i++;
      }
    }
-  
+
    sfree(fmg);
  
    return partsum;
@@ -833,7 +848,7 @@ static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
     * it looks a bit complicated since we take freeze groups into account,
     * and might have to sum it in parallel runs.
     */
-  
+
    if (!DOMAINDECOMP(cr) ||
        (s_min->s.ddp_count == cr->dd->ddp_count &&
         s_b->s.ddp_count   == cr->dd->ddp_count)) {
@@ -850,7 +865,7 @@ static real pr_beta(t_commrec *cr,t_grpopts *opts,t_mdatoms *mdatoms,
        for(m=0; m<DIM; m++)
         if (!opts->nFreeze[gf][m]) {
           sum += (fb[i][m] - fm[i][m])*fb[i][m];
-       } 
+       }
      }
    } else {
      /* We need to reorder cgs while summing */
@@ -875,7 +890,8 @@ double do_cg(FILE *fplog,t_commrec *cr,
               t_nrnb *nrnb,gmx_wallcycle_t wcycle,
               gmx_edsam_t ed,
               t_forcerec *fr,
-             int repl_ex_nst,int repl_ex_seed,
+             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+             gmx_membed_t membed,
               real cpt_period,real max_hours,
               const char *deviceOptions,
               unsigned long Flags,
@@ -892,7 +908,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
    rvec   *f_global,*p,*sf,*sfm;
    double gpa,gpb,gpc,tmp,sum[2],minstep;
    real   fnormn;
-  real   stepsize;     
+  real   stepsize;
    real   a,b,c,beta=0.0;
    real   epot_repl=0;
    real   pnorm;
@@ -904,7 +920,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
    int    number_steps,neval=0,nstcg=inputrec->nstcgsteep;
    gmx_mdoutf_t *outf;
    int    i,m,gf,step,nminstep;
-  real   terminate=0;  
+  real   terminate=0;
  
    step=0;
  
@@ -918,10 +934,10 @@ double do_cg(FILE *fplog,t_commrec *cr,
            state_global,top_global,s_min,&top,&f,&f_global,
            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
            nfile,fnm,&outf,&mdebin);
-  
+
    /* Print to log file */
    print_em_start(fplog,cr,runtime,wcycle,CG);
-  
+
    /* Max number of steps */
    number_steps=inputrec->nsteps;
  
@@ -944,10 +960,10 @@ double do_cg(FILE *fplog,t_commrec *cr,
    if (MASTER(cr)) {
      /* Copy stuff to the energy bin for easy printing etc. */
      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-              mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
-              NULL,NULL,vir,pres,NULL,mu_tot,constr);
-    
-    print_ebin_header(fplog,step,step,s_min->s.lambda);
+               mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
+               NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
+    print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
      print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
    }
@@ -955,7 +971,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
  
    /* Estimate/guess the initial stepsize */
    stepsize = inputrec->em_stepsize/s_min->fnorm;
- 
+
    if (MASTER(cr)) {
      fprintf(stderr,"   F-max             = %12.5e on atom %d\n",
             s_min->fmax,s_min->a_fmax+1);
@@ -968,16 +984,16 @@ double do_cg(FILE *fplog,t_commrec *cr,
      fprintf(fplog,"   F-Norm            = %12.5e\n",
             s_min->fnorm/sqrt(state_global->natoms));
      fprintf(fplog,"\n");
-  }  
-  /* Start the loop over CG steps.             
+  }
+  /* Start the loop over CG steps.
     * Each successful step is counted, and we continue until
     * we either converge or reach the max number of steps.
     */
    converged = FALSE;
    for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged;step++) {
-    
-    /* start taking steps in a new direction 
-     * First time we enter the routine, beta=0, and the direction is 
+
+    /* start taking steps in a new direction
+     * First time we enter the routine, beta=0, and the direction is
       * simply the negative gradient.
       */
  
@@ -987,7 +1003,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
      gpa = 0;
      gf = 0;
      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
-      if (mdatoms->cFREEZE) 
+      if (mdatoms->cFREEZE)
         gf = mdatoms->cFREEZE[i];
        for(m=0; m<DIM; m++) {
         if (!inputrec->opts.nFreeze[gf][m]) {
@@ -999,19 +1015,19 @@ double do_cg(FILE *fplog,t_commrec *cr,
         }
        }
      }
-    
+
      /* Sum the gradient along the line across CPUs */
      if (PAR(cr))
        gmx_sumd(1,&gpa,cr);
  
      /* Calculate the norm of the search vector */
      get_f_norm_max(cr,&(inputrec->opts),mdatoms,p,&pnorm,NULL,NULL);
-    
+
      /* Just in case stepsize reaches zero due to numerical precision... */
-    if(stepsize<=0)      
+    if(stepsize<=0)
        stepsize = inputrec->em_stepsize/pnorm;
-    
-    /* 
+
+    /*
       * Double check the value of the derivative in the search direction.
       * If it is positive it must be due to the old information in the
       * CG formula, so just remove that and start over with beta=0.
@@ -1046,15 +1062,15 @@ double do_cg(FILE *fplog,t_commrec *cr,
        converged=TRUE;
        break;
      }
-    
+
      /* Write coordinates if necessary */
      do_x = do_per_step(step,inputrec->nstxout);
      do_f = do_per_step(step,inputrec->nstfout);
-    
+
      write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
                    top_global,inputrec,step,
                    s_min,state_global,f_global);
-    
+
      /* Take a step downhill.
       * In theory, we should minimize the function along this direction.
       * That is quite possible, but it turns out to take 5-10 function evaluations
@@ -1067,15 +1083,15 @@ double do_cg(FILE *fplog,t_commrec *cr,
       * the continue straight to the next CG step without trying to find any minimum.
       * If it didn't work (higher energy), there must be a minimum somewhere between
       * the old position and the new one.
-     * 
+     *
       * Due to the finite numerical accuracy, it turns out that it is a good idea
       * to even accept a SMALL increase in energy, if the derivative is still downhill.
-     * This leads to lower final energies in the tests I've done. / Erik 
+     * This leads to lower final energies in the tests I've done. / Erik
       */
      s_a->epot = s_min->epot;
      a = 0.0;
      c = a + stepsize; /* reference position along line is zero */
-    
+
      if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count) {
        em_dd_partition_system(fplog,step,cr,top_global,inputrec,
                              s_min,top,mdatoms,fr,vsite,constr,
@@ -1083,9 +1099,9 @@ double do_cg(FILE *fplog,t_commrec *cr,
      }
  
      /* Take a trial step (new coords in s_c) */
-    do_em_step(cr,inputrec,mdatoms,s_min,c,s_min->s.cg_p,s_c,
-              constr,top,nrnb,wcycle,-1);
-    
+    do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,c,s_min->s.cg_p,s_c,
+               constr,top,nrnb,wcycle,-1);
+
      neval++;
      /* Calculate energy for the trial step */
      evaluate_energy(fplog,bVerbose,cr,
@@ -1093,13 +1109,13 @@ double do_cg(FILE *fplog,t_commrec *cr,
                     inputrec,nrnb,wcycle,gstat,
                     vsite,constr,fcd,graph,mdatoms,fr,
                     mu_tot,enerd,vir,pres,-1,FALSE);
-    
+
      /* Calc derivative along line */
      p  = s_c->s.cg_p;
      sf = s_c->f;
      gpc=0;
      for(i=mdatoms->start; i<mdatoms->start+mdatoms->homenr; i++) {
-      for(m=0; m<DIM; m++) 
+      for(m=0; m<DIM; m++)
           gpc -= p[i][m]*sf[i][m];  /* f is negative gradient, thus the sign */
      }
      /* Sum the gradient along the line across CPUs */
@@ -1127,11 +1143,11 @@ double do_cg(FILE *fplog,t_commrec *cr,
         */
        foundlower = FALSE;
        stepsize *= 0.618034;
-    }    
+    }
+
  
  
  
-    
      /* OK, if we didn't find a lower value we will have to locate one now - there must
       * be one in the interval [a=0,c].
       * The same thing is valid here, though: Don't spend dozens of iterations to find
@@ -1154,14 +1170,14 @@ double do_cg(FILE *fplog,t_commrec *cr,
         if(gpa<0 && gpc>0)
           b = a + gpa*(a-c)/(gpc-gpa);
         else
-         b = 0.5*(a+c);                
-       
+         b = 0.5*(a+c);
+
         /* safeguard if interpolation close to machine accuracy causes errors:
          * never go outside the interval
          */
         if(b<=a || b>=c)
           b = 0.5*(a+c);
-       
+
         if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count) {
           /* Reload the old state */
           em_dd_partition_system(fplog,-1,cr,top_global,inputrec,
@@ -1170,9 +1186,9 @@ double do_cg(FILE *fplog,t_commrec *cr,
         }
  
         /* Take a trial step to this new point - new coords in s_b */
-       do_em_step(cr,inputrec,mdatoms,s_min,b,s_min->s.cg_p,s_b,
-                  constr,top,nrnb,wcycle,-1);
-       
+       do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,s_min,b,s_min->s.cg_p,s_b,
+               constr,top,nrnb,wcycle,-1);
+
         neval++;
         /* Calculate energy for the trial step */
         evaluate_energy(fplog,bVerbose,cr,
@@ -1180,7 +1196,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
                         inputrec,nrnb,wcycle,gstat,
                         vsite,constr,fcd,graph,mdatoms,fr,
                         mu_tot,enerd,vir,pres,-1,FALSE);
-       
+
         /* p does not change within a step, but since the domain decomposition
          * might change, we have to use cg_p of s_b here.
          */
@@ -1194,13 +1210,13 @@ double do_cg(FILE *fplog,t_commrec *cr,
         /* Sum the gradient along the line across CPUs */
         if (PAR(cr))
           gmx_sumd(1,&gpb,cr);
-       
+
         if (debug)
           fprintf(debug,"CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
                   s_a->epot,s_b->epot,s_c->epot,gpb);
  
         epot_repl = s_b->epot;
-       
+
         /* Keep one of the intervals based on the value of the derivative at the new point */
         if (gpb > 0) {
           /* Replace c endpoint with b */
@@ -1213,15 +1229,15 @@ double do_cg(FILE *fplog,t_commrec *cr,
           a = b;
           gpa = gpb;
         }
-       
-       /* 
+
+       /*
          * Stop search as soon as we find a value smaller than the endpoints.
          * Never run more than 20 steps, no matter what.
          */
         nminstep++;
        } while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
-              (nminstep < 20));     
-      
+              (nminstep < 20));
+
        if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
           nminstep >= 20) {
         /* OK. We couldn't find a significantly lower energy.
@@ -1238,7 +1254,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
           continue;
         }
        }
-      
+
        /* Select min energy state of A & C, put the best in B.
         */
        if (s_c->epot < s_a->epot) {
@@ -1256,7 +1272,7 @@ double do_cg(FILE *fplog,t_commrec *cr,
         gpb = gpa;
         b = a;
        }
-      
+
      } else {
        if (debug)
         fprintf(debug,"CGE: Found a lower energy %f, moving C to B\n",
@@ -1265,10 +1281,10 @@ double do_cg(FILE *fplog,t_commrec *cr,
        gpb = gpc;
        b = c;
      }
-    
+
      /* new search direction */
      /* beta = 0 means forget all memory and restart with steepest descents. */
-    if (nstcg && ((step % nstcg)==0)) 
+    if (nstcg && ((step % nstcg)==0))
        beta = 0.0;
      else {
        /* s_min->fnorm cannot be zero, because then we would have converged
@@ -1283,12 +1299,12 @@ double do_cg(FILE *fplog,t_commrec *cr,
      /* Limit beta to prevent oscillations */
      if (fabs(beta) > 5.0)
        beta = 0.0;
-    
-    
+
+
      /* update positions */
      swap_em_state(s_min,s_b);
      gpa = gpb;
-    
+
      /* Print it if necessary */
      if (MASTER(cr)) {
        if(bVerbose)
@@ -1297,27 +1313,28 @@ double do_cg(FILE *fplog,t_commrec *cr,
                 s_min->fmax,s_min->a_fmax+1);
        /* Store the new (lower) energies */
        upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-                mdatoms->tmass,enerd,&s_min->s,s_min->s.box,
-                NULL,NULL,vir,pres,NULL,mu_tot,constr);
+                 mdatoms->tmass,enerd,&s_min->s,inputrec->fepvals,inputrec->expandedvals,s_min->s.box,
+                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
        do_log = do_per_step(step,inputrec->nstlog);
        do_ene = do_per_step(step,inputrec->nstenergy);
        if(do_log)
-       print_ebin_header(fplog,step,step,s_min->s.lambda);
+          print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
        print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
                  do_log ? fplog : NULL,step,step,eprNORMAL,
                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
      }
-    
+
      /* Stop when the maximum force lies below tolerance.
       * If we have reached machine precision, converged is already set to true.
-     */        
+     */
      converged = converged || (s_min->fmax < inputrec->em_tol);
-    
+
    } /* End of the loop */
-  
-  if (converged)       
+
+  if (converged)
      step--; /* we never took that last step in this case */
-  
+
      if (s_min->fmax > inputrec->em_tol)
      {
          if (MASTER(cr))
@@ -1325,16 +1342,16 @@ double do_cg(FILE *fplog,t_commrec *cr,
              warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
              warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
          }
-        converged = FALSE; 
+        converged = FALSE;
      }
-  
+
    if (MASTER(cr)) {
      /* If we printed energy and/or logfile last step (which was the last step)
       * we don't have to do it again, but otherwise print the final values.
       */
      if(!do_log) {
        /* Write final value to log since we didn't do anything the last step */
-      print_ebin_header(fplog,step,step,s_min->s.lambda);
+      print_ebin_header(fplog,step,step,s_min->s.lambda[efptFEP]);
      }
      if (!do_ene || !do_log) {
        /* Write final energy file entries */
@@ -1347,34 +1364,34 @@ double do_cg(FILE *fplog,t_commrec *cr,
    /* Print some stuff... */
    if (MASTER(cr))
      fprintf(stderr,"\nwriting lowest energy coordinates.\n");
-  
+
    /* IMPORTANT!
     * For accurate normal mode calculation it is imperative that we
     * store the last conformation into the full precision binary trajectory.
     *
     * However, we should only do it if we did NOT already write this step
     * above (which we did if do_x or do_f was true).
-   */  
+   */
    do_x = !do_per_step(step,inputrec->nstxout);
    do_f = (inputrec->nstfout > 0 && !do_per_step(step,inputrec->nstfout));
-  
+
    write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
                  top_global,inputrec,step,
                  s_min,state_global,f_global);
-  
+
    fnormn = s_min->fnorm/sqrt(state_global->natoms);
-  
+
    if (MASTER(cr)) {
      print_converged(stderr,CG,inputrec->em_tol,step,converged,number_steps,
                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
      print_converged(fplog,CG,inputrec->em_tol,step,converged,number_steps,
                     s_min->epot,s_min->fmax,s_min->a_fmax,fnormn);
-    
+
      fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
    }
-  
+
    finish_em(fplog,cr,outf,runtime,wcycle);
-  
+
    /* To print the actual number of steps we needed somewhere */
    runtime->nsteps_done = step;
  
@@ -1395,7 +1412,8 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                  t_nrnb *nrnb,gmx_wallcycle_t wcycle,
                  gmx_edsam_t ed,
                  t_forcerec *fr,
-                int repl_ex_nst,int repl_ex_seed,
+                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                gmx_membed_t membed,
                  real cpt_period,real max_hours,
                  const char *deviceOptions,
                  unsigned long Flags,
@@ -1411,7 +1429,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
    rvec   *f_global;
    int    ncorr,nmaxcorr,point,cp,neval,nminstep;
    double stepsize,gpa,gpb,gpc,tmp,minstep;
-  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;    
+  real   *rho,*alpha,*ff,*xx,*p,*s,*lastx,*lastf,**dx,**dg;
    real   *xa,*xb,*xc,*fa,*fb,*fc,*xtmp,*ftmp;
    real   a,b,c,maxdelta,delta;
    real   diag,Epot0,Epot,EpotA,EpotB,EpotC;
@@ -1439,7 +1457,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
  
    n = 3*state->natoms;
    nmaxcorr = inputrec->nbfgscorr;
-  
+
    /* Allocate memory */
    /* Use pointers to real so we dont have to loop over both atoms and
     * dimensions all the time...
@@ -1454,22 +1472,22 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
    snew(fc,n);
    snew(frozen,n);
  
-  snew(p,n); 
-  snew(lastx,n); 
-  snew(lastf,n); 
+  snew(p,n);
+  snew(lastx,n);
+  snew(lastf,n);
    snew(rho,nmaxcorr);
    snew(alpha,nmaxcorr);
-  
+
    snew(dx,nmaxcorr);
    for(i=0;i<nmaxcorr;i++)
      snew(dx[i],n);
-  
+
    snew(dg,nmaxcorr);
    for(i=0;i<nmaxcorr;i++)
      snew(dg[i],n);
  
    step = 0;
-  neval = 0; 
+  neval = 0;
  
    /* Init em */
    init_em(fplog,LBFGS,cr,inputrec,
@@ -1487,12 +1505,12 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
  
    start = mdatoms->start;
    end   = mdatoms->homenr + start;
-    
+
    /* Print to log file */
    print_em_start(fplog,cr,runtime,wcycle,LBFGS);
-  
+
    do_log = do_ene = do_x = do_f = TRUE;
-  
+
    /* Max number of steps */
    number_steps=inputrec->nsteps;
  
@@ -1501,19 +1519,19 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
    for(i=start; i<end; i++) {
      if (mdatoms->cFREEZE)
        gf = mdatoms->cFREEZE[i];
-     for(m=0; m<DIM; m++) 
-       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];  
+     for(m=0; m<DIM; m++)
+       frozen[3*i+m]=inputrec->opts.nFreeze[gf][m];
    }
    if (MASTER(cr))
      sp_header(stderr,LBFGS,inputrec->em_tol,number_steps);
    if (fplog)
      sp_header(fplog,LBFGS,inputrec->em_tol,number_steps);
-  
+
    if (vsite)
      construct_vsites(fplog,vsite,state->x,nrnb,1,NULL,
                      top->idef.iparams,top->idef.il,
                      fr->ePBC,fr->bMolPBC,graph,cr,state->box);
-  
+
    /* Call the force routine and some auxiliary (neighboursearching etc.) */
    /* do_force always puts the charge groups in the box and shifts again
     * We do not unshift, so molecules are always whole
@@ -1527,32 +1545,32 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                   vsite,constr,fcd,graph,mdatoms,fr,
                   mu_tot,enerd,vir,pres,-1,TRUE);
    where();
-       
+
    if (MASTER(cr)) {
      /* Copy stuff to the energy bin for easy printing etc. */
      upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-              mdatoms->tmass,enerd,state,state->box,
-              NULL,NULL,vir,pres,NULL,mu_tot,constr);
-    
-    print_ebin_header(fplog,step,step,state->lambda);
+               mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
+               NULL,NULL,vir,pres,NULL,mu_tot,constr);
+
+    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
      print_ebin(outf->fp_ene,TRUE,FALSE,FALSE,fplog,step,step,eprNORMAL,
                 TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
    }
    where();
-  
+
    /* This is the starting energy */
    Epot = enerd->term[F_EPOT];
-  
+
    fnorm = ems.fnorm;
    fmax  = ems.fmax;
    nfmax = ems.a_fmax;
-  
+
    /* Set the initial step.
-   * since it will be multiplied by the non-normalized search direction 
+   * since it will be multiplied by the non-normalized search direction
     * vector (force vector the first time), we scale it by the
     * norm of the force.
     */
-  
+
    if (MASTER(cr)) {
      fprintf(stderr,"Using %d BFGS correction steps.\n\n",nmaxcorr);
      fprintf(stderr,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
@@ -1563,8 +1581,8 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
      fprintf(fplog,"   F-max             = %12.5e on atom %d\n",fmax,nfmax+1);
      fprintf(fplog,"   F-Norm            = %12.5e\n",fnorm/sqrt(state->natoms));
      fprintf(fplog,"\n");
-  }   
-  
+  }
+
    point=0;
    for(i=0;i<n;i++)
      if(!frozen[i])
@@ -1574,22 +1592,22 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
  
    stepsize = 1.0/fnorm;
    converged = FALSE;
-  
-  /* Start the loop over BFGS steps.           
+
+  /* Start the loop over BFGS steps.
     * Each successful step is counted, and we continue until
     * we either converge or reach the max number of steps.
     */
-  
+
    ncorr=0;
  
    /* Set the gradient from the force */
    converged = FALSE;
    for(step=0; (number_steps<0 || (number_steps>=0 && step<=number_steps)) && !converged; step++) {
-    
+
      /* Write coordinates if necessary */
      do_x = do_per_step(step,inputrec->nstxout);
      do_f = do_per_step(step,inputrec->nstfout);
-    
+
      mdof_flags = 0;
      if (do_x)
      {
@@ -1605,16 +1623,16 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                 top_global,step,(real)step,state,state,f,f,NULL,NULL);
  
      /* Do the linesearching in the direction dx[point][0..(n-1)] */
-    
+
      /* pointer to current direction - point=0 first time here */
      s=dx[point];
-    
+
      /* calculate line gradient */
-    for(gpa=0,i=0;i<n;i++) 
+    for(gpa=0,i=0;i<n;i++)
         gpa-=s[i]*ff[i];
  
-    /* Calculate minimum allowed stepsize, before the average (norm) 
-     * relative change in coordinate is smaller than precision 
+    /* Calculate minimum allowed stepsize, before the average (norm)
+     * relative change in coordinate is smaller than precision
       */
      for(minstep=0,i=0;i<n;i++) {
        tmp=fabs(xx[i]);
@@ -1624,24 +1642,24 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
        minstep += tmp*tmp;
      }
      minstep = GMX_REAL_EPS/sqrt(minstep/n);
-    
+
      if(stepsize<minstep) {
        converged=TRUE;
        break;
      }
-    
+
      /* Store old forces and coordinates */
      for(i=0;i<n;i++) {
        lastx[i]=xx[i];
        lastf[i]=ff[i];
      }
      Epot0=Epot;
-    
+
      first=TRUE;
-    
+
      for(i=0;i<n;i++)
        xa[i]=xx[i];
-    
+
      /* Take a step downhill.
       * In theory, we should minimize the function along this direction.
       * That is quite possible, but it turns out to take 5-10 function evaluations
@@ -1654,17 +1672,17 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
       * the continue straight to the next BFGS step without trying to find any minimum.
       * If it didn't work (higher energy), there must be a minimum somewhere between
       * the old position and the new one.
-     * 
+     *
       * Due to the finite numerical accuracy, it turns out that it is a good idea
       * to even accept a SMALL increase in energy, if the derivative is still downhill.
-     * This leads to lower final energies in the tests I've done. / Erik 
+     * This leads to lower final energies in the tests I've done. / Erik
       */
      foundlower=FALSE;
      EpotA = Epot0;
      a = 0.0;
      c = a + stepsize; /* reference position along line is zero */
  
-    /* Check stepsize first. We do not allow displacements 
+    /* Check stepsize first. We do not allow displacements
       * larger than emstep.
       */
      do {
@@ -1682,7 +1700,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
      /* Take a trial step */
      for (i=0; i<n; i++)
        xc[i] = lastx[i] + c*s[i];
-    
+
      neval++;
      /* Calculate energy for the trial step */
      ems.s.x = (rvec *)xc;
@@ -1693,7 +1711,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                     vsite,constr,fcd,graph,mdatoms,fr,
                     mu_tot,enerd,vir,pres,step,FALSE);
      EpotC = ems.epot;
-    
+
      /* Calc derivative along line */
      for(gpc=0,i=0; i<n; i++) {
         gpc -= s[i]*fc[i];   /* f is negative gradient, thus the sign */
@@ -1701,10 +1719,10 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
      /* Sum the gradient along the line across CPUs */
      if (PAR(cr))
        gmx_sumd(1,&gpc,cr);
-    
+
       /* This is the max amount of increase in energy we tolerate */
     tmp=sqrt(GMX_REAL_EPS)*fabs(EpotA);
-    
+
      /* Accept the step if the energy is lower, or if it is not significantly higher
       * and the line derivative is still negative.
       */
@@ -1723,8 +1741,8 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
         */
        foundlower = FALSE;
        stepsize *= 0.618034;
-    }    
-    
+    }
+
      /* OK, if we didn't find a lower value we will have to locate one now - there must
       * be one in the interval [a=0,c].
       * The same thing is valid here, though: Don't spend dozens of iterations to find
@@ -1738,29 +1756,29 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
       */
  
      if(!foundlower) {
-     
+
        nminstep=0;
        do {
         /* Select a new trial point.
          * If the derivatives at points a & c have different sign we interpolate to zero,
          * otherwise just do a bisection.
          */
-       
+
         if(gpa<0 && gpc>0)
           b = a + gpa*(a-c)/(gpc-gpa);
         else
-         b = 0.5*(a+c);                
-       
+         b = 0.5*(a+c);
+
         /* safeguard if interpolation close to machine accuracy causes errors:
          * never go outside the interval
          */
         if(b<=a || b>=c)
           b = 0.5*(a+c);
-       
+
         /* Take a trial step */
-       for (i=0; i<n; i++) 
+       for (i=0; i<n; i++)
           xb[i] = lastx[i] + b*s[i];
-       
+
         neval++;
         /* Calculate energy for the trial step */
         ems.s.x = (rvec *)xb;
@@ -1771,16 +1789,16 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                         vsite,constr,fcd,graph,mdatoms,fr,
                         mu_tot,enerd,vir,pres,step,FALSE);
         EpotB = ems.epot;
-       
+
         fnorm = ems.fnorm;
-       
-       for(gpb=0,i=0; i<n; i++) 
+
+       for(gpb=0,i=0; i<n; i++)
           gpb -= s[i]*fb[i];   /* f is negative gradient, thus the sign */
-       
+
         /* Sum the gradient along the line across CPUs */
         if (PAR(cr))
           gmx_sumd(1,&gpb,cr);
-       
+
         /* Keep one of the intervals based on the value of the derivative at the new point */
         if(gpb>0) {
           /* Replace c endpoint with b */
@@ -1788,9 +1806,9 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
           c = b;
           gpc = gpb;
           /* swap coord pointers b/c */
-         xtmp = xb; 
+         xtmp = xb;
           ftmp = fb;
-         xb = xc; 
+         xb = xc;
           fb = fc;
           xc = xtmp;
           fc = ftmp;
@@ -1800,20 +1818,20 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
           a = b;
           gpa = gpb;
           /* swap coord pointers a/b */
-         xtmp = xb; 
+         xtmp = xb;
           ftmp = fb;
-         xb = xa; 
+         xb = xa;
           fb = fa;
-         xa = xtmp; 
+         xa = xtmp;
           fa = ftmp;
         }
-       
-       /* 
+
+       /*
          * Stop search as soon as we find a value smaller than the endpoints,
          * or if the tolerance is below machine precision.
          * Never run more than 20 steps, no matter what.
          */
-       nminstep++; 
+       nminstep++;
        } while((EpotB>EpotA || EpotB>EpotC) && (nminstep<20));
  
        if(fabs(EpotB-Epot0)<GMX_REAL_EPS || nminstep>=20) {
@@ -1836,7 +1854,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
           continue;
         }
        }
-      
+
        /* Select min energy state of A & C, put the best in xx/ff/Epot
         */
        if(EpotC<EpotA) {
@@ -1856,7 +1874,7 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
         }
         stepsize=a;
        }
-      
+
      } else {
        /* found lower */
        Epot = EpotC;
@@ -1868,11 +1886,11 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
        stepsize=c;
      }
  
-    /* Update the memory information, and calculate a new 
-     * approximation of the inverse hessian 
+    /* Update the memory information, and calculate a new
+     * approximation of the inverse hessian
       */
-    
-    /* Have new data in Epot, xx, ff */        
+
+    /* Have new data in Epot, xx, ff */
      if(ncorr<nmaxcorr)
        ncorr++;
  
@@ -1880,64 +1898,64 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
        dg[point][i]=lastf[i]-ff[i];
        dx[point][i]*=stepsize;
      }
-    
+
      dgdg=0;
-    dgdx=0;    
+    dgdx=0;
      for(i=0;i<n;i++) {
        dgdg+=dg[point][i]*dg[point][i];
        dgdx+=dg[point][i]*dx[point][i];
      }
-    
+
      diag=dgdx/dgdg;
-    
+
      rho[point]=1.0/dgdx;
      point++;
-    
+
      if(point>=nmaxcorr)
        point=0;
-    
+
      /* Update */
      for(i=0;i<n;i++)
        p[i]=ff[i];
-    
+
      cp=point;
-    
+
      /* Recursive update. First go back over the memory points */
      for(k=0;k<ncorr;k++) {
        cp--;
-      if(cp<0) 
+      if(cp<0)
         cp=ncorr-1;
-      
+
        sq=0;
        for(i=0;i<n;i++)
         sq+=dx[cp][i]*p[i];
-      
+
        alpha[cp]=rho[cp]*sq;
-      
+
        for(i=0;i<n;i++)
-       p[i] -= alpha[cp]*dg[cp][i];            
+       p[i] -= alpha[cp]*dg[cp][i];
      }
-    
+
      for(i=0;i<n;i++)
        p[i] *= diag;
-    
+
      /* And then go forward again */
      for(k=0;k<ncorr;k++) {
        yr = 0;
        for(i=0;i<n;i++)
         yr += p[i]*dg[cp][i];
-      
-      beta = rho[cp]*yr;           
+
+      beta = rho[cp]*yr;
        beta = alpha[cp]-beta;
-      
+
        for(i=0;i<n;i++)
         p[i] += beta*dx[cp][i];
-      
-      cp++;    
+
+      cp++;
        if(cp>=ncorr)
         cp=0;
      }
-    
+
      for(i=0;i<n;i++)
        if(!frozen[i])
         dx[point][i] = p[i];
@@ -1945,10 +1963,10 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
         dx[point][i] = 0;
  
      stepsize=1.0;
-    
+
      /* Test whether the convergence criterion is met */
      get_f_norm_max(cr,&(inputrec->opts),mdatoms,f,&fnorm,&fmax,&nfmax);
-    
+
      /* Print it if necessary */
      if (MASTER(cr)) {
        if(bVerbose)
@@ -1956,28 +1974,28 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
                 step,Epot,fnorm/sqrt(state->natoms),fmax,nfmax+1);
        /* Store the new (lower) energies */
        upd_mdebin(mdebin,FALSE,FALSE,(double)step,
-                mdatoms->tmass,enerd,state,state->box,
-                NULL,NULL,vir,pres,NULL,mu_tot,constr);
+                 mdatoms->tmass,enerd,state,inputrec->fepvals,inputrec->expandedvals,state->box,
+                 NULL,NULL,vir,pres,NULL,mu_tot,constr);
        do_log = do_per_step(step,inputrec->nstlog);
        do_ene = do_per_step(step,inputrec->nstenergy);
        if(do_log)
-       print_ebin_header(fplog,step,step,state->lambda);
+          print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
        print_ebin(outf->fp_ene,do_ene,FALSE,FALSE,
                  do_log ? fplog : NULL,step,step,eprNORMAL,
                  TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
      }
-    
+
      /* Stop when the maximum force lies below tolerance.
       * If we have reached machine precision, converged is already set to true.
       */
-    
+
      converged = converged || (fmax < inputrec->em_tol);
-    
+
    } /* End of the loop */
-  
-  if(converged)        
+
+  if(converged)
      step--; /* we never took that last step in this case */
-  
+
      if(fmax>inputrec->em_tol)
      {
          if (MASTER(cr))
@@ -1985,45 +2003,45 @@ double do_lbfgs(FILE *fplog,t_commrec *cr,
              warn_step(stderr,inputrec->em_tol,step-1==number_steps,FALSE);
              warn_step(fplog ,inputrec->em_tol,step-1==number_steps,FALSE);
          }
-        converged = FALSE; 
+        converged = FALSE;
      }
-  
+
    /* If we printed energy and/or logfile last step (which was the last step)
     * we don't have to do it again, but otherwise print the final values.
     */
    if(!do_log) /* Write final value to log since we didn't do anythin last step */
-    print_ebin_header(fplog,step,step,state->lambda);
+    print_ebin_header(fplog,step,step,state->lambda[efptFEP]);
    if(!do_ene || !do_log) /* Write final energy file entries */
      print_ebin(outf->fp_ene,!do_ene,FALSE,FALSE,
                !do_log ? fplog : NULL,step,step,eprNORMAL,
                TRUE,mdebin,fcd,&(top_global->groups),&(inputrec->opts));
-  
+
    /* Print some stuff... */
    if (MASTER(cr))
      fprintf(stderr,"\nwriting lowest energy coordinates.\n");
-  
+
    /* IMPORTANT!
     * For accurate normal mode calculation it is imperative that we
     * store the last conformation into the full precision binary trajectory.
     *
     * However, we should only do it if we did NOT already write this step
     * above (which we did if do_x or do_f was true).
-   */  
+   */
    do_x = !do_per_step(step,inputrec->nstxout);
    do_f = !do_per_step(step,inputrec->nstfout);
    write_em_traj(fplog,cr,outf,do_x,do_f,ftp2fn(efSTO,nfile,fnm),
                  top_global,inputrec,step,
                  &ems,state,f);
-  
+
    if (MASTER(cr)) {
      print_converged(stderr,LBFGS,inputrec->em_tol,step,converged,
                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
      print_converged(fplog,LBFGS,inputrec->em_tol,step,converged,
                     number_steps,Epot,fmax,nfmax,fnorm/sqrt(state->natoms));
-    
+
      fprintf(fplog,"\nPerformed %d energy evaluations in total.\n",neval);
    }
-  
+
    finish_em(fplog,cr,outf,runtime,wcycle);
  
    /* To print the actual number of steps we needed somewhere */
@@ -2046,12 +2064,13 @@ double do_steep(FILE *fplog,t_commrec *cr,
                  t_nrnb *nrnb,gmx_wallcycle_t wcycle,
                  gmx_edsam_t ed,
                  t_forcerec *fr,
-                int repl_ex_nst,int repl_ex_seed,
+                int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                gmx_membed_t membed,
                  real cpt_period,real max_hours,
                  const char *deviceOptions,
                  unsigned long Flags,
                  gmx_runtime_t *runtime)
-{ 
+{
    const char *SD="Steepest Descents";
    em_state_t *s_min,*s_try;
    rvec       *f_global;
@@ -2063,13 +2082,13 @@ double do_steep(FILE *fplog,t_commrec *cr,
    real   stepsize,constepsize;
    real   ustep,dvdlambda,fnormn;
    gmx_mdoutf_t *outf;
-  t_mdebin   *mdebin; 
-  gmx_bool   bDone,bAbort,do_x,do_f; 
-  tensor vir,pres; 
+  t_mdebin   *mdebin;
+  gmx_bool   bDone,bAbort,do_x,do_f;
+  tensor vir,pres;
    rvec   mu_tot;
    int    nsteps;
-  int    count=0; 
-  int    steps_accepted=0; 
+  int    count=0;
+  int    steps_accepted=0;
    /* not used */
    real   terminate=0;
  
@@ -2081,27 +2100,27 @@ double do_steep(FILE *fplog,t_commrec *cr,
            state_global,top_global,s_try,&top,&f,&f_global,
            nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
            nfile,fnm,&outf,&mdebin);
-       
+
    /* Print to log file  */
    print_em_start(fplog,cr,runtime,wcycle,SD);
-    
-  /* Set variables for stepsize (in nm). This is the largest  
-   * step that we are going to make in any direction. 
+
+  /* Set variables for stepsize (in nm). This is the largest
+   * step that we are going to make in any direction.
     */
-  ustep = inputrec->em_stepsize; 
+  ustep = inputrec->em_stepsize;
    stepsize = 0;
-  
+
    /* Max number of steps  */
-  nsteps = inputrec->nsteps; 
-  
-  if (MASTER(cr)) 
+  nsteps = inputrec->nsteps;
+
+  if (MASTER(cr))
      /* Print to the screen  */
      sp_header(stderr,SD,inputrec->em_tol,nsteps);
    if (fplog)
      sp_header(fplog,SD,inputrec->em_tol,nsteps);
-    
+
    /**** HERE STARTS THE LOOP ****
-   * count is the counter for the number of steps 
+   * count is the counter for the number of steps
     * bDone will be TRUE when the minimization has converged
     * bAbort will be TRUE when nsteps steps have been performed or when
     * the stepsize becomes smaller than is reasonable for machine precision
@@ -2111,25 +2130,26 @@ double do_steep(FILE *fplog,t_commrec *cr,
    bAbort = FALSE;
    while( !bDone && !bAbort ) {
      bAbort = (nsteps >= 0) && (count == nsteps);
-    
+
      /* set new coordinates, except for first step */
      if (count > 0) {
-      do_em_step(cr,inputrec,mdatoms,s_min,stepsize,s_min->f,s_try,
-                constr,top,nrnb,wcycle,count);
+        do_em_step(cr,inputrec,mdatoms,fr->bMolPBC,
+                   s_min,stepsize,s_min->f,s_try,
+                   constr,top,nrnb,wcycle,count);
      }
-    
+
      evaluate_energy(fplog,bVerbose,cr,
                     state_global,top_global,s_try,top,
                     inputrec,nrnb,wcycle,gstat,
                     vsite,constr,fcd,graph,mdatoms,fr,
                     mu_tot,enerd,vir,pres,count,count==0);
-        
+
      if (MASTER(cr))
-      print_ebin_header(fplog,count,count,s_try->s.lambda);
+      print_ebin_header(fplog,count,count,s_try->s.lambda[efptFEP]);
  
      if (count == 0)
        s_min->epot = s_try->epot + 1;
-    
+
      /* Print it if necessary  */
      if (MASTER(cr)) {
        if (bVerbose) {
@@ -2137,12 +2157,12 @@ double do_steep(FILE *fplog,t_commrec *cr,
                 count,ustep,s_try->epot,s_try->fmax,s_try->a_fmax+1,
                 (s_try->epot < s_min->epot) ? '\n' : '\r');
        }
-      
+
        if (s_try->epot < s_min->epot) {
         /* Store the new (lower) energies  */
         upd_mdebin(mdebin,FALSE,FALSE,(double)count,
-                  mdatoms->tmass,enerd,&s_try->s,s_try->s.box,
-                  NULL,NULL,vir,pres,NULL,mu_tot,constr);
+                  mdatoms->tmass,enerd,&s_try->s,inputrec->fepvals,inputrec->expandedvals,
+                   s_try->s.box, NULL,NULL,vir,pres,NULL,mu_tot,constr);
         print_ebin(outf->fp_ene,TRUE,
                    do_per_step(steps_accepted,inputrec->nstdisreout),
                    do_per_step(steps_accepted,inputrec->nstorireout),
@@ -2150,21 +2170,21 @@ double do_steep(FILE *fplog,t_commrec *cr,
                    mdebin,fcd,&(top_global->groups),&(inputrec->opts));
         fflush(fplog);
        }
-    } 
-    
-    /* Now if the new energy is smaller than the previous...  
+    }
+
+    /* Now if the new energy is smaller than the previous...
       * or if this is the first step!
-     * or if we did random steps! 
+     * or if we did random steps!
       */
-    
+
      if ( (count==0) || (s_try->epot < s_min->epot) ) {
-      steps_accepted++; 
+      steps_accepted++;
  
        /* Test whether the convergence criterion is met...  */
        bDone = (s_try->fmax < inputrec->em_tol);
-      
+
        /* Copy the arrays for force, positions and energy  */
-      /* The 'Min' array always holds the coords and forces of the minimal 
+      /* The 'Min' array always holds the coords and forces of the minimal
          sampled energy  */
        swap_em_state(s_min,s_try);
        if (count > 0)
@@ -2176,7 +2196,7 @@ double do_steep(FILE *fplog,t_commrec *cr,
        write_em_traj(fplog,cr,outf,do_x,do_f,NULL,
                      top_global,inputrec,count,
                      s_min,state_global,f_global);
-    } 
+    }
      else {
        /* If energy is not smaller make the step smaller...  */
        ustep *= 0.5;
@@ -2188,10 +2208,10 @@ double do_steep(FILE *fplog,t_commrec *cr,
                                nrnb,wcycle);
        }
      }
-    
+
      /* Determine new step  */
      stepsize = ustep/s_min->fmax;
-    
+
      /* Check if stepsize is too small, with 1 nm as a characteristic length */
  #ifdef GMX_DOUBLE
          if (count == nsteps || ustep < 1e-12)
@@ -2206,13 +2226,13 @@ double do_steep(FILE *fplog,t_commrec *cr,
              }
              bAbort=TRUE;
          }
-    
+
      count++;
    } /* End of the loop  */
-  
+
      /* Print some shit...  */
-  if (MASTER(cr)) 
-    fprintf(stderr,"\nwriting lowest energy coordinates.\n"); 
+  if (MASTER(cr))
+    fprintf(stderr,"\nwriting lowest energy coordinates.\n");
    write_em_traj(fplog,cr,outf,TRUE,inputrec->nstfout,ftp2fn(efSTO,nfile,fnm),
                 top_global,inputrec,count,
                 s_min,state_global,f_global);
@@ -2227,12 +2247,12 @@ double do_steep(FILE *fplog,t_commrec *cr,
    }
  
    finish_em(fplog,cr,outf,runtime,wcycle);
-  
+
    /* To print the actual number of steps we needed somewhere */
    inputrec->nsteps=count;
  
    runtime->nsteps_done = count;
-  
+
    return 0;
  } /* That's all folks */
  
@@ -2250,7 +2270,8 @@ double do_nm(FILE *fplog,t_commrec *cr,
               t_nrnb *nrnb,gmx_wallcycle_t wcycle,
               gmx_edsam_t ed,
               t_forcerec *fr,
-             int repl_ex_nst,int repl_ex_seed,
+             int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+             gmx_membed_t membed,
               real cpt_period,real max_hours,
               const char *deviceOptions,
               unsigned long Flags,
@@ -2266,7 +2287,7 @@ double do_nm(FILE *fplog,t_commrec *cr,
      rvec       *f;
      gmx_global_stat_t gstat;
      t_graph    *graph;
-    real       t,lambda;
+    real       t,t0,lambda,lam0;
      gmx_bool       bNS;
      tensor     vir,pres;
      rvec       mu_tot;
@@ -2276,31 +2297,31 @@ double do_nm(FILE *fplog,t_commrec *cr,
      gmx_sparsematrix_t * sparse_matrix = NULL;
      real *     full_matrix             = NULL;
      em_state_t *   state_work;
-       
+
      /* added with respect to mdrun */
      int        i,j,k,row,col;
      real       der_range=10.0*sqrt(GMX_REAL_EPS);
      real       x_min;
      real       fnorm,fmax;
-    
+
      if (constr != NULL)
      {
          gmx_fatal(FARGS,"Constraints present with Normal Mode Analysis, this combination is not supported");
      }
  
      state_work = init_em_state();
-    
+
      /* Init em and store the local state in state_minimum */
      init_em(fplog,NM,cr,inputrec,
              state_global,top_global,state_work,&top,
              &f,&f_global,
              nrnb,mu_tot,fr,&enerd,&graph,mdatoms,&gstat,vsite,constr,
              nfile,fnm,&outf,NULL);
-    
+
      natoms = top_global->natoms;
      snew(fneg,natoms);
      snew(dfdx,natoms);
-    
+
  #ifndef GMX_DOUBLE
      if (MASTER(cr))
      {
@@ -2311,11 +2332,11 @@ double do_nm(FILE *fplog,t_commrec *cr,
                  "      are fairly modest even if you recompile in double precision.\n\n");
      }
  #endif
-    
+
      /* Check if we can/should use sparse storage format.
       *
       * Sparse format is only useful when the Hessian itself is sparse, which it
-      * will be when we use a cutoff.    
+      * will be when we use a cutoff.
        * For small systems (n<1000) it is easier to always use full matrix format, though.
        */
      if(EEL_FULL(fr->eeltype) || fr->rlist==0.0)
@@ -2333,9 +2354,9 @@ double do_nm(FILE *fplog,t_commrec *cr,
          fprintf(stderr,"Using compressed symmetric sparse Hessian format.\n");
          bSparse = TRUE;
      }
-    
+
      sz = DIM*top_global->natoms;
-    
+
      fprintf(stderr,"Allocating Hessian memory...\n\n");
  
      if(bSparse)
@@ -2347,29 +2368,31 @@ double do_nm(FILE *fplog,t_commrec *cr,
      {
          snew(full_matrix,sz*sz);
      }
-    
+
      /* Initial values */
-    t      = inputrec->init_t;
-    lambda = inputrec->init_lambda;
-    
+    t0           = inputrec->init_t;
+    lam0         = inputrec->fepvals->init_lambda;
+    t            = t0;
+    lambda       = lam0;
+
      init_nrnb(nrnb);
-    
+
      where();
-    
+
      /* Write start time and temperature */
      print_em_start(fplog,cr,runtime,wcycle,NM);
  
      /* fudge nr of steps to nr of atoms */
      inputrec->nsteps = natoms*2;
  
-    if (MASTER(cr)) 
+    if (MASTER(cr))
      {
          fprintf(stderr,"starting normal mode calculation '%s'\n%d steps.\n\n",
                  *(top_global->name),(int)inputrec->nsteps);
      }
  
      nnodes = cr->nnodes;
-   
+
      /* Make evaluate_energy do a single node force calculation */
      cr->nnodes = 1;
      evaluate_energy(fplog,bVerbose,cr,
@@ -2385,7 +2408,7 @@ double do_nm(FILE *fplog,t_commrec *cr,
      if (MASTER(cr))
      {
          fprintf(stderr,"Maximum force:%12.5e\n",state_work->fmax);
-        if (state_work->fmax > 1.0e-3) 
+        if (state_work->fmax > 1.0e-3)
          {
              fprintf(stderr,"Maximum force probably not small enough to");
              fprintf(stderr," ensure that you are in an \nenergy well. ");
@@ -2393,26 +2416,26 @@ double do_nm(FILE *fplog,t_commrec *cr,
              fprintf(stderr," when the\nresulting matrix is diagonalized.\n");
          }
      }
-    
+
      /***********************************************************
       *
-     *      Loop over all pairs in matrix 
-     * 
-     *      do_force called twice. Once with positive and 
-     *      once with negative displacement 
+     *      Loop over all pairs in matrix
+     *
+     *      do_force called twice. Once with positive and
+     *      once with negative displacement
       *
       ************************************************************/
  
      /* Steps are divided one by one over the nodes */
-    for(atom=cr->nodeid; atom<natoms; atom+=nnodes) 
+    for(atom=cr->nodeid; atom<natoms; atom+=nnodes)
      {
-        
-        for (d=0; d<DIM; d++) 
+
+        for (d=0; d<DIM; d++)
          {
              x_min = state_work->s.x[atom][d];
  
              state_work->s.x[atom][d] = x_min - der_range;
-          
+
              /* Make evaluate_energy do a single node force calculation */
              cr->nnodes = 1;
              evaluate_energy(fplog,bVerbose,cr,
@@ -2420,14 +2443,14 @@ double do_nm(FILE *fplog,t_commrec *cr,
                              inputrec,nrnb,wcycle,gstat,
                              vsite,constr,fcd,graph,mdatoms,fr,
                              mu_tot,enerd,vir,pres,atom*2,FALSE);
-                       
+
              for(i=0; i<natoms; i++)
              {
                  copy_rvec(state_work->f[i], fneg[i]);
              }
-            
+
              state_work->s.x[atom][d] = x_min + der_range;
-            
+
              evaluate_energy(fplog,bVerbose,cr,
                              state_global,top_global,state_work,top,
                              inputrec,nrnb,wcycle,gstat,
@@ -2438,9 +2461,9 @@ double do_nm(FILE *fplog,t_commrec *cr,
              /* x is restored to original */
              state_work->s.x[atom][d] = x_min;
  
-            for(j=0; j<natoms; j++) 
+            for(j=0; j<natoms; j++)
              {
-                for (k=0; (k<DIM); k++) 
+                for (k=0; (k<DIM); k++)
                  {
                      dfdx[j][k] =
                          -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
@@ -2475,12 +2498,12 @@ double do_nm(FILE *fplog,t_commrec *cr,
  
                      row = (atom + node)*DIM + d;
  
-                    for(j=0; j<natoms; j++) 
+                    for(j=0; j<natoms; j++)
                      {
-                        for(k=0; k<DIM; k++) 
+                        for(k=0; k<DIM; k++)
                          {
                              col = j*DIM + k;
-                            
+
                              if (bSparse)
                              {
                                  if (col >= row && dfdx[j][k] != 0.0)
@@ -2497,22 +2520,22 @@ double do_nm(FILE *fplog,t_commrec *cr,
                      }
                  }
              }
-            
+
              if (bVerbose && fplog)
              {
-                fflush(fplog);            
+                fflush(fplog);
              }
          }
          /* write progress */
-        if (MASTER(cr) && bVerbose) 
+        if (MASTER(cr) && bVerbose)
          {
              fprintf(stderr,"\rFinished step %d out of %d",
-                    min(atom+nnodes,natoms),natoms); 
+                    min(atom+nnodes,natoms),natoms);
              fflush(stderr);
          }
      }
-    
-    if (MASTER(cr)) 
+
+    if (MASTER(cr))
      {
          fprintf(stderr,"\n\nWriting Hessian...\n");
          gmx_mtxio_write(ftp2fn(efMTX,nfile,fnm),sz,sz,full_matrix,sparse_matrix);
@@ -2521,6 +2544,7 @@ double do_nm(FILE *fplog,t_commrec *cr,
      finish_em(fplog,cr,outf,runtime,wcycle);
  
      runtime->nsteps_done = natoms*2;
-    
+
      return 0;
  }
+