Merge branch release-5-1 into release-2016

[alexxy/gromacs.git] / src / gromacs / mdlib / minimize.cpp
diff --git a/src/gromacs/mdlib/minimize.cpp b/src/gromacs/mdlib/minimize.cpp

index a6ffb5c793f445ef6779c929ac1548b8888f1648..2d5290d4b9502a9d1f900089717a5b1578c490ed 100644 (file)
--- a/src/gromacs/mdlib/minimize.cpp
+++ b/src/gromacs/mdlib/minimize.cpp
@@ -34,63 +34,86 @@
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
+/*! \internal \file
+ *
+ * \brief This file defines integrators for energy minimization
+ *
+ * \author Berk Hess <hess@kth.se>
+ * \author Erik Lindahl <erik@kth.se>
+ * \ingroup module_mdlib
+ */
  #include "gmxpre.h"
  
+#include "minimize.h"
+
  #include "config.h"
  
-#include <math.h>
-#include <string.h>
-#include <time.h>
+#include <cmath>
+#include <cstring>
+#include <ctime>
  
  #include <algorithm>
+#include <vector>
  
+#include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/domdec.h"
+#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mtxio.h"
-#include "gromacs/fileio/trajectory_writing.h"
+#include "gromacs/gmxlib/md_logging.h"
+#include "gromacs/gmxlib/network.h"
+#include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/imd/imd.h"
-#include "gromacs/legacyheaders/constr.h"
-#include "gromacs/legacyheaders/force.h"
-#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
-#include "gromacs/legacyheaders/macros.h"
-#include "gromacs/legacyheaders/md_logging.h"
-#include "gromacs/legacyheaders/md_support.h"
-#include "gromacs/legacyheaders/mdatoms.h"
-#include "gromacs/legacyheaders/mdebin.h"
-#include "gromacs/legacyheaders/mdrun.h"
-#include "gromacs/legacyheaders/names.h"
-#include "gromacs/legacyheaders/network.h"
-#include "gromacs/legacyheaders/nrnb.h"
-#include "gromacs/legacyheaders/ns.h"
-#include "gromacs/legacyheaders/sim_util.h"
-#include "gromacs/legacyheaders/tgroup.h"
-#include "gromacs/legacyheaders/txtdump.h"
-#include "gromacs/legacyheaders/typedefs.h"
-#include "gromacs/legacyheaders/update.h"
-#include "gromacs/legacyheaders/vsite.h"
-#include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/linearalgebra/sparsematrix.h"
  #include "gromacs/listed-forces/manage-threading.h"
+#include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
+#include "gromacs/mdlib/constr.h"
+#include "gromacs/mdlib/force.h"
+#include "gromacs/mdlib/forcerec.h"
+#include "gromacs/mdlib/gmx_omp_nthreads.h"
+#include "gromacs/mdlib/md_support.h"
+#include "gromacs/mdlib/mdatoms.h"
+#include "gromacs/mdlib/mdebin.h"
+#include "gromacs/mdlib/mdrun.h"
+#include "gromacs/mdlib/ns.h"
+#include "gromacs/mdlib/shellfc.h"
+#include "gromacs/mdlib/sim_util.h"
+#include "gromacs/mdlib/tgroup.h"
+#include "gromacs/mdlib/trajectory_writing.h"
+#include "gromacs/mdlib/update.h"
+#include "gromacs/mdlib/vsite.h"
+#include "gromacs/mdtypes/commrec.h"
+#include "gromacs/mdtypes/inputrec.h"
+#include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/timing/walltime_accounting.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/utility/cstringutil.h"
+#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
+//! Utility structure for manipulating states during EM
  typedef struct {
+    //! Copy of the global state
      t_state  s;
+    //! Force array
      rvec    *f;
+    //! Potential energy
      real     epot;
+    //! Norm of the force
      real     fnorm;
+    //! Maximum force
      real     fmax;
+    //! Direction
      int      a_fmax;
  } em_state_t;
  
+//! Initiate em_state_t structure and return pointer to it
  static em_state_t *init_em_state()
  {
      em_state_t *ems;
@@ -103,6 +126,7 @@ static em_state_t *init_em_state()
      return ems;
  }
  
+//! Print the EM starting conditions
  static void print_em_start(FILE                     *fplog,
                             t_commrec                *cr,
                             gmx_walltime_accounting_t walltime_accounting,
@@ -113,6 +137,8 @@ static void print_em_start(FILE                     *fplog,
      wallcycle_start(wcycle, ewcRUN);
      print_start(fplog, cr, walltime_accounting, name);
  }
+
+//! Stop counting time for EM
  static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
                          gmx_wallcycle_t           wcycle)
  {
@@ -121,6 +147,7 @@ static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
      walltime_accounting_end(walltime_accounting);
  }
  
+//! Printing a log file and console header
  static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
  {
      fprintf(out, "\n");
@@ -129,6 +156,7 @@ static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
      fprintf(out, "   Number of steps    = %12d\n", nsteps);
  }
  
+//! Print warning message
  static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
  {
      char buffer[2048];
@@ -164,8 +192,7 @@ static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstra
      fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
  }
  
-
-
+//! Print message about convergence of the EM
  static void print_converged(FILE *fp, const char *alg, real ftol,
                              gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
                              real epot, real fmax, int nfmax, real fnorm)
@@ -189,7 +216,7 @@ static void print_converged(FILE *fp, const char *alg, real ftol,
                  alg, ftol, gmx_step_str(count, buf));
      }
  
-#ifdef GMX_DOUBLE
+#if GMX_DOUBLE
      fprintf(fp, "Potential Energy  = %21.14e\n", epot);
      fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
      fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
@@ -200,6 +227,7 @@ static void print_converged(FILE *fp, const char *alg, real ftol,
  #endif
  }
  
+//! Compute the norm and max of the force array in parallel
  static void get_f_norm_max(t_commrec *cr,
                             t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
                             real *fnorm, real *fmax, int *a_fmax)
@@ -226,7 +254,7 @@ static void get_f_norm_max(t_commrec *cr,
              {
                  if (!opts->nFreeze[gf][m])
                  {
-                    fam += sqr(f[i][m]);
+                    fam += gmx::square(f[i][m]);
                  }
              }
              fnorm2 += fam;
@@ -293,6 +321,7 @@ static void get_f_norm_max(t_commrec *cr,
      }
  }
  
+//! Compute the norm of the force
  static void get_state_f_norm_max(t_commrec *cr,
                                   t_grpopts *opts, t_mdatoms *mdatoms,
                                   em_state_t *ems)
@@ -300,11 +329,12 @@ static void get_state_f_norm_max(t_commrec *cr,
      get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
  }
  
+//! Initialize the energy minimization
  void init_em(FILE *fplog, const char *title,
               t_commrec *cr, t_inputrec *ir,
               t_state *state_global, gmx_mtop_t *top_global,
               em_state_t *ems, gmx_localtop_t **top,
-             rvec **f, rvec **f_global,
+             rvec **f,
               t_nrnb *nrnb, rvec mu_tot,
               t_forcerec *fr, gmx_enerdata_t **enerd,
               t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
@@ -345,18 +375,10 @@ void init_em(FILE *fplog, const char *title,
          dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
                              state_global, top_global, ir,
                              &ems->s, &ems->f, mdatoms, *top,
-                            fr, vsite, NULL, constr,
+                            fr, vsite, constr,
                              nrnb, NULL, FALSE);
          dd_store_state(cr->dd, &ems->s);
  
-        if (ir->nstfout)
-        {
-            snew(*f_global, top_global->natoms);
-        }
-        else
-        {
-            *f_global = NULL;
-        }
          *graph = NULL;
      }
      else
@@ -365,16 +387,19 @@ void init_em(FILE *fplog, const char *title,
  
          /* Just copy the state */
          ems->s = *state_global;
-        snew(ems->s.x, ems->s.nalloc);
-        snew(ems->f, ems->s.nalloc);
+        /* We need to allocate one element extra, since we might use
+         * (unaligned) 4-wide SIMD loads to access rvec entries.
+         */
+        snew(ems->s.x, ems->s.nalloc + 1);
+        snew(ems->f, ems->s.nalloc+1);
+        snew(ems->s.v, ems->s.nalloc+1);
          for (i = 0; i < state_global->natoms; i++)
          {
              copy_rvec(state_global->x[i], ems->s.x[i]);
          }
          copy_mat(state_global->box, ems->s.box);
  
-        *top      = gmx_mtop_generate_local_top(top_global, ir);
-        *f_global = *f;
+        *top      = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
          setup_bonded_threading(fr, &(*top)->idef);
  
@@ -447,6 +472,7 @@ void init_em(FILE *fplog, const char *title,
      calc_shifts(ems->s.box, fr->shift_vec);
  }
  
+//! Finalize the minimization
  static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
                        gmx_walltime_accounting_t walltime_accounting,
                        gmx_wallcycle_t wcycle)
@@ -462,6 +488,7 @@ static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
      em_time_end(walltime_accounting, wcycle);
  }
  
+//! Swap two different EM states during minimization
  static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
  {
      em_state_t tmp;
@@ -471,6 +498,7 @@ static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
      *ems2 = tmp;
  }
  
+//! Copy coordinate from an EM state to a "normal" state structure
  static void copy_em_coords(em_state_t *ems, t_state *state)
  {
      int i;
@@ -481,13 +509,14 @@ static void copy_em_coords(em_state_t *ems, t_state *state)
      }
  }
  
+//! Save the EM trajectory
  static void write_em_traj(FILE *fplog, t_commrec *cr,
                            gmx_mdoutf_t outf,
                            gmx_bool bX, gmx_bool bF, const char *confout,
                            gmx_mtop_t *top_global,
                            t_inputrec *ir, gmx_int64_t step,
                            em_state_t *state,
-                          t_state *state_global, rvec *f_global)
+                          t_state *state_global)
  {
      int      mdof_flags;
      gmx_bool bIMDout = FALSE;
@@ -502,7 +531,6 @@ static void write_em_traj(FILE *fplog, t_commrec *cr,
      if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
      {
          copy_em_coords(state, state_global);
-        f_global = state->f;
      }
  
      mdof_flags = 0;
@@ -523,7 +551,7 @@ static void write_em_traj(FILE *fplog, t_commrec *cr,
  
      mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
                                       top_global, step, (double)step,
-                                     &state->s, state_global, state->f, f_global);
+                                     &state->s, state_global, state->f);
  
      if (confout != NULL && MASTER(cr))
      {
@@ -540,7 +568,10 @@ static void write_em_traj(FILE *fplog, t_commrec *cr,
      }
  }
  
-static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
+//! \brief Do one minimization step
+//
+// \returns true when the step succeeded, false when a constraint error occurred
+static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
                         gmx_bool bMolPBC,
                         em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
                         gmx_constr_t constr, gmx_localtop_t *top,
@@ -553,6 +584,8 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
      real     dvdl_constr;
      int      nthreads gmx_unused;
  
+    bool     validStep = true;
+
      s1 = &ems1->s;
      s2 = &ems2->s;
  
@@ -566,11 +599,14 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
      if (s2->nalloc != s1->nalloc)
      {
          s2->nalloc = s1->nalloc;
-        srenew(s2->x, s1->nalloc);
+        /* We need to allocate one element extra, since we might use
+         * (unaligned) 4-wide SIMD loads to access rvec entries.
+         */
+        srenew(s2->x, s1->nalloc + 1);
          srenew(ems2->f,  s1->nalloc);
          if (s2->flags & (1<<estCGP))
          {
-            srenew(s2->cg_p,  s1->nalloc);
+            srenew(s2->cg_p,  s1->nalloc + 1);
          }
      }
  
@@ -597,21 +633,25 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
  #pragma omp for schedule(static) nowait
          for (int i = start; i < end; i++)
          {
-            if (md->cFREEZE)
-            {
-                gf = md->cFREEZE[i];
-            }
-            for (int m = 0; m < DIM; m++)
+            try
              {
-                if (ir->opts.nFreeze[gf][m])
+                if (md->cFREEZE)
                  {
-                    x2[i][m] = x1[i][m];
+                    gf = md->cFREEZE[i];
                  }
-                else
+                for (int m = 0; m < DIM; m++)
                  {
-                    x2[i][m] = x1[i][m] + a*f[i][m];
+                    if (ir->opts.nFreeze[gf][m])
+                    {
+                        x2[i][m] = x1[i][m];
+                    }
+                    else
+                    {
+                        x2[i][m] = x1[i][m] + a*f[i][m];
+                    }
                  }
              }
+            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
  
          if (s2->flags & (1<<estCGP))
@@ -622,6 +662,7 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
  #pragma omp for schedule(static) nowait
              for (int i = start; i < end; i++)
              {
+                // Trivial OpenMP block that does not throw
                  copy_rvec(p1[i], p2[i]);
              }
          }
@@ -633,7 +674,14 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
              {
  #pragma omp barrier
                  s2->cg_gl_nalloc = s1->cg_gl_nalloc;
-                srenew(s2->cg_gl, s2->cg_gl_nalloc);
+                try
+                {
+                    /* We need to allocate one element extra, since we might use
+                     * (unaligned) 4-wide SIMD loads to access rvec entries.
+                     */
+                    srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
+                }
+                GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  #pragma omp barrier
              }
              s2->ncg_gl = s1->ncg_gl;
@@ -650,15 +698,26 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
      {
          wallcycle_start(wcycle, ewcCONSTR);
          dvdl_constr = 0;
-        constrain(NULL, TRUE, TRUE, constr, &top->idef,
-                  ir, cr, count, 0, 1.0, md,
-                  s1->x, s2->x, NULL, bMolPBC, s2->box,
-                  s2->lambda[efptBONDED], &dvdl_constr,
-                  NULL, NULL, nrnb, econqCoord);
+        validStep   =
+            constrain(NULL, TRUE, TRUE, constr, &top->idef,
+                      ir, cr, count, 0, 1.0, md,
+                      s1->x, s2->x, NULL, bMolPBC, s2->box,
+                      s2->lambda[efptBONDED], &dvdl_constr,
+                      NULL, NULL, nrnb, econqCoord);
          wallcycle_stop(wcycle, ewcCONSTR);
+
+        // We should move this check to the different minimizers
+        if (!validStep && ir->eI != eiSteep)
+        {
+            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
+                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
+        }
      }
+
+    return validStep;
  }
  
+//! Prepare EM for using domain decomposition parallellization
  static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
                                     gmx_mtop_t *top_global, t_inputrec *ir,
                                     em_state_t *ems, gmx_localtop_t *top,
@@ -670,11 +729,12 @@ static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
      dd_partition_system(fplog, step, cr, FALSE, 1,
                          NULL, top_global, ir,
                          &ems->s, &ems->f,
-                        mdatoms, top, fr, vsite, NULL, constr,
+                        mdatoms, top, fr, vsite, constr,
                          nrnb, wcycle, FALSE);
      dd_store_state(cr->dd, &ems->s);
  }
  
+//! De one energy evaluation
  static void evaluate_energy(FILE *fplog, t_commrec *cr,
                              gmx_mtop_t *top_global,
                              em_state_t *ems, gmx_localtop_t *top,
@@ -738,7 +798,7 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr,
               ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
               GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
               GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
-             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
+             (bNS ? GMX_FORCE_NS : 0));
  
      /* Clear the unused shake virial and pressure */
      clear_mat(shake_vir);
@@ -749,9 +809,9 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr,
      {
          wallcycle_start(wcycle, ewcMoveE);
  
-        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
+        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                      inputrec, NULL, NULL, NULL, 1, &terminate,
-                    top_global, &ems->s, FALSE,
+                    NULL, FALSE,
                      CGLO_ENERGY |
                      CGLO_PRESSURE |
                      CGLO_CONSTRAINT);
@@ -760,7 +820,7 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr,
      }
  
      /* Calculate long range corrections to pressure and energy */
-    calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
+    calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
                    pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
      enerd->term[F_DISPCORR] = enercorr;
      enerd->term[F_EPOT]    += enercorr;
@@ -800,8 +860,9 @@ static void evaluate_energy(FILE *fplog, t_commrec *cr,
      }
  }
  
+//! Parallel utility summing energies and forces
  static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                              gmx_mtop_t *mtop,
+                              gmx_mtop_t *top_global,
                                em_state_t *s_min, em_state_t *s_b)
  {
      rvec          *fm, *fb, *fmg;
@@ -825,7 +886,7 @@ static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms
       * This conflicts with the spirit of domain decomposition,
       * but to fully optimize this a much more complicated algorithm is required.
       */
-    snew(fmg, mtop->natoms);
+    snew(fmg, top_global->natoms);
  
      ncg   = s_min->s.ncg_gl;
      cg_gl = s_min->s.cg_gl;
@@ -841,7 +902,7 @@ static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms
              i++;
          }
      }
-    gmx_sum(mtop->natoms*3, fmg[0], cr);
+    gmx_sum(top_global->natoms*3, fmg[0], cr);
  
      /* Now we will determine the part of the sum for the cgs in state s_b */
      ncg         = s_b->s.ncg_gl;
@@ -849,7 +910,7 @@ static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms
      partsum     = 0;
      i           = 0;
      gf          = 0;
-    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
+    grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
      for (c = 0; c < ncg; c++)
      {
          cg = cg_gl[c];
@@ -877,8 +938,9 @@ static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms
      return partsum;
  }
  
+//! Print some stuff, like beta, whatever that means.
  static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
-                    gmx_mtop_t *mtop,
+                    gmx_mtop_t *top_global,
                      em_state_t *s_min, em_state_t *s_b)
  {
      rvec  *fm, *fb;
@@ -919,19 +981,43 @@ static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
      else
      {
          /* We need to reorder cgs while summing */
-        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
+        sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
      }
      if (PAR(cr))
      {
          gmx_sumd(1, &sum, cr);
      }
  
-    return sum/sqr(s_min->fnorm);
+    return sum/gmx::square(s_min->fnorm);
  }
  
+namespace gmx
+{
+
+/*! \brief Do conjugate gradients minimization
+    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+                           int nfile, const t_filenm fnm[],
+                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
+                           int nstglobalcomm,
+                           gmx_vsite_t *vsite, gmx_constr_t constr,
+                           int stepout,
+                           t_inputrec *inputrec,
+                           gmx_mtop_t *top_global, t_fcdata *fcd,
+                           t_state *state_global,
+                           t_mdatoms *mdatoms,
+                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+                           gmx_edsam_t ed,
+                           t_forcerec *fr,
+                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                           gmx_membed_t gmx_unused *membed,
+                           real cpt_period, real max_hours,
+                           int imdport,
+                           unsigned long Flags,
+                           gmx_walltime_accounting_t walltime_accounting)
+ */
  double do_cg(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
+             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
@@ -943,7 +1029,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
               gmx_edsam_t gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
+             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
@@ -957,7 +1043,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
      rvec             *f;
      gmx_global_stat_t gstat;
      t_graph          *graph;
-    rvec             *f_global, *p, *sf;
+    rvec             *p, *sf;
      double            gpa, gpb, gpc, tmp, minstep;
      real              fnormn;
      real              stepsize;
@@ -982,7 +1068,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
  
      /* Init em and store the local state in s_min */
      init_em(fplog, CG, cr, inputrec,
-            state_global, top_global, s_min, &top, &f, &f_global,
+            state_global, top_global, s_min, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
@@ -1019,9 +1105,9 @@ double do_cg(FILE *fplog, t_commrec *cr,
                     mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
-        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
+        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
@@ -1048,7 +1134,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
       * we either converge or reach the max number of steps.
       */
      converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
+    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* start taking steps in a new direction
@@ -1147,7 +1233,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
  
          write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                        top_global, inputrec, step,
-                      s_min, state_global, f_global);
+                      s_min, state_global);
  
          /* Take a step downhill.
           * In theory, we should minimize the function along this direction.
@@ -1442,6 +1528,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
                          s_min->fmax, s_min->a_fmax+1);
+                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
@@ -1456,11 +1543,11 @@ double do_cg(FILE *fplog, t_commrec *cr,
  
              if (do_log)
              {
-                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
+                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send energies and positions to the IMD client if bIMD is TRUE. */
@@ -1474,7 +1561,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
           */
          converged = converged || (s_min->fmax < inputrec->em_tol);
  
-    } /* End of the loop */
+    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
@@ -1502,14 +1589,14 @@ double do_cg(FILE *fplog, t_commrec *cr,
          if (!do_log)
          {
              /* Write final value to log since we didn't do anything the last step */
-            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
+            print_ebin_header(fplog, step, step);
          }
          if (!do_ene || !do_log)
          {
              /* Write final energy file entries */
              print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                         !do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
      }
  
@@ -1531,7 +1618,7 @@ double do_cg(FILE *fplog, t_commrec *cr,
  
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
-                  s_min, state_global, f_global);
+                  s_min, state_global);
  
  
      if (MASTER(cr))
@@ -1552,24 +1639,44 @@ double do_cg(FILE *fplog, t_commrec *cr,
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
-} /* That's all folks */
-
-
+}   /* That's all folks */
+
+
+/*! \brief Do L-BFGS conjugate gradients minimization
+    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+                           int nfile, const t_filenm fnm[],
+                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
+                           int nstglobalcomm,
+                           gmx_vsite_t *vsite, gmx_constr_t constr,
+                           int stepout,
+                           t_inputrec *inputrec,
+                           gmx_mtop_t *top_global, t_fcdata *fcd,
+                           t_state *state_global,
+                           t_mdatoms *mdatoms,
+                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+                           gmx_edsam_t ed,
+                           t_forcerec *fr,
+                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                           real cpt_period, real max_hours,
+                           int imdport,
+                           unsigned long Flags,
+                           gmx_walltime_accounting_t walltime_accounting)
+ */
  double do_lbfgs(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
+                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
                  t_inputrec *inputrec,
                  gmx_mtop_t *top_global, t_fcdata *fcd,
-                t_state *state,
+                t_state *state_global,
                  t_mdatoms *mdatoms,
                  t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                  gmx_edsam_t gmx_unused ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
+                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
@@ -1582,7 +1689,6 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
      rvec              *f;
      gmx_global_stat_t  gstat;
      t_graph           *graph;
-    rvec              *f_global;
      int                ncorr, nmaxcorr, point, cp, neval, nminstep;
      double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
      real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
@@ -1611,7 +1717,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
          gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
      }
  
-    n        = 3*state->natoms;
+    n        = 3*state_global->natoms;
      nmaxcorr = inputrec->nbfgscorr;
  
      /* Allocate memory */
@@ -1651,7 +1757,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
  
      /* Init em */
      init_em(fplog, LBFGS, cr, inputrec,
-            state, top_global, &ems, &top, &f, &f_global,
+            state_global, top_global, &ems, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
      /* Do_lbfgs is not completely updated like do_steep and do_cg,
@@ -1660,7 +1766,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
      sfree(ems.s.x);
      sfree(ems.f);
  
-    xx = (real *)state->x;
+    xx = (real *)state_global->x;
      ff = (real *)f;
  
      start = 0;
@@ -1698,9 +1804,9 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
  
      if (vsite)
      {
-        construct_vsites(vsite, state->x, 1, NULL,
+        construct_vsites(vsite, state_global->x, 1, NULL,
                           top->idef.iparams, top->idef.il,
-                         fr->ePBC, fr->bMolPBC, cr, state->box);
+                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
      }
  
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
@@ -1708,7 +1814,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
       * We do not unshift, so molecules are always whole
       */
      neval++;
-    ems.s.x = state->x;
+    ems.s.x = state_global->x;
      ems.f   = f;
      evaluate_energy(fplog, cr,
                      top_global, &ems, top,
@@ -1721,12 +1827,12 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
      {
          /* Copy stuff to the energy bin for easy printing etc. */
          upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
+                   mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
+        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
@@ -1745,7 +1851,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
  
      if (MASTER(cr))
      {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
+        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
          fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
          fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
@@ -1788,7 +1894,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
  
      /* Set the gradient from the force */
      converged = FALSE;
-    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
+    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* Write coordinates if necessary */
@@ -1812,7 +1918,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
          }
  
          mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
-                                         top_global, step, (real)step, state, state, f, f);
+                                         top_global, step, (real)step, state_global, state_global, f);
  
          /* Do the linesearching in the direction dx[point][0..(n-1)] */
  
@@ -2258,27 +2364,28 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
          {
              if (bVerbose)
              {
-                double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
+                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
+                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
-                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
+                       mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                         NULL, NULL, vir, pres, NULL, mu_tot, constr);
              do_log = do_per_step(step, inputrec->nstlog);
              do_ene = do_per_step(step, inputrec->nstenergy);
              if (do_log)
              {
-                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
+                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
-                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send x and E to IMD client, if bIMD is TRUE. */
-        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr))
+        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
          {
              IMD_send_positions(inputrec->imd);
          }
@@ -2291,7 +2398,7 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
           */
          converged = converged || (fmax < inputrec->em_tol);
  
-    } /* End of the loop */
+    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
@@ -2316,13 +2423,13 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
       */
      if (!do_log) /* Write final value to log since we didn't do anythin last step */
      {
-        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
+        print_ebin_header(fplog, step, step);
      }
      if (!do_ene || !do_log) /* Write final energy file entries */
      {
          print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                     !do_log ? fplog : NULL, step, step, eprNORMAL,
-                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
+                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
  
      /* Print some stuff... */
@@ -2342,11 +2449,11 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
      do_f = !do_per_step(step, inputrec->nstfout);
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
-                  &ems, state, f);
+                  &ems, state_global);
  
      if (MASTER(cr))
      {
-        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
+        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
                          number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
          print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
@@ -2361,12 +2468,31 @@ double do_lbfgs(FILE *fplog, t_commrec *cr,
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
-} /* That's all folks */
-
-
+}   /* That's all folks */
+
+/*! \brief Do steepest descents minimization
+    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+                           int nfile, const t_filenm fnm[],
+                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
+                           int nstglobalcomm,
+                           gmx_vsite_t *vsite, gmx_constr_t constr,
+                           int stepout,
+                           t_inputrec *inputrec,
+                           gmx_mtop_t *top_global, t_fcdata *fcd,
+                           t_state *state_global,
+                           t_mdatoms *mdatoms,
+                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+                           gmx_edsam_t ed,
+                           t_forcerec *fr,
+                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                           real cpt_period, real max_hours,
+                           int imdport,
+                           unsigned long Flags,
+                           gmx_walltime_accounting_t walltime_accounting)
+ */
  double do_steep(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
-                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
+                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
@@ -2378,7 +2504,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
                  gmx_edsam_t gmx_unused  ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-                gmx_membed_t gmx_unused membed,
+                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
@@ -2386,7 +2512,6 @@ double do_steep(FILE *fplog, t_commrec *cr,
  {
      const char       *SD = "Steepest Descents";
      em_state_t       *s_min, *s_try;
-    rvec             *f_global;
      gmx_localtop_t   *top;
      gmx_enerdata_t   *enerd;
      rvec             *f;
@@ -2408,7 +2533,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
  
      /* Init em and store the local state in s_try */
      init_em(fplog, SD, cr, inputrec,
-            state_global, top_global, s_try, &top, &f, &f_global,
+            state_global, top_global, s_try, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
@@ -2448,22 +2573,32 @@ double do_steep(FILE *fplog, t_commrec *cr,
          bAbort = (nsteps >= 0) && (count == nsteps);
  
          /* set new coordinates, except for first step */
+        bool validStep = true;
          if (count > 0)
          {
-            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
-                       s_min, stepsize, s_min->f, s_try,
-                       constr, top, nrnb, wcycle, count);
+            validStep =
+                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
+                           s_min, stepsize, s_min->f, s_try,
+                           constr, top, nrnb, wcycle, count);
          }
  
-        evaluate_energy(fplog, cr,
-                        top_global, s_try, top,
-                        inputrec, nrnb, wcycle, gstat,
-                        vsite, constr, fcd, graph, mdatoms, fr,
-                        mu_tot, enerd, vir, pres, count, count == 0);
+        if (validStep)
+        {
+            evaluate_energy(fplog, cr,
+                            top_global, s_try, top,
+                            inputrec, nrnb, wcycle, gstat,
+                            vsite, constr, fcd, graph, mdatoms, fr,
+                            mu_tot, enerd, vir, pres, count, count == 0);
+        }
+        else
+        {
+            // Signal constraint error during stepping with energy=inf
+            s_try->epot = std::numeric_limits<real>::infinity();
+        }
  
          if (MASTER(cr))
          {
-            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
+            print_ebin_header(fplog, count, count);
          }
  
          if (count == 0)
@@ -2479,6 +2614,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
                  fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
                          count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
                          ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
+                fflush(stderr);
              }
  
              if ( (count == 0) || (s_try->epot < s_min->epot) )
@@ -2494,7 +2630,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
                  print_ebin(mdoutf_get_fp_ene(outf), TRUE,
                             do_per_step(steps_accepted, inputrec->nstdisreout),
                             do_per_step(steps_accepted, inputrec->nstorireout),
-                           fplog, count, count, eprNORMAL, TRUE,
+                           fplog, count, count, eprNORMAL,
                             mdebin, fcd, &(top_global->groups), &(inputrec->opts));
                  fflush(fplog);
              }
@@ -2526,7 +2662,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
              do_f = do_per_step(steps_accepted, inputrec->nstfout);
              write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                            top_global, inputrec, count,
-                          s_min, state_global, f_global);
+                          s_min, state_global);
          }
          else
          {
@@ -2546,7 +2682,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
          stepsize = ustep/s_min->fmax;
  
          /* Check if stepsize is too small, with 1 nm as a characteristic length */
-#ifdef GMX_DOUBLE
+#if GMX_DOUBLE
          if (count == nsteps || ustep < 1e-12)
  #else
          if (count == nsteps || ustep < 1e-6)
@@ -2567,7 +2703,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
          }
  
          count++;
-    } /* End of the loop  */
+    }   /* End of the loop  */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
@@ -2579,7 +2715,7 @@ double do_steep(FILE *fplog, t_commrec *cr,
      }
      write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, count,
-                  s_min, state_global, f_global);
+                  s_min, state_global);
  
      if (MASTER(cr))
      {
@@ -2600,12 +2736,31 @@ double do_steep(FILE *fplog, t_commrec *cr,
      walltime_accounting_set_nsteps_done(walltime_accounting, count);
  
      return 0;
-} /* That's all folks */
-
-
+}   /* That's all folks */
+
+/*! \brief Do normal modes analysis
+    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
+                           int nfile, const t_filenm fnm[],
+                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
+                           int nstglobalcomm,
+                           gmx_vsite_t *vsite, gmx_constr_t constr,
+                           int stepout,
+                           t_inputrec *inputrec,
+                           gmx_mtop_t *top_global, t_fcdata *fcd,
+                           t_state *state_global,
+                           t_mdatoms *mdatoms,
+                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
+                           gmx_edsam_t ed,
+                           t_forcerec *fr,
+                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
+                           real cpt_period, real max_hours,
+                           int imdport,
+                           unsigned long Flags,
+                           gmx_walltime_accounting_t walltime_accounting)
+ */
  double do_nm(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
-             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
+             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
@@ -2617,7 +2772,7 @@ double do_nm(FILE *fplog, t_commrec *cr,
               gmx_edsam_t  gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
-             gmx_membed_t gmx_unused membed,
+             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
@@ -2625,9 +2780,7 @@ double do_nm(FILE *fplog, t_commrec *cr,
  {
      const char          *NM = "Normal Mode Analysis";
      gmx_mdoutf_t         outf;
-    int                  natoms, atom, d;
      int                  nnodes, node;
-    rvec                *f_global;
      gmx_localtop_t      *top;
      gmx_enerdata_t      *enerd;
      rvec                *f;
@@ -2637,16 +2790,16 @@ double do_nm(FILE *fplog, t_commrec *cr,
      rvec                 mu_tot;
      rvec                *fneg, *dfdx;
      gmx_bool             bSparse; /* use sparse matrix storage format */
-    size_t               sz = 0;
+    size_t               sz;
      gmx_sparsematrix_t * sparse_matrix           = NULL;
      real           *     full_matrix             = NULL;
      em_state_t       *   state_work;
  
      /* added with respect to mdrun */
-    int        i, j, k, row, col;
-    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
-    real       x_min;
-    bool       bIsMaster = MASTER(cr);
+    int                       row, col;
+    real                      der_range = 10.0*sqrt(GMX_REAL_EPS);
+    real                      x_min;
+    bool                      bIsMaster = MASTER(cr);
  
      if (constr != NULL)
      {
@@ -2658,15 +2811,25 @@ double do_nm(FILE *fplog, t_commrec *cr,
      /* Init em and store the local state in state_minimum */
      init_em(fplog, NM, cr, inputrec,
              state_global, top_global, state_work, &top,
-            &f, &f_global,
+            &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
  
-    natoms = top_global->natoms;
-    snew(fneg, natoms);
-    snew(dfdx, natoms);
+    gmx_shellfc_t *shellfc = init_shell_flexcon(stdout,
+                                                top_global,
+                                                n_flexible_constraints(constr),
+                                                inputrec->nstcalcenergy,
+                                                DOMAINDECOMP(cr));
  
-#ifndef GMX_DOUBLE
+    if (shellfc)
+    {
+        make_local_shells(cr, mdatoms, shellfc);
+    }
+    std::vector<size_t> atom_index = get_atom_index(top_global);
+    snew(fneg, atom_index.size());
+    snew(dfdx, atom_index.size());
+
+#if !GMX_DOUBLE
      if (bIsMaster)
      {
          fprintf(stderr,
@@ -2688,9 +2851,9 @@ double do_nm(FILE *fplog, t_commrec *cr,
          md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
          bSparse = FALSE;
      }
-    else if (top_global->natoms < 1000)
+    else if (atom_index.size() < 1000)
      {
-        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
+        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size());
          bSparse = FALSE;
      }
      else
@@ -2699,21 +2862,19 @@ double do_nm(FILE *fplog, t_commrec *cr,
          bSparse = TRUE;
      }
  
-    if (bIsMaster)
-    {
-        sz = DIM*top_global->natoms;
+    /* Number of dimensions, based on real atoms, that is not vsites or shell */
+    sz = DIM*atom_index.size();
  
-        fprintf(stderr, "Allocating Hessian memory...\n\n");
+    fprintf(stderr, "Allocating Hessian memory...\n\n");
  
-        if (bSparse)
-        {
-            sparse_matrix = gmx_sparsematrix_init(sz);
-            sparse_matrix->compressed_symmetric = TRUE;
-        }
-        else
-        {
-            snew(full_matrix, sz*sz);
-        }
+    if (bSparse)
+    {
+        sparse_matrix = gmx_sparsematrix_init(sz);
+        sparse_matrix->compressed_symmetric = TRUE;
+    }
+    else
+    {
+        snew(full_matrix, sz*sz);
      }
  
      init_nrnb(nrnb);
@@ -2724,7 +2885,7 @@ double do_nm(FILE *fplog, t_commrec *cr,
      print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
  
      /* fudge nr of steps to nr of atoms */
-    inputrec->nsteps = natoms*2;
+    inputrec->nsteps = atom_index.size()*2;
  
      if (bIsMaster)
      {
@@ -2766,70 +2927,95 @@ double do_nm(FILE *fplog, t_commrec *cr,
       ************************************************************/
  
      /* Steps are divided one by one over the nodes */
-    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
+    bool bNS = true;
+    for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
      {
-
-        for (d = 0; d < DIM; d++)
+        size_t atom = atom_index[aid];
+        for (size_t d = 0; d < DIM; d++)
          {
-            x_min = state_work->s.x[atom][d];
+            gmx_bool    bBornRadii  = FALSE;
+            gmx_int64_t step        = 0;
+            int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
+            double      t           = 0;
  
-            state_work->s.x[atom][d] = x_min - der_range;
-
-            /* Make evaluate_energy do a single node force calculation */
-            cr->nnodes = 1;
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2, FALSE);
+            x_min = state_work->s.x[atom][d];
  
-            for (i = 0; i < natoms; i++)
+            for (unsigned int dx = 0; (dx < 2); dx++)
              {
-                copy_rvec(state_work->f[i], fneg[i]);
-            }
+                if (dx == 0)
+                {
+                    state_work->s.x[atom][d] = x_min - der_range;
+                }
+                else
+                {
+                    state_work->s.x[atom][d] = x_min + der_range;
+                }
  
-            state_work->s.x[atom][d] = x_min + der_range;
+                /* Make evaluate_energy do a single node force calculation */
+                cr->nnodes = 1;
+                if (shellfc)
+                {
+                    /* Now is the time to relax the shells */
+                    (void) relax_shell_flexcon(fplog, cr, bVerbose, step,
+                                               inputrec, bNS, force_flags,
+                                               top,
+                                               constr, enerd, fcd,
+                                               &state_work->s, state_work->f, vir, mdatoms,
+                                               nrnb, wcycle, graph, &top_global->groups,
+                                               shellfc, fr, bBornRadii, t, mu_tot,
+                                               vsite, NULL);
+                    bNS = false;
+                    step++;
+                }
+                else
+                {
+                    evaluate_energy(fplog, cr,
+                                    top_global, state_work, top,
+                                    inputrec, nrnb, wcycle, gstat,
+                                    vsite, constr, fcd, graph, mdatoms, fr,
+                                    mu_tot, enerd, vir, pres, atom*2+dx, FALSE);
+                }
  
-            evaluate_energy(fplog, cr,
-                            top_global, state_work, top,
-                            inputrec, nrnb, wcycle, gstat,
-                            vsite, constr, fcd, graph, mdatoms, fr,
-                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
-            cr->nnodes = nnodes;
+                cr->nnodes = nnodes;
+
+                if (dx == 0)
+                {
+                    for (size_t i = 0; i < atom_index.size(); i++)
+                    {
+                        copy_rvec(state_work->f[atom_index[i]], fneg[i]);
+                    }
+                }
+            }
  
              /* x is restored to original */
              state_work->s.x[atom][d] = x_min;
  
-            for (j = 0; j < natoms; j++)
+            for (size_t j = 0; j < atom_index.size(); j++)
              {
-                for (k = 0; (k < DIM); k++)
+                for (size_t k = 0; (k < DIM); k++)
                  {
                      dfdx[j][k] =
-                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
+                        -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
                  }
              }
  
              if (!bIsMaster)
              {
-#ifdef GMX_MPI
-#ifdef GMX_DOUBLE
-#define mpi_type MPI_DOUBLE
-#else
-#define mpi_type MPI_FLOAT
-#endif
-                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
-                         cr->mpi_comm_mygroup);
+#if GMX_MPI
+#define mpi_type GMX_MPI_REAL
+                MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
+                         cr->nodeid, cr->mpi_comm_mygroup);
  #endif
              }
              else
              {
-                for (node = 0; (node < nnodes && atom+node < natoms); node++)
+                for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++)
                  {
                      if (node > 0)
                      {
-#ifdef GMX_MPI
+#if GMX_MPI
                          MPI_Status stat;
-                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
+                        MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
                                   cr->mpi_comm_mygroup, &stat);
  #undef mpi_type
  #endif
@@ -2837,9 +3023,9 @@ double do_nm(FILE *fplog, t_commrec *cr,
  
                      row = (atom + node)*DIM + d;
  
-                    for (j = 0; j < natoms; j++)
+                    for (size_t j = 0; j < atom_index.size(); j++)
                      {
-                        for (k = 0; k < DIM; k++)
+                        for (size_t k = 0; k < DIM; k++)
                          {
                              col = j*DIM + k;
  
@@ -2869,7 +3055,8 @@ double do_nm(FILE *fplog, t_commrec *cr,
          if (bIsMaster && bVerbose)
          {
              fprintf(stderr, "\rFinished step %d out of %d",
-                    std::min(atom+nnodes, natoms), natoms);
+                    static_cast<int>(std::min(atom+nnodes, atom_index.size())),
+                    static_cast<int>(atom_index.size()));
              fflush(stderr);
          }
      }
@@ -2882,7 +3069,9 @@ double do_nm(FILE *fplog, t_commrec *cr,
  
      finish_em(cr, outf, walltime_accounting, wcycle);
  
-    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
+    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
  
      return 0;
  }
+
+} // namespace gmx