Merge branch release-5-1 into release-2016
authorMark Abraham <mark.j.abraham@gmail.com>
Thu, 8 Sep 2016 10:40:51 +0000 (12:40 +0200)
committerMark Abraham <mark.j.abraham@gmail.com>
Thu, 8 Sep 2016 10:44:55 +0000 (12:44 +0200)
Trivial conflicts in minimize.cpp from bug fix in same region that now
has exceptions caught within an OpenMP region.

Change-Id: I8b3c269a2afd7d4967e0f7310c96af6022533ddd

1  2 
src/gromacs/mdlib/minimize.cpp

index 591e20a7dc88cbfa338f971b35b97ed014b2a718,a6ffb5c793f445ef6779c929ac1548b8888f1648..2d5290d4b9502a9d1f900089717a5b1578c490ed
   * To help us fund GROMACS development, we humbly ask that you cite
   * the research papers on the package. Check out http://www.gromacs.org.
   */
 +/*! \internal \file
 + *
 + * \brief This file defines integrators for energy minimization
 + *
 + * \author Berk Hess <hess@kth.se>
 + * \author Erik Lindahl <erik@kth.se>
 + * \ingroup module_mdlib
 + */
  #include "gmxpre.h"
  
 +#include "minimize.h"
 +
  #include "config.h"
  
 -#include <math.h>
 -#include <string.h>
 -#include <time.h>
 +#include <cmath>
 +#include <cstring>
 +#include <ctime>
  
  #include <algorithm>
 +#include <vector>
  
 +#include "gromacs/commandline/filenm.h"
  #include "gromacs/domdec/domdec.h"
 +#include "gromacs/domdec/domdec_struct.h"
  #include "gromacs/ewald/pme.h"
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mtxio.h"
 -#include "gromacs/fileio/trajectory_writing.h"
 +#include "gromacs/gmxlib/md_logging.h"
 +#include "gromacs/gmxlib/network.h"
 +#include "gromacs/gmxlib/nrnb.h"
  #include "gromacs/imd/imd.h"
 -#include "gromacs/legacyheaders/constr.h"
 -#include "gromacs/legacyheaders/force.h"
 -#include "gromacs/legacyheaders/gmx_omp_nthreads.h"
 -#include "gromacs/legacyheaders/macros.h"
 -#include "gromacs/legacyheaders/md_logging.h"
 -#include "gromacs/legacyheaders/md_support.h"
 -#include "gromacs/legacyheaders/mdatoms.h"
 -#include "gromacs/legacyheaders/mdebin.h"
 -#include "gromacs/legacyheaders/mdrun.h"
 -#include "gromacs/legacyheaders/names.h"
 -#include "gromacs/legacyheaders/network.h"
 -#include "gromacs/legacyheaders/nrnb.h"
 -#include "gromacs/legacyheaders/ns.h"
 -#include "gromacs/legacyheaders/sim_util.h"
 -#include "gromacs/legacyheaders/tgroup.h"
 -#include "gromacs/legacyheaders/txtdump.h"
 -#include "gromacs/legacyheaders/typedefs.h"
 -#include "gromacs/legacyheaders/update.h"
 -#include "gromacs/legacyheaders/vsite.h"
 -#include "gromacs/legacyheaders/types/commrec.h"
  #include "gromacs/linearalgebra/sparsematrix.h"
  #include "gromacs/listed-forces/manage-threading.h"
 +#include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
 +#include "gromacs/mdlib/constr.h"
 +#include "gromacs/mdlib/force.h"
 +#include "gromacs/mdlib/forcerec.h"
 +#include "gromacs/mdlib/gmx_omp_nthreads.h"
 +#include "gromacs/mdlib/md_support.h"
 +#include "gromacs/mdlib/mdatoms.h"
 +#include "gromacs/mdlib/mdebin.h"
 +#include "gromacs/mdlib/mdrun.h"
 +#include "gromacs/mdlib/ns.h"
 +#include "gromacs/mdlib/shellfc.h"
 +#include "gromacs/mdlib/sim_util.h"
 +#include "gromacs/mdlib/tgroup.h"
 +#include "gromacs/mdlib/trajectory_writing.h"
 +#include "gromacs/mdlib/update.h"
 +#include "gromacs/mdlib/vsite.h"
 +#include "gromacs/mdtypes/commrec.h"
 +#include "gromacs/mdtypes/inputrec.h"
 +#include "gromacs/mdtypes/md_enums.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/timing/wallcycle.h"
  #include "gromacs/timing/walltime_accounting.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/utility/cstringutil.h"
 +#include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/smalloc.h"
  
 +//! Utility structure for manipulating states during EM
  typedef struct {
 +    //! Copy of the global state
      t_state  s;
 +    //! Force array
      rvec    *f;
 +    //! Potential energy
      real     epot;
 +    //! Norm of the force
      real     fnorm;
 +    //! Maximum force
      real     fmax;
 +    //! Direction
      int      a_fmax;
  } em_state_t;
  
 +//! Initiate em_state_t structure and return pointer to it
  static em_state_t *init_em_state()
  {
      em_state_t *ems;
      return ems;
  }
  
 +//! Print the EM starting conditions
  static void print_em_start(FILE                     *fplog,
                             t_commrec                *cr,
                             gmx_walltime_accounting_t walltime_accounting,
      wallcycle_start(wcycle, ewcRUN);
      print_start(fplog, cr, walltime_accounting, name);
  }
 +
 +//! Stop counting time for EM
  static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
                          gmx_wallcycle_t           wcycle)
  {
      walltime_accounting_end(walltime_accounting);
  }
  
 +//! Printing a log file and console header
  static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
  {
      fprintf(out, "\n");
      fprintf(out, "   Number of steps    = %12d\n", nsteps);
  }
  
 +//! Print warning message
  static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
  {
      char buffer[2048];
      fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
  }
  
 -
 -
 +//! Print message about convergence of the EM
  static void print_converged(FILE *fp, const char *alg, real ftol,
                              gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
                              real epot, real fmax, int nfmax, real fnorm)
                  alg, ftol, gmx_step_str(count, buf));
      }
  
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
      fprintf(fp, "Potential Energy  = %21.14e\n", epot);
      fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
      fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
  #endif
  }
  
 +//! Compute the norm and max of the force array in parallel
  static void get_f_norm_max(t_commrec *cr,
                             t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
                             real *fnorm, real *fmax, int *a_fmax)
              {
                  if (!opts->nFreeze[gf][m])
                  {
 -                    fam += sqr(f[i][m]);
 +                    fam += gmx::square(f[i][m]);
                  }
              }
              fnorm2 += fam;
      }
  }
  
 +//! Compute the norm of the force
  static void get_state_f_norm_max(t_commrec *cr,
                                   t_grpopts *opts, t_mdatoms *mdatoms,
                                   em_state_t *ems)
      get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
  }
  
 +//! Initialize the energy minimization
  void init_em(FILE *fplog, const char *title,
               t_commrec *cr, t_inputrec *ir,
               t_state *state_global, gmx_mtop_t *top_global,
               em_state_t *ems, gmx_localtop_t **top,
 -             rvec **f, rvec **f_global,
 +             rvec **f,
               t_nrnb *nrnb, rvec mu_tot,
               t_forcerec *fr, gmx_enerdata_t **enerd,
               t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
          dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
                              state_global, top_global, ir,
                              &ems->s, &ems->f, mdatoms, *top,
 -                            fr, vsite, NULL, constr,
 +                            fr, vsite, constr,
                              nrnb, NULL, FALSE);
          dd_store_state(cr->dd, &ems->s);
  
 -        if (ir->nstfout)
 -        {
 -            snew(*f_global, top_global->natoms);
 -        }
 -        else
 -        {
 -            *f_global = NULL;
 -        }
          *graph = NULL;
      }
      else
  
          /* Just copy the state */
          ems->s = *state_global;
 -        snew(ems->s.x, ems->s.nalloc);
 -        snew(ems->f, ems->s.nalloc);
 +        /* We need to allocate one element extra, since we might use
 +         * (unaligned) 4-wide SIMD loads to access rvec entries.
 +         */
 +        snew(ems->s.x, ems->s.nalloc + 1);
 +        snew(ems->f, ems->s.nalloc+1);
 +        snew(ems->s.v, ems->s.nalloc+1);
          for (i = 0; i < state_global->natoms; i++)
          {
              copy_rvec(state_global->x[i], ems->s.x[i]);
          }
          copy_mat(state_global->box, ems->s.box);
  
 -        *top      = gmx_mtop_generate_local_top(top_global, ir);
 -        *f_global = *f;
 +        *top      = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
  
          setup_bonded_threading(fr, &(*top)->idef);
  
      calc_shifts(ems->s.box, fr->shift_vec);
  }
  
 +//! Finalize the minimization
  static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
                        gmx_walltime_accounting_t walltime_accounting,
                        gmx_wallcycle_t wcycle)
      em_time_end(walltime_accounting, wcycle);
  }
  
 +//! Swap two different EM states during minimization
  static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
  {
      em_state_t tmp;
      *ems2 = tmp;
  }
  
 +//! Copy coordinate from an EM state to a "normal" state structure
  static void copy_em_coords(em_state_t *ems, t_state *state)
  {
      int i;
      }
  }
  
 +//! Save the EM trajectory
  static void write_em_traj(FILE *fplog, t_commrec *cr,
                            gmx_mdoutf_t outf,
                            gmx_bool bX, gmx_bool bF, const char *confout,
                            gmx_mtop_t *top_global,
                            t_inputrec *ir, gmx_int64_t step,
                            em_state_t *state,
 -                          t_state *state_global, rvec *f_global)
 +                          t_state *state_global)
  {
      int      mdof_flags;
      gmx_bool bIMDout = FALSE;
      if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
      {
          copy_em_coords(state, state_global);
 -        f_global = state->f;
      }
  
      mdof_flags = 0;
  
      mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
                                       top_global, step, (double)step,
 -                                     &state->s, state_global, state->f, f_global);
 +                                     &state->s, state_global, state->f);
  
      if (confout != NULL && MASTER(cr))
      {
      }
  }
  
 -static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 +//! \brief Do one minimization step
 +//
 +// \returns true when the step succeeded, false when a constraint error occurred
 +static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
                         gmx_bool bMolPBC,
                         em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
                         gmx_constr_t constr, gmx_localtop_t *top,
  
  {
      t_state *s1, *s2;
-     int      i;
      int      start, end;
-     rvec    *x1, *x2;
      real     dvdl_constr;
      int      nthreads gmx_unused;
  
 +    bool     validStep = true;
 +
      s1 = &ems1->s;
      s2 = &ems2->s;
  
      if (s2->nalloc != s1->nalloc)
      {
          s2->nalloc = s1->nalloc;
 -        srenew(s2->x, s1->nalloc);
 +        /* We need to allocate one element extra, since we might use
 +         * (unaligned) 4-wide SIMD loads to access rvec entries.
 +         */
 +        srenew(s2->x, s1->nalloc + 1);
          srenew(ems2->f,  s1->nalloc);
          if (s2->flags & (1<<estCGP))
          {
 -            srenew(s2->cg_p,  s1->nalloc);
 +            srenew(s2->cg_p,  s1->nalloc + 1);
          }
      }
  
      s2->natoms = s1->natoms;
      copy_mat(s1->box, s2->box);
      /* Copy free energy state */
-     for (i = 0; i < efptNR; i++)
+     for (int i = 0; i < efptNR; i++)
      {
          s2->lambda[i] = s1->lambda[i];
      }
      start = 0;
      end   = md->homenr;
  
-     x1 = s1->x;
-     x2 = s2->x;
      // cppcheck-suppress unreadVariable
      nthreads = gmx_omp_nthreads_get(emntUpdate);
  #pragma omp parallel num_threads(nthreads)
      {
-         int gf, i, m;
+         rvec *x1 = s1->x;
+         rvec *x2 = s2->x;
  
-         gf = 0;
+         int   gf = 0;
  #pragma omp for schedule(static) nowait
-         for (i = start; i < end; i++)
+         for (int i = start; i < end; i++)
          {
 -            if (md->cFREEZE)
 -            {
 -                gf = md->cFREEZE[i];
 -            }
 -            for (int m = 0; m < DIM; m++)
 +            try
              {
 -                if (ir->opts.nFreeze[gf][m])
 +                if (md->cFREEZE)
                  {
 -                    x2[i][m] = x1[i][m];
 +                    gf = md->cFREEZE[i];
                  }
-                 for (m = 0; m < DIM; m++)
 -                else
++                for (int m = 0; m < DIM; m++)
                  {
 -                    x2[i][m] = x1[i][m] + a*f[i][m];
 +                    if (ir->opts.nFreeze[gf][m])
 +                    {
 +                        x2[i][m] = x1[i][m];
 +                    }
 +                    else
 +                    {
 +                        x2[i][m] = x1[i][m] + a*f[i][m];
 +                    }
                  }
              }
 +            GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
          }
  
          if (s2->flags & (1<<estCGP))
          {
              /* Copy the CG p vector */
-             x1 = s1->cg_p;
-             x2 = s2->cg_p;
+             rvec *p1 = s1->cg_p;
+             rvec *p2 = s2->cg_p;
  #pragma omp for schedule(static) nowait
-             for (i = start; i < end; i++)
+             for (int i = start; i < end; i++)
              {
-                 copy_rvec(x1[i], x2[i]);
 +                // Trivial OpenMP block that does not throw
+                 copy_rvec(p1[i], p2[i]);
              }
          }
  
              {
  #pragma omp barrier
                  s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 -                srenew(s2->cg_gl, s2->cg_gl_nalloc);
 +                try
 +                {
 +                    /* We need to allocate one element extra, since we might use
 +                     * (unaligned) 4-wide SIMD loads to access rvec entries.
 +                     */
 +                    srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
 +                }
 +                GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
  #pragma omp barrier
              }
              s2->ncg_gl = s1->ncg_gl;
  #pragma omp for schedule(static) nowait
-             for (i = 0; i < s2->ncg_gl; i++)
+             for (int i = 0; i < s2->ncg_gl; i++)
              {
                  s2->cg_gl[i] = s1->cg_gl[i];
              }
      {
          wallcycle_start(wcycle, ewcCONSTR);
          dvdl_constr = 0;
 -        constrain(NULL, TRUE, TRUE, constr, &top->idef,
 -                  ir, cr, count, 0, 1.0, md,
 -                  s1->x, s2->x, NULL, bMolPBC, s2->box,
 -                  s2->lambda[efptBONDED], &dvdl_constr,
 -                  NULL, NULL, nrnb, econqCoord);
 +        validStep   =
 +            constrain(NULL, TRUE, TRUE, constr, &top->idef,
 +                      ir, cr, count, 0, 1.0, md,
 +                      s1->x, s2->x, NULL, bMolPBC, s2->box,
 +                      s2->lambda[efptBONDED], &dvdl_constr,
 +                      NULL, NULL, nrnb, econqCoord);
          wallcycle_stop(wcycle, ewcCONSTR);
 +
 +        // We should move this check to the different minimizers
 +        if (!validStep && ir->eI != eiSteep)
 +        {
 +            gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
 +                      EI(ir->eI), EI(eiSteep), EI(ir->eI));
 +        }
      }
 +
 +    return validStep;
  }
  
 +//! Prepare EM for using domain decomposition parallellization
  static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
                                     gmx_mtop_t *top_global, t_inputrec *ir,
                                     em_state_t *ems, gmx_localtop_t *top,
      dd_partition_system(fplog, step, cr, FALSE, 1,
                          NULL, top_global, ir,
                          &ems->s, &ems->f,
 -                        mdatoms, top, fr, vsite, NULL, constr,
 +                        mdatoms, top, fr, vsite, constr,
                          nrnb, wcycle, FALSE);
      dd_store_state(cr->dd, &ems->s);
  }
  
 +//! De one energy evaluation
  static void evaluate_energy(FILE *fplog, t_commrec *cr,
                              gmx_mtop_t *top_global,
                              em_state_t *ems, gmx_localtop_t *top,
               ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
               GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
               GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 -             (bNS ? GMX_FORCE_NS | GMX_FORCE_DO_LR : 0));
 +             (bNS ? GMX_FORCE_NS : 0));
  
      /* Clear the unused shake virial and pressure */
      clear_mat(shake_vir);
      {
          wallcycle_start(wcycle, ewcMoveE);
  
 -        global_stat(fplog, gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 +        global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
                      inputrec, NULL, NULL, NULL, 1, &terminate,
 -                    top_global, &ems->s, FALSE,
 +                    NULL, FALSE,
                      CGLO_ENERGY |
                      CGLO_PRESSURE |
                      CGLO_CONSTRAINT);
      }
  
      /* Calculate long range corrections to pressure and energy */
 -    calc_dispcorr(inputrec, fr, top_global->natoms, ems->s.box, ems->s.lambda[efptVDW],
 +    calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
                    pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
      enerd->term[F_DISPCORR] = enercorr;
      enerd->term[F_EPOT]    += enercorr;
      }
  }
  
 +//! Parallel utility summing energies and forces
  static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 -                              gmx_mtop_t *mtop,
 +                              gmx_mtop_t *top_global,
                                em_state_t *s_min, em_state_t *s_b)
  {
      rvec          *fm, *fb, *fmg;
       * This conflicts with the spirit of domain decomposition,
       * but to fully optimize this a much more complicated algorithm is required.
       */
 -    snew(fmg, mtop->natoms);
 +    snew(fmg, top_global->natoms);
  
      ncg   = s_min->s.ncg_gl;
      cg_gl = s_min->s.cg_gl;
              i++;
          }
      }
 -    gmx_sum(mtop->natoms*3, fmg[0], cr);
 +    gmx_sum(top_global->natoms*3, fmg[0], cr);
  
      /* Now we will determine the part of the sum for the cgs in state s_b */
      ncg         = s_b->s.ncg_gl;
      partsum     = 0;
      i           = 0;
      gf          = 0;
 -    grpnrFREEZE = mtop->groups.grpnr[egcFREEZE];
 +    grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
      for (c = 0; c < ncg; c++)
      {
          cg = cg_gl[c];
      return partsum;
  }
  
 +//! Print some stuff, like beta, whatever that means.
  static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 -                    gmx_mtop_t *mtop,
 +                    gmx_mtop_t *top_global,
                      em_state_t *s_min, em_state_t *s_b)
  {
      rvec  *fm, *fb;
      else
      {
          /* We need to reorder cgs while summing */
 -        sum = reorder_partsum(cr, opts, mdatoms, mtop, s_min, s_b);
 +        sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
      }
      if (PAR(cr))
      {
          gmx_sumd(1, &sum, cr);
      }
  
 -    return sum/sqr(s_min->fnorm);
 +    return sum/gmx::square(s_min->fnorm);
  }
  
 +namespace gmx
 +{
 +
 +/*! \brief Do conjugate gradients minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           gmx_membed_t gmx_unused *membed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_cg(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
 -             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
               gmx_edsam_t gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -             gmx_membed_t gmx_unused membed,
 +             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
      rvec             *f;
      gmx_global_stat_t gstat;
      t_graph          *graph;
 -    rvec             *f_global, *p, *sf;
 +    rvec             *p, *sf;
      double            gpa, gpb, gpc, tmp, minstep;
      real              fnormn;
      real              stepsize;
  
      /* Init em and store the local state in s_min */
      init_em(fplog, CG, cr, inputrec,
 -            state_global, top_global, s_min, &top, &f, &f_global,
 +            state_global, top_global, s_min, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
                     mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
 -        print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
       * we either converge or reach the max number of steps.
       */
      converged = FALSE;
 -    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* start taking steps in a new direction
  
          write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                        top_global, inputrec, step,
 -                      s_min, state_global, f_global);
 +                      s_min, state_global);
  
          /* Take a step downhill.
           * In theory, we should minimize the function along this direction.
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
                          s_min->fmax, s_min->a_fmax+1);
 +                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
  
              if (do_log)
              {
 -                print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send energies and positions to the IMD client if bIMD is TRUE. */
           */
          converged = converged || (s_min->fmax < inputrec->em_tol);
  
 -    } /* End of the loop */
 +    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
          if (!do_log)
          {
              /* Write final value to log since we didn't do anything the last step */
 -            print_ebin_header(fplog, step, step, s_min->s.lambda[efptFEP]);
 +            print_ebin_header(fplog, step, step);
          }
          if (!do_ene || !do_log)
          {
              /* Write final energy file entries */
              print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                         !do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
      }
  
  
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
 -                  s_min, state_global, f_global);
 +                  s_min, state_global);
  
  
      if (MASTER(cr))
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +
 +/*! \brief Do L-BFGS conjugate gradients minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_lbfgs(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
 -                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
                  t_inputrec *inputrec,
                  gmx_mtop_t *top_global, t_fcdata *fcd,
 -                t_state *state,
 +                t_state *state_global,
                  t_mdatoms *mdatoms,
                  t_nrnb *nrnb, gmx_wallcycle_t wcycle,
                  gmx_edsam_t gmx_unused ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -                gmx_membed_t gmx_unused membed,
 +                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
      rvec              *f;
      gmx_global_stat_t  gstat;
      t_graph           *graph;
 -    rvec              *f_global;
      int                ncorr, nmaxcorr, point, cp, neval, nminstep;
      double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
      real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
          gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
      }
  
 -    n        = 3*state->natoms;
 +    n        = 3*state_global->natoms;
      nmaxcorr = inputrec->nbfgscorr;
  
      /* Allocate memory */
  
      /* Init em */
      init_em(fplog, LBFGS, cr, inputrec,
 -            state, top_global, &ems, &top, &f, &f_global,
 +            state_global, top_global, &ems, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
      /* Do_lbfgs is not completely updated like do_steep and do_cg,
      sfree(ems.s.x);
      sfree(ems.f);
  
 -    xx = (real *)state->x;
 +    xx = (real *)state_global->x;
      ff = (real *)f;
  
      start = 0;
  
      if (vsite)
      {
 -        construct_vsites(vsite, state->x, 1, NULL,
 +        construct_vsites(vsite, state_global->x, 1, NULL,
                           top->idef.iparams, top->idef.il,
 -                         fr->ePBC, fr->bMolPBC, cr, state->box);
 +                         fr->ePBC, fr->bMolPBC, cr, state_global->box);
      }
  
      /* Call the force routine and some auxiliary (neighboursearching etc.) */
       * We do not unshift, so molecules are always whole
       */
      neval++;
 -    ems.s.x = state->x;
 +    ems.s.x = state_global->x;
      ems.f   = f;
      evaluate_energy(fplog, cr,
                      top_global, &ems, top,
      {
          /* Copy stuff to the energy bin for easy printing etc. */
          upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 -                   mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                   mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                     NULL, NULL, vir, pres, NULL, mu_tot, constr);
  
 -        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
          print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
      where();
  
  
      if (MASTER(cr))
      {
 -        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
          fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
          fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
  
      /* Set the gradient from the force */
      converged = FALSE;
 -    for (step = 0; (number_steps < 0 || (number_steps >= 0 && step <= number_steps)) && !converged; step++)
 +    for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
      {
  
          /* Write coordinates if necessary */
          }
  
          mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 -                                         top_global, step, (real)step, state, state, f, f);
 +                                         top_global, step, (real)step, state_global, state_global, f);
  
          /* Do the linesearching in the direction dx[point][0..(n-1)] */
  
          {
              if (bVerbose)
              {
 -                double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +                double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
                  fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
                          step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
 +                fflush(stderr);
              }
              /* Store the new (lower) energies */
              upd_mdebin(mdebin, FALSE, FALSE, (double)step,
 -                       mdatoms->tmass, enerd, state, inputrec->fepvals, inputrec->expandedvals, state->box,
 +                       mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
                         NULL, NULL, vir, pres, NULL, mu_tot, constr);
              do_log = do_per_step(step, inputrec->nstlog);
              do_ene = do_per_step(step, inputrec->nstenergy);
              if (do_log)
              {
 -                print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +                print_ebin_header(fplog, step, step);
              }
              print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
                         do_log ? fplog : NULL, step, step, eprNORMAL,
 -                       TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                       mdebin, fcd, &(top_global->groups), &(inputrec->opts));
          }
  
          /* Send x and E to IMD client, if bIMD is TRUE. */
 -        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state->box, state->x, inputrec, 0, wcycle) && MASTER(cr))
 +        if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
          {
              IMD_send_positions(inputrec->imd);
          }
           */
          converged = converged || (fmax < inputrec->em_tol);
  
 -    } /* End of the loop */
 +    }   /* End of the loop */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
       */
      if (!do_log) /* Write final value to log since we didn't do anythin last step */
      {
 -        print_ebin_header(fplog, step, step, state->lambda[efptFEP]);
 +        print_ebin_header(fplog, step, step);
      }
      if (!do_ene || !do_log) /* Write final energy file entries */
      {
          print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
                     !do_log ? fplog : NULL, step, step, eprNORMAL,
 -                   TRUE, mdebin, fcd, &(top_global->groups), &(inputrec->opts));
 +                   mdebin, fcd, &(top_global->groups), &(inputrec->opts));
      }
  
      /* Print some stuff... */
      do_f = !do_per_step(step, inputrec->nstfout);
      write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, step,
 -                  &ems, state, f);
 +                  &ems, state_global);
  
      if (MASTER(cr))
      {
 -        double sqrtNumAtoms = sqrt(static_cast<double>(state->natoms));
 +        double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
          print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
                          number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
          print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
      walltime_accounting_set_nsteps_done(walltime_accounting, step);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +/*! \brief Do steepest descents minimization
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_steep(FILE *fplog, t_commrec *cr,
                  int nfile, const t_filenm fnm[],
 -                const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused bCompact,
 +                const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
                  int gmx_unused nstglobalcomm,
                  gmx_vsite_t *vsite, gmx_constr_t constr,
                  int gmx_unused stepout,
                  gmx_edsam_t gmx_unused  ed,
                  t_forcerec *fr,
                  int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -                gmx_membed_t gmx_unused membed,
 +                gmx_membed_t gmx_unused *membed,
                  real gmx_unused cpt_period, real gmx_unused max_hours,
                  int imdport,
                  unsigned long gmx_unused Flags,
  {
      const char       *SD = "Steepest Descents";
      em_state_t       *s_min, *s_try;
 -    rvec             *f_global;
      gmx_localtop_t   *top;
      gmx_enerdata_t   *enerd;
      rvec             *f;
  
      /* Init em and store the local state in s_try */
      init_em(fplog, SD, cr, inputrec,
 -            state_global, top_global, s_try, &top, &f, &f_global,
 +            state_global, top_global, s_try, &top, &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
  
          bAbort = (nsteps >= 0) && (count == nsteps);
  
          /* set new coordinates, except for first step */
 +        bool validStep = true;
          if (count > 0)
          {
 -            do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
 -                       s_min, stepsize, s_min->f, s_try,
 -                       constr, top, nrnb, wcycle, count);
 +            validStep =
 +                do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
 +                           s_min, stepsize, s_min->f, s_try,
 +                           constr, top, nrnb, wcycle, count);
          }
  
 -        evaluate_energy(fplog, cr,
 -                        top_global, s_try, top,
 -                        inputrec, nrnb, wcycle, gstat,
 -                        vsite, constr, fcd, graph, mdatoms, fr,
 -                        mu_tot, enerd, vir, pres, count, count == 0);
 +        if (validStep)
 +        {
 +            evaluate_energy(fplog, cr,
 +                            top_global, s_try, top,
 +                            inputrec, nrnb, wcycle, gstat,
 +                            vsite, constr, fcd, graph, mdatoms, fr,
 +                            mu_tot, enerd, vir, pres, count, count == 0);
 +        }
 +        else
 +        {
 +            // Signal constraint error during stepping with energy=inf
 +            s_try->epot = std::numeric_limits<real>::infinity();
 +        }
  
          if (MASTER(cr))
          {
 -            print_ebin_header(fplog, count, count, s_try->s.lambda[efptFEP]);
 +            print_ebin_header(fplog, count, count);
          }
  
          if (count == 0)
                  fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
                          count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
                          ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
 +                fflush(stderr);
              }
  
              if ( (count == 0) || (s_try->epot < s_min->epot) )
                  print_ebin(mdoutf_get_fp_ene(outf), TRUE,
                             do_per_step(steps_accepted, inputrec->nstdisreout),
                             do_per_step(steps_accepted, inputrec->nstorireout),
 -                           fplog, count, count, eprNORMAL, TRUE,
 +                           fplog, count, count, eprNORMAL,
                             mdebin, fcd, &(top_global->groups), &(inputrec->opts));
                  fflush(fplog);
              }
              do_f = do_per_step(steps_accepted, inputrec->nstfout);
              write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
                            top_global, inputrec, count,
 -                          s_min, state_global, f_global);
 +                          s_min, state_global);
          }
          else
          {
          stepsize = ustep/s_min->fmax;
  
          /* Check if stepsize is too small, with 1 nm as a characteristic length */
 -#ifdef GMX_DOUBLE
 +#if GMX_DOUBLE
          if (count == nsteps || ustep < 1e-12)
  #else
          if (count == nsteps || ustep < 1e-6)
          }
  
          count++;
 -    } /* End of the loop  */
 +    }   /* End of the loop  */
  
      /* IMD cleanup, if bIMD is TRUE. */
      IMD_finalize(inputrec->bIMD, inputrec->imd);
      }
      write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
                    top_global, inputrec, count,
 -                  s_min, state_global, f_global);
 +                  s_min, state_global);
  
      if (MASTER(cr))
      {
      walltime_accounting_set_nsteps_done(walltime_accounting, count);
  
      return 0;
 -} /* That's all folks */
 -
 -
 +}   /* That's all folks */
 +
 +/*! \brief Do normal modes analysis
 +    \copydoc integrator_t (FILE *fplog, t_commrec *cr,
 +                           int nfile, const t_filenm fnm[],
 +                           const gmx_output_env_t *oenv, gmx_bool bVerbose,
 +                           int nstglobalcomm,
 +                           gmx_vsite_t *vsite, gmx_constr_t constr,
 +                           int stepout,
 +                           t_inputrec *inputrec,
 +                           gmx_mtop_t *top_global, t_fcdata *fcd,
 +                           t_state *state_global,
 +                           t_mdatoms *mdatoms,
 +                           t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 +                           gmx_edsam_t ed,
 +                           t_forcerec *fr,
 +                           int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
 +                           real cpt_period, real max_hours,
 +                           int imdport,
 +                           unsigned long Flags,
 +                           gmx_walltime_accounting_t walltime_accounting)
 + */
  double do_nm(FILE *fplog, t_commrec *cr,
               int nfile, const t_filenm fnm[],
 -             const output_env_t gmx_unused oenv, gmx_bool bVerbose, gmx_bool gmx_unused  bCompact,
 +             const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
               int gmx_unused nstglobalcomm,
               gmx_vsite_t *vsite, gmx_constr_t constr,
               int gmx_unused stepout,
               gmx_edsam_t  gmx_unused ed,
               t_forcerec *fr,
               int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
 -             gmx_membed_t gmx_unused membed,
 +             gmx_membed_t gmx_unused *membed,
               real gmx_unused cpt_period, real gmx_unused max_hours,
               int imdport,
               unsigned long gmx_unused Flags,
  {
      const char          *NM = "Normal Mode Analysis";
      gmx_mdoutf_t         outf;
 -    int                  natoms, atom, d;
      int                  nnodes, node;
 -    rvec                *f_global;
      gmx_localtop_t      *top;
      gmx_enerdata_t      *enerd;
      rvec                *f;
      rvec                 mu_tot;
      rvec                *fneg, *dfdx;
      gmx_bool             bSparse; /* use sparse matrix storage format */
 -    size_t               sz = 0;
 +    size_t               sz;
      gmx_sparsematrix_t * sparse_matrix           = NULL;
      real           *     full_matrix             = NULL;
      em_state_t       *   state_work;
  
      /* added with respect to mdrun */
 -    int        i, j, k, row, col;
 -    real       der_range = 10.0*sqrt(GMX_REAL_EPS);
 -    real       x_min;
 -    bool       bIsMaster = MASTER(cr);
 +    int                       row, col;
 +    real                      der_range = 10.0*sqrt(GMX_REAL_EPS);
 +    real                      x_min;
 +    bool                      bIsMaster = MASTER(cr);
  
      if (constr != NULL)
      {
      /* Init em and store the local state in state_minimum */
      init_em(fplog, NM, cr, inputrec,
              state_global, top_global, state_work, &top,
 -            &f, &f_global,
 +            &f,
              nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
              nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
  
 -    natoms = top_global->natoms;
 -    snew(fneg, natoms);
 -    snew(dfdx, natoms);
 +    gmx_shellfc_t *shellfc = init_shell_flexcon(stdout,
 +                                                top_global,
 +                                                n_flexible_constraints(constr),
 +                                                inputrec->nstcalcenergy,
 +                                                DOMAINDECOMP(cr));
  
 -#ifndef GMX_DOUBLE
 +    if (shellfc)
 +    {
 +        make_local_shells(cr, mdatoms, shellfc);
 +    }
 +    std::vector<size_t> atom_index = get_atom_index(top_global);
 +    snew(fneg, atom_index.size());
 +    snew(dfdx, atom_index.size());
 +
 +#if !GMX_DOUBLE
      if (bIsMaster)
      {
          fprintf(stderr,
          md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
          bSparse = FALSE;
      }
 -    else if (top_global->natoms < 1000)
 +    else if (atom_index.size() < 1000)
      {
 -        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", top_global->natoms);
 +        md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size());
          bSparse = FALSE;
      }
      else
          bSparse = TRUE;
      }
  
 -    if (bIsMaster)
 -    {
 -        sz = DIM*top_global->natoms;
 +    /* Number of dimensions, based on real atoms, that is not vsites or shell */
 +    sz = DIM*atom_index.size();
  
 -        fprintf(stderr, "Allocating Hessian memory...\n\n");
 +    fprintf(stderr, "Allocating Hessian memory...\n\n");
  
 -        if (bSparse)
 -        {
 -            sparse_matrix = gmx_sparsematrix_init(sz);
 -            sparse_matrix->compressed_symmetric = TRUE;
 -        }
 -        else
 -        {
 -            snew(full_matrix, sz*sz);
 -        }
 +    if (bSparse)
 +    {
 +        sparse_matrix = gmx_sparsematrix_init(sz);
 +        sparse_matrix->compressed_symmetric = TRUE;
 +    }
 +    else
 +    {
 +        snew(full_matrix, sz*sz);
      }
  
      init_nrnb(nrnb);
      print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
  
      /* fudge nr of steps to nr of atoms */
 -    inputrec->nsteps = natoms*2;
 +    inputrec->nsteps = atom_index.size()*2;
  
      if (bIsMaster)
      {
       ************************************************************/
  
      /* Steps are divided one by one over the nodes */
 -    for (atom = cr->nodeid; atom < natoms; atom += nnodes)
 +    bool bNS = true;
 +    for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
      {
 -
 -        for (d = 0; d < DIM; d++)
 +        size_t atom = atom_index[aid];
 +        for (size_t d = 0; d < DIM; d++)
          {
 -            x_min = state_work->s.x[atom][d];
 +            gmx_bool    bBornRadii  = FALSE;
 +            gmx_int64_t step        = 0;
 +            int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
 +            double      t           = 0;
  
 -            state_work->s.x[atom][d] = x_min - der_range;
 -
 -            /* Make evaluate_energy do a single node force calculation */
 -            cr->nnodes = 1;
 -            evaluate_energy(fplog, cr,
 -                            top_global, state_work, top,
 -                            inputrec, nrnb, wcycle, gstat,
 -                            vsite, constr, fcd, graph, mdatoms, fr,
 -                            mu_tot, enerd, vir, pres, atom*2, FALSE);
 +            x_min = state_work->s.x[atom][d];
  
 -            for (i = 0; i < natoms; i++)
 +            for (unsigned int dx = 0; (dx < 2); dx++)
              {
 -                copy_rvec(state_work->f[i], fneg[i]);
 -            }
 +                if (dx == 0)
 +                {
 +                    state_work->s.x[atom][d] = x_min - der_range;
 +                }
 +                else
 +                {
 +                    state_work->s.x[atom][d] = x_min + der_range;
 +                }
  
 -            state_work->s.x[atom][d] = x_min + der_range;
 +                /* Make evaluate_energy do a single node force calculation */
 +                cr->nnodes = 1;
 +                if (shellfc)
 +                {
 +                    /* Now is the time to relax the shells */
 +                    (void) relax_shell_flexcon(fplog, cr, bVerbose, step,
 +                                               inputrec, bNS, force_flags,
 +                                               top,
 +                                               constr, enerd, fcd,
 +                                               &state_work->s, state_work->f, vir, mdatoms,
 +                                               nrnb, wcycle, graph, &top_global->groups,
 +                                               shellfc, fr, bBornRadii, t, mu_tot,
 +                                               vsite, NULL);
 +                    bNS = false;
 +                    step++;
 +                }
 +                else
 +                {
 +                    evaluate_energy(fplog, cr,
 +                                    top_global, state_work, top,
 +                                    inputrec, nrnb, wcycle, gstat,
 +                                    vsite, constr, fcd, graph, mdatoms, fr,
 +                                    mu_tot, enerd, vir, pres, atom*2+dx, FALSE);
 +                }
  
 -            evaluate_energy(fplog, cr,
 -                            top_global, state_work, top,
 -                            inputrec, nrnb, wcycle, gstat,
 -                            vsite, constr, fcd, graph, mdatoms, fr,
 -                            mu_tot, enerd, vir, pres, atom*2+1, FALSE);
 -            cr->nnodes = nnodes;
 +                cr->nnodes = nnodes;
 +
 +                if (dx == 0)
 +                {
 +                    for (size_t i = 0; i < atom_index.size(); i++)
 +                    {
 +                        copy_rvec(state_work->f[atom_index[i]], fneg[i]);
 +                    }
 +                }
 +            }
  
              /* x is restored to original */
              state_work->s.x[atom][d] = x_min;
  
 -            for (j = 0; j < natoms; j++)
 +            for (size_t j = 0; j < atom_index.size(); j++)
              {
 -                for (k = 0; (k < DIM); k++)
 +                for (size_t k = 0; (k < DIM); k++)
                  {
                      dfdx[j][k] =
 -                        -(state_work->f[j][k] - fneg[j][k])/(2*der_range);
 +                        -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
                  }
              }
  
              if (!bIsMaster)
              {
 -#ifdef GMX_MPI
 -#ifdef GMX_DOUBLE
 -#define mpi_type MPI_DOUBLE
 -#else
 -#define mpi_type MPI_FLOAT
 -#endif
 -                MPI_Send(dfdx[0], natoms*DIM, mpi_type, MASTERNODE(cr), cr->nodeid,
 -                         cr->mpi_comm_mygroup);
 +#if GMX_MPI
 +#define mpi_type GMX_MPI_REAL
 +                MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
 +                         cr->nodeid, cr->mpi_comm_mygroup);
  #endif
              }
              else
              {
 -                for (node = 0; (node < nnodes && atom+node < natoms); node++)
 +                for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++)
                  {
                      if (node > 0)
                      {
 -#ifdef GMX_MPI
 +#if GMX_MPI
                          MPI_Status stat;
 -                        MPI_Recv(dfdx[0], natoms*DIM, mpi_type, node, node,
 +                        MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
                                   cr->mpi_comm_mygroup, &stat);
  #undef mpi_type
  #endif
  
                      row = (atom + node)*DIM + d;
  
 -                    for (j = 0; j < natoms; j++)
 +                    for (size_t j = 0; j < atom_index.size(); j++)
                      {
 -                        for (k = 0; k < DIM; k++)
 +                        for (size_t k = 0; k < DIM; k++)
                          {
                              col = j*DIM + k;
  
          if (bIsMaster && bVerbose)
          {
              fprintf(stderr, "\rFinished step %d out of %d",
 -                    std::min(atom+nnodes, natoms), natoms);
 +                    static_cast<int>(std::min(atom+nnodes, atom_index.size())),
 +                    static_cast<int>(atom_index.size()));
              fflush(stderr);
          }
      }
  
      finish_em(cr, outf, walltime_accounting, wcycle);
  
 -    walltime_accounting_set_nsteps_done(walltime_accounting, natoms*2);
 +    walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
  
      return 0;
  }
 +
 +} // namespace gmx