src/gromacs/mdlib/minimize.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /*! \internal \file
  38  *
  39  * \brief This file defines integrators for energy minimization
  40  *
  41  * \author Berk Hess <hess@kth.se>
  42  * \author Erik Lindahl <erik@kth.se>
  43  * \ingroup module_mdlib
  44  */
  45 #include "gmxpre.h"
  46
  47 #include "minimize.h"
  48
  49 #include "config.h"
  50
  51 #include <cmath>
  52 #include <cstring>
  53 #include <ctime>
  54
  55 #include <algorithm>
  56 #include <vector>
  57
  58 #include "gromacs/commandline/filenm.h"
  59 #include "gromacs/domdec/domdec.h"
  60 #include "gromacs/domdec/domdec_struct.h"
  61 #include "gromacs/ewald/pme.h"
  62 #include "gromacs/fileio/confio.h"
  63 #include "gromacs/fileio/mtxio.h"
  64 #include "gromacs/gmxlib/md_logging.h"
  65 #include "gromacs/gmxlib/network.h"
  66 #include "gromacs/gmxlib/nrnb.h"
  67 #include "gromacs/imd/imd.h"
  68 #include "gromacs/linearalgebra/sparsematrix.h"
  69 #include "gromacs/listed-forces/manage-threading.h"
  70 #include "gromacs/math/functions.h"
  71 #include "gromacs/math/vec.h"
  72 #include "gromacs/mdlib/constr.h"
  73 #include "gromacs/mdlib/force.h"
  74 #include "gromacs/mdlib/forcerec.h"
  75 #include "gromacs/mdlib/gmx_omp_nthreads.h"
  76 #include "gromacs/mdlib/md_support.h"
  77 #include "gromacs/mdlib/mdatoms.h"
  78 #include "gromacs/mdlib/mdebin.h"
  79 #include "gromacs/mdlib/mdrun.h"
  80 #include "gromacs/mdlib/ns.h"
  81 #include "gromacs/mdlib/shellfc.h"
  82 #include "gromacs/mdlib/sim_util.h"
  83 #include "gromacs/mdlib/tgroup.h"
  84 #include "gromacs/mdlib/trajectory_writing.h"
  85 #include "gromacs/mdlib/update.h"
  86 #include "gromacs/mdlib/vsite.h"
  87 #include "gromacs/mdtypes/commrec.h"
  88 #include "gromacs/mdtypes/inputrec.h"
  89 #include "gromacs/mdtypes/md_enums.h"
  90 #include "gromacs/pbcutil/mshift.h"
  91 #include "gromacs/pbcutil/pbc.h"
  92 #include "gromacs/timing/wallcycle.h"
  93 #include "gromacs/timing/walltime_accounting.h"
  94 #include "gromacs/topology/mtop_util.h"
  95 #include "gromacs/utility/cstringutil.h"
  96 #include "gromacs/utility/exceptions.h"
  97 #include "gromacs/utility/fatalerror.h"
  98 #include "gromacs/utility/smalloc.h"
  99
 100 //! Utility structure for manipulating states during EM
 101 typedef struct {
 102     //! Copy of the global state
 103     t_state  s;
 104     //! Force array
 105     rvec    *f;
 106     //! Potential energy
 107     real     epot;
 108     //! Norm of the force
 109     real     fnorm;
 110     //! Maximum force
 111     real     fmax;
 112     //! Direction
 113     int      a_fmax;
 114 } em_state_t;
 115
 116 //! Initiate em_state_t structure and return pointer to it
 117 static em_state_t *init_em_state()
 118 {
 119     em_state_t *ems;
 120
 121     snew(ems, 1);
 122
 123     /* does this need to be here?  Should the array be declared differently (staticaly)in the state definition? */
 124     snew(ems->s.lambda, efptNR);
 125
 126     return ems;
 127 }
 128
 129 //! Print the EM starting conditions
 130 static void print_em_start(FILE                     *fplog,
 131                            t_commrec                *cr,
 132                            gmx_walltime_accounting_t walltime_accounting,
 133                            gmx_wallcycle_t           wcycle,
 134                            const char               *name)
 135 {
 136     walltime_accounting_start(walltime_accounting);
 137     wallcycle_start(wcycle, ewcRUN);
 138     print_start(fplog, cr, walltime_accounting, name);
 139 }
 140
 141 //! Stop counting time for EM
 142 static void em_time_end(gmx_walltime_accounting_t walltime_accounting,
 143                         gmx_wallcycle_t           wcycle)
 144 {
 145     wallcycle_stop(wcycle, ewcRUN);
 146
 147     walltime_accounting_end(walltime_accounting);
 148 }
 149
 150 //! Printing a log file and console header
 151 static void sp_header(FILE *out, const char *minimizer, real ftol, int nsteps)
 152 {
 153     fprintf(out, "\n");
 154     fprintf(out, "%s:\n", minimizer);
 155     fprintf(out, "   Tolerance (Fmax)   = %12.5e\n", ftol);
 156     fprintf(out, "   Number of steps    = %12d\n", nsteps);
 157 }
 158
 159 //! Print warning message
 160 static void warn_step(FILE *fp, real ftol, gmx_bool bLastStep, gmx_bool bConstrain)
 161 {
 162     char buffer[2048];
 163     if (bLastStep)
 164     {
 165         sprintf(buffer,
 166                 "\nEnergy minimization reached the maximum number "
 167                 "of steps before the forces reached the requested "
 168                 "precision Fmax < %g.\n", ftol);
 169     }
 170     else
 171     {
 172         sprintf(buffer,
 173                 "\nEnergy minimization has stopped, but the forces have "
 174                 "not converged to the requested precision Fmax < %g (which "
 175                 "may not be possible for your system). It stopped "
 176                 "because the algorithm tried to make a new step whose size "
 177                 "was too small, or there was no change in the energy since "
 178                 "last step. Either way, we regard the minimization as "
 179                 "converged to within the available machine precision, "
 180                 "given your starting configuration and EM parameters.\n%s%s",
 181                 ftol,
 182                 sizeof(real) < sizeof(double) ?
 183                 "\nDouble precision normally gives you higher accuracy, but "
 184                 "this is often not needed for preparing to run molecular "
 185                 "dynamics.\n" :
 186                 "",
 187                 bConstrain ?
 188                 "You might need to increase your constraint accuracy, or turn\n"
 189                 "off constraints altogether (set constraints = none in mdp file)\n" :
 190                 "");
 191     }
 192     fputs(wrap_lines(buffer, 78, 0, FALSE), fp);
 193 }
 194
 195 //! Print message about convergence of the EM
 196 static void print_converged(FILE *fp, const char *alg, real ftol,
 197                             gmx_int64_t count, gmx_bool bDone, gmx_int64_t nsteps,
 198                             real epot, real fmax, int nfmax, real fnorm)
 199 {
 200     char buf[STEPSTRSIZE];
 201
 202     if (bDone)
 203     {
 204         fprintf(fp, "\n%s converged to Fmax < %g in %s steps\n",
 205                 alg, ftol, gmx_step_str(count, buf));
 206     }
 207     else if (count < nsteps)
 208     {
 209         fprintf(fp, "\n%s converged to machine precision in %s steps,\n"
 210                 "but did not reach the requested Fmax < %g.\n",
 211                 alg, gmx_step_str(count, buf), ftol);
 212     }
 213     else
 214     {
 215         fprintf(fp, "\n%s did not converge to Fmax < %g in %s steps.\n",
 216                 alg, ftol, gmx_step_str(count, buf));
 217     }
 218
 219 #if GMX_DOUBLE
 220     fprintf(fp, "Potential Energy  = %21.14e\n", epot);
 221     fprintf(fp, "Maximum force     = %21.14e on atom %d\n", fmax, nfmax+1);
 222     fprintf(fp, "Norm of force     = %21.14e\n", fnorm);
 223 #else
 224     fprintf(fp, "Potential Energy  = %14.7e\n", epot);
 225     fprintf(fp, "Maximum force     = %14.7e on atom %d\n", fmax, nfmax+1);
 226     fprintf(fp, "Norm of force     = %14.7e\n", fnorm);
 227 #endif
 228 }
 229
 230 //! Compute the norm and max of the force array in parallel
 231 static void get_f_norm_max(t_commrec *cr,
 232                            t_grpopts *opts, t_mdatoms *mdatoms, rvec *f,
 233                            real *fnorm, real *fmax, int *a_fmax)
 234 {
 235     double fnorm2, *sum;
 236     real   fmax2, fam;
 237     int    la_max, a_max, start, end, i, m, gf;
 238
 239     /* This routine finds the largest force and returns it.
 240      * On parallel machines the global max is taken.
 241      */
 242     fnorm2 = 0;
 243     fmax2  = 0;
 244     la_max = -1;
 245     start  = 0;
 246     end    = mdatoms->homenr;
 247     if (mdatoms->cFREEZE)
 248     {
 249         for (i = start; i < end; i++)
 250         {
 251             gf  = mdatoms->cFREEZE[i];
 252             fam = 0;
 253             for (m = 0; m < DIM; m++)
 254             {
 255                 if (!opts->nFreeze[gf][m])
 256                 {
 257                     fam += gmx::square(f[i][m]);
 258                 }
 259             }
 260             fnorm2 += fam;
 261             if (fam > fmax2)
 262             {
 263                 fmax2  = fam;
 264                 la_max = i;
 265             }
 266         }
 267     }
 268     else
 269     {
 270         for (i = start; i < end; i++)
 271         {
 272             fam     = norm2(f[i]);
 273             fnorm2 += fam;
 274             if (fam > fmax2)
 275             {
 276                 fmax2  = fam;
 277                 la_max = i;
 278             }
 279         }
 280     }
 281
 282     if (la_max >= 0 && DOMAINDECOMP(cr))
 283     {
 284         a_max = cr->dd->gatindex[la_max];
 285     }
 286     else
 287     {
 288         a_max = la_max;
 289     }
 290     if (PAR(cr))
 291     {
 292         snew(sum, 2*cr->nnodes+1);
 293         sum[2*cr->nodeid]   = fmax2;
 294         sum[2*cr->nodeid+1] = a_max;
 295         sum[2*cr->nnodes]   = fnorm2;
 296         gmx_sumd(2*cr->nnodes+1, sum, cr);
 297         fnorm2 = sum[2*cr->nnodes];
 298         /* Determine the global maximum */
 299         for (i = 0; i < cr->nnodes; i++)
 300         {
 301             if (sum[2*i] > fmax2)
 302             {
 303                 fmax2 = sum[2*i];
 304                 a_max = (int)(sum[2*i+1] + 0.5);
 305             }
 306         }
 307         sfree(sum);
 308     }
 309
 310     if (fnorm)
 311     {
 312         *fnorm = sqrt(fnorm2);
 313     }
 314     if (fmax)
 315     {
 316         *fmax  = sqrt(fmax2);
 317     }
 318     if (a_fmax)
 319     {
 320         *a_fmax = a_max;
 321     }
 322 }
 323
 324 //! Compute the norm of the force
 325 static void get_state_f_norm_max(t_commrec *cr,
 326                                  t_grpopts *opts, t_mdatoms *mdatoms,
 327                                  em_state_t *ems)
 328 {
 329     get_f_norm_max(cr, opts, mdatoms, ems->f, &ems->fnorm, &ems->fmax, &ems->a_fmax);
 330 }
 331
 332 //! Initialize the energy minimization
 333 void init_em(FILE *fplog, const char *title,
 334              t_commrec *cr, t_inputrec *ir,
 335              t_state *state_global, gmx_mtop_t *top_global,
 336              em_state_t *ems, gmx_localtop_t **top,
 337              rvec **f,
 338              t_nrnb *nrnb, rvec mu_tot,
 339              t_forcerec *fr, gmx_enerdata_t **enerd,
 340              t_graph **graph, t_mdatoms *mdatoms, gmx_global_stat_t *gstat,
 341              gmx_vsite_t *vsite, gmx_constr_t constr,
 342              int nfile, const t_filenm fnm[],
 343              gmx_mdoutf_t *outf, t_mdebin **mdebin,
 344              int imdport, unsigned long gmx_unused Flags,
 345              gmx_wallcycle_t wcycle)
 346 {
 347     int  i;
 348     real dvdl_constr;
 349
 350     if (fplog)
 351     {
 352         fprintf(fplog, "Initiating %s\n", title);
 353     }
 354
 355     state_global->ngtc = 0;
 356
 357     /* Initialize lambda variables */
 358     initialize_lambdas(fplog, ir, &(state_global->fep_state), state_global->lambda, NULL);
 359
 360     init_nrnb(nrnb);
 361
 362     /* Interactive molecular dynamics */
 363     init_IMD(ir, cr, top_global, fplog, 1, state_global->x,
 364              nfile, fnm, NULL, imdport, Flags);
 365
 366     if (DOMAINDECOMP(cr))
 367     {
 368         *top = dd_init_local_top(top_global);
 369
 370         dd_init_local_state(cr->dd, state_global, &ems->s);
 371
 372         *f = NULL;
 373
 374         /* Distribute the charge groups over the nodes from the master node */
 375         dd_partition_system(fplog, ir->init_step, cr, TRUE, 1,
 376                             state_global, top_global, ir,
 377                             &ems->s, &ems->f, mdatoms, *top,
 378                             fr, vsite, constr,
 379                             nrnb, NULL, FALSE);
 380         dd_store_state(cr->dd, &ems->s);
 381
 382         *graph = NULL;
 383     }
 384     else
 385     {
 386         snew(*f, top_global->natoms);
 387
 388         /* Just copy the state */
 389         ems->s = *state_global;
 390         /* We need to allocate one element extra, since we might use
 391          * (unaligned) 4-wide SIMD loads to access rvec entries.
 392          */
 393         snew(ems->s.x, ems->s.nalloc + 1);
 394         snew(ems->f, ems->s.nalloc+1);
 395         snew(ems->s.v, ems->s.nalloc+1);
 396         for (i = 0; i < state_global->natoms; i++)
 397         {
 398             copy_rvec(state_global->x[i], ems->s.x[i]);
 399         }
 400         copy_mat(state_global->box, ems->s.box);
 401
 402         *top      = gmx_mtop_generate_local_top(top_global, ir->efep != efepNO);
 403
 404         forcerec_set_excl_load(fr, *top);
 405
 406         setup_bonded_threading(fr, &(*top)->idef);
 407
 408         if (ir->ePBC != epbcNONE && !fr->bMolPBC)
 409         {
 410             *graph = mk_graph(fplog, &((*top)->idef), 0, top_global->natoms, FALSE, FALSE);
 411         }
 412         else
 413         {
 414             *graph = NULL;
 415         }
 416
 417         atoms2md(top_global, ir, 0, NULL, top_global->natoms, mdatoms);
 418         update_mdatoms(mdatoms, state_global->lambda[efptFEP]);
 419
 420         if (vsite)
 421         {
 422             set_vsite_top(vsite, *top, mdatoms, cr);
 423         }
 424     }
 425
 426     if (constr)
 427     {
 428         if (ir->eConstrAlg == econtSHAKE &&
 429             gmx_mtop_ftype_count(top_global, F_CONSTR) > 0)
 430         {
 431             gmx_fatal(FARGS, "Can not do energy minimization with %s, use %s\n",
 432                       econstr_names[econtSHAKE], econstr_names[econtLINCS]);
 433         }
 434
 435         if (!DOMAINDECOMP(cr))
 436         {
 437             set_constraints(constr, *top, ir, mdatoms, cr);
 438         }
 439
 440         if (!ir->bContinuation)
 441         {
 442             /* Constrain the starting coordinates */
 443             dvdl_constr = 0;
 444             constrain(PAR(cr) ? NULL : fplog, TRUE, TRUE, constr, &(*top)->idef,
 445                       ir, cr, -1, 0, 1.0, mdatoms,
 446                       ems->s.x, ems->s.x, NULL, fr->bMolPBC, ems->s.box,
 447                       ems->s.lambda[efptFEP], &dvdl_constr,
 448                       NULL, NULL, nrnb, econqCoord);
 449         }
 450     }
 451
 452     if (PAR(cr))
 453     {
 454         *gstat = global_stat_init(ir);
 455     }
 456     else
 457     {
 458         *gstat = NULL;
 459     }
 460
 461     *outf = init_mdoutf(fplog, nfile, fnm, 0, cr, ir, top_global, NULL, wcycle);
 462
 463     snew(*enerd, 1);
 464     init_enerdata(top_global->groups.grps[egcENER].nr, ir->fepvals->n_lambda,
 465                   *enerd);
 466
 467     if (mdebin != NULL)
 468     {
 469         /* Init bin for energy stuff */
 470         *mdebin = init_mdebin(mdoutf_get_fp_ene(*outf), top_global, ir, NULL);
 471     }
 472
 473     clear_rvec(mu_tot);
 474     calc_shifts(ems->s.box, fr->shift_vec);
 475 }
 476
 477 //! Finalize the minimization
 478 static void finish_em(t_commrec *cr, gmx_mdoutf_t outf,
 479                       gmx_walltime_accounting_t walltime_accounting,
 480                       gmx_wallcycle_t wcycle)
 481 {
 482     if (!(cr->duty & DUTY_PME))
 483     {
 484         /* Tell the PME only node to finish */
 485         gmx_pme_send_finish(cr);
 486     }
 487
 488     done_mdoutf(outf);
 489
 490     em_time_end(walltime_accounting, wcycle);
 491 }
 492
 493 //! Swap two different EM states during minimization
 494 static void swap_em_state(em_state_t *ems1, em_state_t *ems2)
 495 {
 496     em_state_t tmp;
 497
 498     tmp   = *ems1;
 499     *ems1 = *ems2;
 500     *ems2 = tmp;
 501 }
 502
 503 //! Copy coordinate from an EM state to a "normal" state structure
 504 static void copy_em_coords(em_state_t *ems, t_state *state)
 505 {
 506     int i;
 507
 508     for (i = 0; (i < state->natoms); i++)
 509     {
 510         copy_rvec(ems->s.x[i], state->x[i]);
 511     }
 512 }
 513
 514 //! Save the EM trajectory
 515 static void write_em_traj(FILE *fplog, t_commrec *cr,
 516                           gmx_mdoutf_t outf,
 517                           gmx_bool bX, gmx_bool bF, const char *confout,
 518                           gmx_mtop_t *top_global,
 519                           t_inputrec *ir, gmx_int64_t step,
 520                           em_state_t *state,
 521                           t_state *state_global)
 522 {
 523     int      mdof_flags;
 524     gmx_bool bIMDout = FALSE;
 525
 526
 527     /* Shall we do IMD output? */
 528     if (ir->bIMD)
 529     {
 530         bIMDout = do_per_step(step, IMD_get_step(ir->imd->setup));
 531     }
 532
 533     if ((bX || bF || bIMDout || confout != NULL) && !DOMAINDECOMP(cr))
 534     {
 535         copy_em_coords(state, state_global);
 536     }
 537
 538     mdof_flags = 0;
 539     if (bX)
 540     {
 541         mdof_flags |= MDOF_X;
 542     }
 543     if (bF)
 544     {
 545         mdof_flags |= MDOF_F;
 546     }
 547
 548     /* If we want IMD output, set appropriate MDOF flag */
 549     if (ir->bIMD)
 550     {
 551         mdof_flags |= MDOF_IMD;
 552     }
 553
 554     mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
 555                                      top_global, step, (double)step,
 556                                      &state->s, state_global, state->f);
 557
 558     if (confout != NULL && MASTER(cr))
 559     {
 560         if (ir->ePBC != epbcNONE && !ir->bPeriodicMols && DOMAINDECOMP(cr))
 561         {
 562             /* Make molecules whole only for confout writing */
 563             do_pbc_mtop(fplog, ir->ePBC, state_global->box, top_global,
 564                         state_global->x);
 565         }
 566
 567         write_sto_conf_mtop(confout,
 568                             *top_global->name, top_global,
 569                             state_global->x, NULL, ir->ePBC, state_global->box);
 570     }
 571 }
 572
 573 //! \brief Do one minimization step
 574 //
 575 // \returns true when the step succeeded, false when a constraint error occurred
 576 static bool do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md,
 577                        gmx_bool bMolPBC,
 578                        em_state_t *ems1, real a, rvec *f, em_state_t *ems2,
 579                        gmx_constr_t constr, gmx_localtop_t *top,
 580                        t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 581                        gmx_int64_t count)
 582
 583 {
 584     t_state *s1, *s2;
 585     int      i;
 586     int      start, end;
 587     rvec    *x1, *x2;
 588     real     dvdl_constr;
 589     int      nthreads gmx_unused;
 590
 591     bool     validStep = true;
 592
 593     s1 = &ems1->s;
 594     s2 = &ems2->s;
 595
 596     if (DOMAINDECOMP(cr) && s1->ddp_count != cr->dd->ddp_count)
 597     {
 598         gmx_incons("state mismatch in do_em_step");
 599     }
 600
 601     s2->flags = s1->flags;
 602
 603     if (s2->nalloc != s1->nalloc)
 604     {
 605         s2->nalloc = s1->nalloc;
 606         /* We need to allocate one element extra, since we might use
 607          * (unaligned) 4-wide SIMD loads to access rvec entries.
 608          */
 609         srenew(s2->x, s1->nalloc + 1);
 610         srenew(ems2->f,  s1->nalloc);
 611         if (s2->flags & (1<<estCGP))
 612         {
 613             srenew(s2->cg_p,  s1->nalloc + 1);
 614         }
 615     }
 616
 617     s2->natoms = s1->natoms;
 618     copy_mat(s1->box, s2->box);
 619     /* Copy free energy state */
 620     for (i = 0; i < efptNR; i++)
 621     {
 622         s2->lambda[i] = s1->lambda[i];
 623     }
 624     copy_mat(s1->box, s2->box);
 625
 626     start = 0;
 627     end   = md->homenr;
 628
 629     x1 = s1->x;
 630     x2 = s2->x;
 631
 632     // cppcheck-suppress unreadVariable
 633     nthreads = gmx_omp_nthreads_get(emntUpdate);
 634 #pragma omp parallel num_threads(nthreads)
 635     {
 636         int gf, i, m;
 637
 638         gf = 0;
 639 #pragma omp for schedule(static) nowait
 640         for (i = start; i < end; i++)
 641         {
 642             try
 643             {
 644                 if (md->cFREEZE)
 645                 {
 646                     gf = md->cFREEZE[i];
 647                 }
 648                 for (m = 0; m < DIM; m++)
 649                 {
 650                     if (ir->opts.nFreeze[gf][m])
 651                     {
 652                         x2[i][m] = x1[i][m];
 653                     }
 654                     else
 655                     {
 656                         x2[i][m] = x1[i][m] + a*f[i][m];
 657                     }
 658                 }
 659             }
 660             GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 661         }
 662
 663         if (s2->flags & (1<<estCGP))
 664         {
 665             /* Copy the CG p vector */
 666             x1 = s1->cg_p;
 667             x2 = s2->cg_p;
 668 #pragma omp for schedule(static) nowait
 669             for (i = start; i < end; i++)
 670             {
 671                 // Trivial OpenMP block that does not throw
 672                 copy_rvec(x1[i], x2[i]);
 673             }
 674         }
 675
 676         if (DOMAINDECOMP(cr))
 677         {
 678             s2->ddp_count = s1->ddp_count;
 679             if (s2->cg_gl_nalloc < s1->cg_gl_nalloc)
 680             {
 681 #pragma omp barrier
 682                 s2->cg_gl_nalloc = s1->cg_gl_nalloc;
 683                 try
 684                 {
 685                     /* We need to allocate one element extra, since we might use
 686                      * (unaligned) 4-wide SIMD loads to access rvec entries.
 687                      */
 688                     srenew(s2->cg_gl, s2->cg_gl_nalloc + 1);
 689                 }
 690                 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 691 #pragma omp barrier
 692             }
 693             s2->ncg_gl = s1->ncg_gl;
 694 #pragma omp for schedule(static) nowait
 695             for (i = 0; i < s2->ncg_gl; i++)
 696             {
 697                 s2->cg_gl[i] = s1->cg_gl[i];
 698             }
 699             s2->ddp_count_cg_gl = s1->ddp_count_cg_gl;
 700         }
 701     }
 702
 703     if (constr)
 704     {
 705         wallcycle_start(wcycle, ewcCONSTR);
 706         dvdl_constr = 0;
 707         validStep   =
 708             constrain(NULL, TRUE, TRUE, constr, &top->idef,
 709                       ir, cr, count, 0, 1.0, md,
 710                       s1->x, s2->x, NULL, bMolPBC, s2->box,
 711                       s2->lambda[efptBONDED], &dvdl_constr,
 712                       NULL, NULL, nrnb, econqCoord);
 713         wallcycle_stop(wcycle, ewcCONSTR);
 714
 715         // We should move this check to the different minimizers
 716         if (!validStep && ir->eI != eiSteep)
 717         {
 718             gmx_fatal(FARGS, "The coordinates could not be constrained. Minimizer '%s' can not handle constraint failures, use minimizer '%s' before using '%s'.",
 719                       EI(ir->eI), EI(eiSteep), EI(ir->eI));
 720         }
 721     }
 722
 723     return validStep;
 724 }
 725
 726 //! Prepare EM for using domain decomposition parallellization
 727 static void em_dd_partition_system(FILE *fplog, int step, t_commrec *cr,
 728                                    gmx_mtop_t *top_global, t_inputrec *ir,
 729                                    em_state_t *ems, gmx_localtop_t *top,
 730                                    t_mdatoms *mdatoms, t_forcerec *fr,
 731                                    gmx_vsite_t *vsite, gmx_constr_t constr,
 732                                    t_nrnb *nrnb, gmx_wallcycle_t wcycle)
 733 {
 734     /* Repartition the domain decomposition */
 735     dd_partition_system(fplog, step, cr, FALSE, 1,
 736                         NULL, top_global, ir,
 737                         &ems->s, &ems->f,
 738                         mdatoms, top, fr, vsite, constr,
 739                         nrnb, wcycle, FALSE);
 740     dd_store_state(cr->dd, &ems->s);
 741 }
 742
 743 //! De one energy evaluation
 744 static void evaluate_energy(FILE *fplog, t_commrec *cr,
 745                             gmx_mtop_t *top_global,
 746                             em_state_t *ems, gmx_localtop_t *top,
 747                             t_inputrec *inputrec,
 748                             t_nrnb *nrnb, gmx_wallcycle_t wcycle,
 749                             gmx_global_stat_t gstat,
 750                             gmx_vsite_t *vsite, gmx_constr_t constr,
 751                             t_fcdata *fcd,
 752                             t_graph *graph, t_mdatoms *mdatoms,
 753                             t_forcerec *fr, rvec mu_tot,
 754                             gmx_enerdata_t *enerd, tensor vir, tensor pres,
 755                             gmx_int64_t count, gmx_bool bFirst)
 756 {
 757     real     t;
 758     gmx_bool bNS;
 759     tensor   force_vir, shake_vir, ekin;
 760     real     dvdl_constr, prescorr, enercorr, dvdlcorr;
 761     real     terminate = 0;
 762
 763     /* Set the time to the initial time, the time does not change during EM */
 764     t = inputrec->init_t;
 765
 766     if (bFirst ||
 767         (DOMAINDECOMP(cr) && ems->s.ddp_count < cr->dd->ddp_count))
 768     {
 769         /* This is the first state or an old state used before the last ns */
 770         bNS = TRUE;
 771     }
 772     else
 773     {
 774         bNS = FALSE;
 775         if (inputrec->nstlist > 0)
 776         {
 777             bNS = TRUE;
 778         }
 779     }
 780
 781     if (vsite)
 782     {
 783         construct_vsites(vsite, ems->s.x, 1, NULL,
 784                          top->idef.iparams, top->idef.il,
 785                          fr->ePBC, fr->bMolPBC, cr, ems->s.box);
 786     }
 787
 788     if (DOMAINDECOMP(cr) && bNS)
 789     {
 790         /* Repartition the domain decomposition */
 791         em_dd_partition_system(fplog, count, cr, top_global, inputrec,
 792                                ems, top, mdatoms, fr, vsite, constr,
 793                                nrnb, wcycle);
 794     }
 795
 796     /* Calc force & energy on new trial position  */
 797     /* do_force always puts the charge groups in the box and shifts again
 798      * We do not unshift, so molecules are always whole in congrad.c
 799      */
 800     do_force(fplog, cr, inputrec,
 801              count, nrnb, wcycle, top, &top_global->groups,
 802              ems->s.box, ems->s.x, &ems->s.hist,
 803              ems->f, force_vir, mdatoms, enerd, fcd,
 804              ems->s.lambda, graph, fr, vsite, mu_tot, t, NULL, NULL, TRUE,
 805              GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES |
 806              GMX_FORCE_VIRIAL | GMX_FORCE_ENERGY |
 807              (bNS ? GMX_FORCE_NS : 0));
 808
 809     /* Clear the unused shake virial and pressure */
 810     clear_mat(shake_vir);
 811     clear_mat(pres);
 812
 813     /* Communicate stuff when parallel */
 814     if (PAR(cr) && inputrec->eI != eiNM)
 815     {
 816         wallcycle_start(wcycle, ewcMoveE);
 817
 818         global_stat(gstat, cr, enerd, force_vir, shake_vir, mu_tot,
 819                     inputrec, NULL, NULL, NULL, 1, &terminate,
 820                     NULL, FALSE,
 821                     CGLO_ENERGY |
 822                     CGLO_PRESSURE |
 823                     CGLO_CONSTRAINT);
 824
 825         wallcycle_stop(wcycle, ewcMoveE);
 826     }
 827
 828     /* Calculate long range corrections to pressure and energy */
 829     calc_dispcorr(inputrec, fr, ems->s.box, ems->s.lambda[efptVDW],
 830                   pres, force_vir, &prescorr, &enercorr, &dvdlcorr);
 831     enerd->term[F_DISPCORR] = enercorr;
 832     enerd->term[F_EPOT]    += enercorr;
 833     enerd->term[F_PRES]    += prescorr;
 834     enerd->term[F_DVDL]    += dvdlcorr;
 835
 836     ems->epot = enerd->term[F_EPOT];
 837
 838     if (constr)
 839     {
 840         /* Project out the constraint components of the force */
 841         wallcycle_start(wcycle, ewcCONSTR);
 842         dvdl_constr = 0;
 843         constrain(NULL, FALSE, FALSE, constr, &top->idef,
 844                   inputrec, cr, count, 0, 1.0, mdatoms,
 845                   ems->s.x, ems->f, ems->f, fr->bMolPBC, ems->s.box,
 846                   ems->s.lambda[efptBONDED], &dvdl_constr,
 847                   NULL, &shake_vir, nrnb, econqForceDispl);
 848         enerd->term[F_DVDL_CONSTR] += dvdl_constr;
 849         m_add(force_vir, shake_vir, vir);
 850         wallcycle_stop(wcycle, ewcCONSTR);
 851     }
 852     else
 853     {
 854         copy_mat(force_vir, vir);
 855     }
 856
 857     clear_mat(ekin);
 858     enerd->term[F_PRES] =
 859         calc_pres(fr->ePBC, inputrec->nwall, ems->s.box, ekin, vir, pres);
 860
 861     sum_dhdl(enerd, ems->s.lambda, inputrec->fepvals);
 862
 863     if (EI_ENERGY_MINIMIZATION(inputrec->eI))
 864     {
 865         get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, ems);
 866     }
 867 }
 868
 869 //! Parallel utility summing energies and forces
 870 static double reorder_partsum(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 871                               gmx_mtop_t *top_global,
 872                               em_state_t *s_min, em_state_t *s_b)
 873 {
 874     rvec          *fm, *fb, *fmg;
 875     t_block       *cgs_gl;
 876     int            ncg, *cg_gl, *index, c, cg, i, a0, a1, a, gf, m;
 877     double         partsum;
 878     unsigned char *grpnrFREEZE;
 879
 880     if (debug)
 881     {
 882         fprintf(debug, "Doing reorder_partsum\n");
 883     }
 884
 885     fm = s_min->f;
 886     fb = s_b->f;
 887
 888     cgs_gl = dd_charge_groups_global(cr->dd);
 889     index  = cgs_gl->index;
 890
 891     /* Collect fm in a global vector fmg.
 892      * This conflicts with the spirit of domain decomposition,
 893      * but to fully optimize this a much more complicated algorithm is required.
 894      */
 895     snew(fmg, top_global->natoms);
 896
 897     ncg   = s_min->s.ncg_gl;
 898     cg_gl = s_min->s.cg_gl;
 899     i     = 0;
 900     for (c = 0; c < ncg; c++)
 901     {
 902         cg = cg_gl[c];
 903         a0 = index[cg];
 904         a1 = index[cg+1];
 905         for (a = a0; a < a1; a++)
 906         {
 907             copy_rvec(fm[i], fmg[a]);
 908             i++;
 909         }
 910     }
 911     gmx_sum(top_global->natoms*3, fmg[0], cr);
 912
 913     /* Now we will determine the part of the sum for the cgs in state s_b */
 914     ncg         = s_b->s.ncg_gl;
 915     cg_gl       = s_b->s.cg_gl;
 916     partsum     = 0;
 917     i           = 0;
 918     gf          = 0;
 919     grpnrFREEZE = top_global->groups.grpnr[egcFREEZE];
 920     for (c = 0; c < ncg; c++)
 921     {
 922         cg = cg_gl[c];
 923         a0 = index[cg];
 924         a1 = index[cg+1];
 925         for (a = a0; a < a1; a++)
 926         {
 927             if (mdatoms->cFREEZE && grpnrFREEZE)
 928             {
 929                 gf = grpnrFREEZE[i];
 930             }
 931             for (m = 0; m < DIM; m++)
 932             {
 933                 if (!opts->nFreeze[gf][m])
 934                 {
 935                     partsum += (fb[i][m] - fmg[a][m])*fb[i][m];
 936                 }
 937             }
 938             i++;
 939         }
 940     }
 941
 942     sfree(fmg);
 943
 944     return partsum;
 945 }
 946
 947 //! Print some stuff, like beta, whatever that means.
 948 static real pr_beta(t_commrec *cr, t_grpopts *opts, t_mdatoms *mdatoms,
 949                     gmx_mtop_t *top_global,
 950                     em_state_t *s_min, em_state_t *s_b)
 951 {
 952     rvec  *fm, *fb;
 953     double sum;
 954     int    gf, i, m;
 955
 956     /* This is just the classical Polak-Ribiere calculation of beta;
 957      * it looks a bit complicated since we take freeze groups into account,
 958      * and might have to sum it in parallel runs.
 959      */
 960
 961     if (!DOMAINDECOMP(cr) ||
 962         (s_min->s.ddp_count == cr->dd->ddp_count &&
 963          s_b->s.ddp_count   == cr->dd->ddp_count))
 964     {
 965         fm  = s_min->f;
 966         fb  = s_b->f;
 967         sum = 0;
 968         gf  = 0;
 969         /* This part of code can be incorrect with DD,
 970          * since the atom ordering in s_b and s_min might differ.
 971          */
 972         for (i = 0; i < mdatoms->homenr; i++)
 973         {
 974             if (mdatoms->cFREEZE)
 975             {
 976                 gf = mdatoms->cFREEZE[i];
 977             }
 978             for (m = 0; m < DIM; m++)
 979             {
 980                 if (!opts->nFreeze[gf][m])
 981                 {
 982                     sum += (fb[i][m] - fm[i][m])*fb[i][m];
 983                 }
 984             }
 985         }
 986     }
 987     else
 988     {
 989         /* We need to reorder cgs while summing */
 990         sum = reorder_partsum(cr, opts, mdatoms, top_global, s_min, s_b);
 991     }
 992     if (PAR(cr))
 993     {
 994         gmx_sumd(1, &sum, cr);
 995     }
 996
 997     return sum/gmx::square(s_min->fnorm);
 998 }
 999
1000 namespace gmx
1001 {
1002
1003 /*! \brief Do conjugate gradients minimization
1004     \copydoc integrator_t (FILE *fplog, t_commrec *cr,
1005                            int nfile, const t_filenm fnm[],
1006                            const gmx_output_env_t *oenv, gmx_bool bVerbose,
1007                            int nstglobalcomm,
1008                            gmx_vsite_t *vsite, gmx_constr_t constr,
1009                            int stepout,
1010                            t_inputrec *inputrec,
1011                            gmx_mtop_t *top_global, t_fcdata *fcd,
1012                            t_state *state_global,
1013                            t_mdatoms *mdatoms,
1014                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1015                            gmx_edsam_t ed,
1016                            t_forcerec *fr,
1017                            int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1018                            gmx_membed_t gmx_unused *membed,
1019                            real cpt_period, real max_hours,
1020                            int imdport,
1021                            unsigned long Flags,
1022                            gmx_walltime_accounting_t walltime_accounting)
1023  */
1024 double do_cg(FILE *fplog, t_commrec *cr,
1025              int nfile, const t_filenm fnm[],
1026              const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
1027              int gmx_unused nstglobalcomm,
1028              gmx_vsite_t *vsite, gmx_constr_t constr,
1029              int gmx_unused stepout,
1030              t_inputrec *inputrec,
1031              gmx_mtop_t *top_global, t_fcdata *fcd,
1032              t_state *state_global,
1033              t_mdatoms *mdatoms,
1034              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1035              gmx_edsam_t gmx_unused ed,
1036              t_forcerec *fr,
1037              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1038              gmx_membed_t gmx_unused *membed,
1039              real gmx_unused cpt_period, real gmx_unused max_hours,
1040              int imdport,
1041              unsigned long gmx_unused Flags,
1042              gmx_walltime_accounting_t walltime_accounting)
1043 {
1044     const char       *CG = "Polak-Ribiere Conjugate Gradients";
1045
1046     em_state_t       *s_min, *s_a, *s_b, *s_c;
1047     gmx_localtop_t   *top;
1048     gmx_enerdata_t   *enerd;
1049     rvec             *f;
1050     gmx_global_stat_t gstat;
1051     t_graph          *graph;
1052     rvec             *p, *sf;
1053     double            gpa, gpb, gpc, tmp, minstep;
1054     real              fnormn;
1055     real              stepsize;
1056     real              a, b, c, beta = 0.0;
1057     real              epot_repl = 0;
1058     real              pnorm;
1059     t_mdebin         *mdebin;
1060     gmx_bool          converged, foundlower;
1061     rvec              mu_tot;
1062     gmx_bool          do_log = FALSE, do_ene = FALSE, do_x, do_f;
1063     tensor            vir, pres;
1064     int               number_steps, neval = 0, nstcg = inputrec->nstcgsteep;
1065     gmx_mdoutf_t      outf;
1066     int               i, m, gf, step, nminstep;
1067
1068     step = 0;
1069
1070     s_min = init_em_state();
1071     s_a   = init_em_state();
1072     s_b   = init_em_state();
1073     s_c   = init_em_state();
1074
1075     /* Init em and store the local state in s_min */
1076     init_em(fplog, CG, cr, inputrec,
1077             state_global, top_global, s_min, &top, &f,
1078             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1079             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
1080
1081     /* Print to log file */
1082     print_em_start(fplog, cr, walltime_accounting, wcycle, CG);
1083
1084     /* Max number of steps */
1085     number_steps = inputrec->nsteps;
1086
1087     if (MASTER(cr))
1088     {
1089         sp_header(stderr, CG, inputrec->em_tol, number_steps);
1090     }
1091     if (fplog)
1092     {
1093         sp_header(fplog, CG, inputrec->em_tol, number_steps);
1094     }
1095
1096     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1097     /* do_force always puts the charge groups in the box and shifts again
1098      * We do not unshift, so molecules are always whole in congrad.c
1099      */
1100     evaluate_energy(fplog, cr,
1101                     top_global, s_min, top,
1102                     inputrec, nrnb, wcycle, gstat,
1103                     vsite, constr, fcd, graph, mdatoms, fr,
1104                     mu_tot, enerd, vir, pres, -1, TRUE);
1105     where();
1106
1107     if (MASTER(cr))
1108     {
1109         /* Copy stuff to the energy bin for easy printing etc. */
1110         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1111                    mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1112                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1113
1114         print_ebin_header(fplog, step, step);
1115         print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1116                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1117     }
1118     where();
1119
1120     /* Estimate/guess the initial stepsize */
1121     stepsize = inputrec->em_stepsize/s_min->fnorm;
1122
1123     if (MASTER(cr))
1124     {
1125         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1126         fprintf(stderr, "   F-max             = %12.5e on atom %d\n",
1127                 s_min->fmax, s_min->a_fmax+1);
1128         fprintf(stderr, "   F-Norm            = %12.5e\n",
1129                 s_min->fnorm/sqrtNumAtoms);
1130         fprintf(stderr, "\n");
1131         /* and copy to the log file too... */
1132         fprintf(fplog, "   F-max             = %12.5e on atom %d\n",
1133                 s_min->fmax, s_min->a_fmax+1);
1134         fprintf(fplog, "   F-Norm            = %12.5e\n",
1135                 s_min->fnorm/sqrtNumAtoms);
1136         fprintf(fplog, "\n");
1137     }
1138     /* Start the loop over CG steps.
1139      * Each successful step is counted, and we continue until
1140      * we either converge or reach the max number of steps.
1141      */
1142     converged = FALSE;
1143     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1144     {
1145
1146         /* start taking steps in a new direction
1147          * First time we enter the routine, beta=0, and the direction is
1148          * simply the negative gradient.
1149          */
1150
1151         /* Calculate the new direction in p, and the gradient in this direction, gpa */
1152         p   = s_min->s.cg_p;
1153         sf  = s_min->f;
1154         gpa = 0;
1155         gf  = 0;
1156         for (i = 0; i < mdatoms->homenr; i++)
1157         {
1158             if (mdatoms->cFREEZE)
1159             {
1160                 gf = mdatoms->cFREEZE[i];
1161             }
1162             for (m = 0; m < DIM; m++)
1163             {
1164                 if (!inputrec->opts.nFreeze[gf][m])
1165                 {
1166                     p[i][m] = sf[i][m] + beta*p[i][m];
1167                     gpa    -= p[i][m]*sf[i][m];
1168                     /* f is negative gradient, thus the sign */
1169                 }
1170                 else
1171                 {
1172                     p[i][m] = 0;
1173                 }
1174             }
1175         }
1176
1177         /* Sum the gradient along the line across CPUs */
1178         if (PAR(cr))
1179         {
1180             gmx_sumd(1, &gpa, cr);
1181         }
1182
1183         /* Calculate the norm of the search vector */
1184         get_f_norm_max(cr, &(inputrec->opts), mdatoms, p, &pnorm, NULL, NULL);
1185
1186         /* Just in case stepsize reaches zero due to numerical precision... */
1187         if (stepsize <= 0)
1188         {
1189             stepsize = inputrec->em_stepsize/pnorm;
1190         }
1191
1192         /*
1193          * Double check the value of the derivative in the search direction.
1194          * If it is positive it must be due to the old information in the
1195          * CG formula, so just remove that and start over with beta=0.
1196          * This corresponds to a steepest descent step.
1197          */
1198         if (gpa > 0)
1199         {
1200             beta = 0;
1201             step--;   /* Don't count this step since we are restarting */
1202             continue; /* Go back to the beginning of the big for-loop */
1203         }
1204
1205         /* Calculate minimum allowed stepsize, before the average (norm)
1206          * relative change in coordinate is smaller than precision
1207          */
1208         minstep = 0;
1209         for (i = 0; i < mdatoms->homenr; i++)
1210         {
1211             for (m = 0; m < DIM; m++)
1212             {
1213                 tmp = fabs(s_min->s.x[i][m]);
1214                 if (tmp < 1.0)
1215                 {
1216                     tmp = 1.0;
1217                 }
1218                 tmp      = p[i][m]/tmp;
1219                 minstep += tmp*tmp;
1220             }
1221         }
1222         /* Add up from all CPUs */
1223         if (PAR(cr))
1224         {
1225             gmx_sumd(1, &minstep, cr);
1226         }
1227
1228         minstep = GMX_REAL_EPS/sqrt(minstep/(3*state_global->natoms));
1229
1230         if (stepsize < minstep)
1231         {
1232             converged = TRUE;
1233             break;
1234         }
1235
1236         /* Write coordinates if necessary */
1237         do_x = do_per_step(step, inputrec->nstxout);
1238         do_f = do_per_step(step, inputrec->nstfout);
1239
1240         write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
1241                       top_global, inputrec, step,
1242                       s_min, state_global);
1243
1244         /* Take a step downhill.
1245          * In theory, we should minimize the function along this direction.
1246          * That is quite possible, but it turns out to take 5-10 function evaluations
1247          * for each line. However, we dont really need to find the exact minimum -
1248          * it is much better to start a new CG step in a modified direction as soon
1249          * as we are close to it. This will save a lot of energy evaluations.
1250          *
1251          * In practice, we just try to take a single step.
1252          * If it worked (i.e. lowered the energy), we increase the stepsize but
1253          * the continue straight to the next CG step without trying to find any minimum.
1254          * If it didn't work (higher energy), there must be a minimum somewhere between
1255          * the old position and the new one.
1256          *
1257          * Due to the finite numerical accuracy, it turns out that it is a good idea
1258          * to even accept a SMALL increase in energy, if the derivative is still downhill.
1259          * This leads to lower final energies in the tests I've done. / Erik
1260          */
1261         s_a->epot = s_min->epot;
1262         a         = 0.0;
1263         c         = a + stepsize; /* reference position along line is zero */
1264
1265         if (DOMAINDECOMP(cr) && s_min->s.ddp_count < cr->dd->ddp_count)
1266         {
1267             em_dd_partition_system(fplog, step, cr, top_global, inputrec,
1268                                    s_min, top, mdatoms, fr, vsite, constr,
1269                                    nrnb, wcycle);
1270         }
1271
1272         /* Take a trial step (new coords in s_c) */
1273         do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, c, s_min->s.cg_p, s_c,
1274                    constr, top, nrnb, wcycle, -1);
1275
1276         neval++;
1277         /* Calculate energy for the trial step */
1278         evaluate_energy(fplog, cr,
1279                         top_global, s_c, top,
1280                         inputrec, nrnb, wcycle, gstat,
1281                         vsite, constr, fcd, graph, mdatoms, fr,
1282                         mu_tot, enerd, vir, pres, -1, FALSE);
1283
1284         /* Calc derivative along line */
1285         p   = s_c->s.cg_p;
1286         sf  = s_c->f;
1287         gpc = 0;
1288         for (i = 0; i < mdatoms->homenr; i++)
1289         {
1290             for (m = 0; m < DIM; m++)
1291             {
1292                 gpc -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1293             }
1294         }
1295         /* Sum the gradient along the line across CPUs */
1296         if (PAR(cr))
1297         {
1298             gmx_sumd(1, &gpc, cr);
1299         }
1300
1301         /* This is the max amount of increase in energy we tolerate */
1302         tmp = sqrt(GMX_REAL_EPS)*fabs(s_a->epot);
1303
1304         /* Accept the step if the energy is lower, or if it is not significantly higher
1305          * and the line derivative is still negative.
1306          */
1307         if (s_c->epot < s_a->epot || (gpc < 0 && s_c->epot < (s_a->epot + tmp)))
1308         {
1309             foundlower = TRUE;
1310             /* Great, we found a better energy. Increase step for next iteration
1311              * if we are still going down, decrease it otherwise
1312              */
1313             if (gpc < 0)
1314             {
1315                 stepsize *= 1.618034; /* The golden section */
1316             }
1317             else
1318             {
1319                 stepsize *= 0.618034; /* 1/golden section */
1320             }
1321         }
1322         else
1323         {
1324             /* New energy is the same or higher. We will have to do some work
1325              * to find a smaller value in the interval. Take smaller step next time!
1326              */
1327             foundlower = FALSE;
1328             stepsize  *= 0.618034;
1329         }
1330
1331
1332
1333
1334         /* OK, if we didn't find a lower value we will have to locate one now - there must
1335          * be one in the interval [a=0,c].
1336          * The same thing is valid here, though: Don't spend dozens of iterations to find
1337          * the line minimum. We try to interpolate based on the derivative at the endpoints,
1338          * and only continue until we find a lower value. In most cases this means 1-2 iterations.
1339          *
1340          * I also have a safeguard for potentially really pathological functions so we never
1341          * take more than 20 steps before we give up ...
1342          *
1343          * If we already found a lower value we just skip this step and continue to the update.
1344          */
1345         if (!foundlower)
1346         {
1347             nminstep = 0;
1348
1349             do
1350             {
1351                 /* Select a new trial point.
1352                  * If the derivatives at points a & c have different sign we interpolate to zero,
1353                  * otherwise just do a bisection.
1354                  */
1355                 if (gpa < 0 && gpc > 0)
1356                 {
1357                     b = a + gpa*(a-c)/(gpc-gpa);
1358                 }
1359                 else
1360                 {
1361                     b = 0.5*(a+c);
1362                 }
1363
1364                 /* safeguard if interpolation close to machine accuracy causes errors:
1365                  * never go outside the interval
1366                  */
1367                 if (b <= a || b >= c)
1368                 {
1369                     b = 0.5*(a+c);
1370                 }
1371
1372                 if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
1373                 {
1374                     /* Reload the old state */
1375                     em_dd_partition_system(fplog, -1, cr, top_global, inputrec,
1376                                            s_min, top, mdatoms, fr, vsite, constr,
1377                                            nrnb, wcycle);
1378                 }
1379
1380                 /* Take a trial step to this new point - new coords in s_b */
1381                 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC, s_min, b, s_min->s.cg_p, s_b,
1382                            constr, top, nrnb, wcycle, -1);
1383
1384                 neval++;
1385                 /* Calculate energy for the trial step */
1386                 evaluate_energy(fplog, cr,
1387                                 top_global, s_b, top,
1388                                 inputrec, nrnb, wcycle, gstat,
1389                                 vsite, constr, fcd, graph, mdatoms, fr,
1390                                 mu_tot, enerd, vir, pres, -1, FALSE);
1391
1392                 /* p does not change within a step, but since the domain decomposition
1393                  * might change, we have to use cg_p of s_b here.
1394                  */
1395                 p   = s_b->s.cg_p;
1396                 sf  = s_b->f;
1397                 gpb = 0;
1398                 for (i = 0; i < mdatoms->homenr; i++)
1399                 {
1400                     for (m = 0; m < DIM; m++)
1401                     {
1402                         gpb -= p[i][m]*sf[i][m]; /* f is negative gradient, thus the sign */
1403                     }
1404                 }
1405                 /* Sum the gradient along the line across CPUs */
1406                 if (PAR(cr))
1407                 {
1408                     gmx_sumd(1, &gpb, cr);
1409                 }
1410
1411                 if (debug)
1412                 {
1413                     fprintf(debug, "CGE: EpotA %f EpotB %f EpotC %f gpb %f\n",
1414                             s_a->epot, s_b->epot, s_c->epot, gpb);
1415                 }
1416
1417                 epot_repl = s_b->epot;
1418
1419                 /* Keep one of the intervals based on the value of the derivative at the new point */
1420                 if (gpb > 0)
1421                 {
1422                     /* Replace c endpoint with b */
1423                     swap_em_state(s_b, s_c);
1424                     c   = b;
1425                     gpc = gpb;
1426                 }
1427                 else
1428                 {
1429                     /* Replace a endpoint with b */
1430                     swap_em_state(s_b, s_a);
1431                     a   = b;
1432                     gpa = gpb;
1433                 }
1434
1435                 /*
1436                  * Stop search as soon as we find a value smaller than the endpoints.
1437                  * Never run more than 20 steps, no matter what.
1438                  */
1439                 nminstep++;
1440             }
1441             while ((epot_repl > s_a->epot || epot_repl > s_c->epot) &&
1442                    (nminstep < 20));
1443
1444             if (fabs(epot_repl - s_min->epot) < fabs(s_min->epot)*GMX_REAL_EPS ||
1445                 nminstep >= 20)
1446             {
1447                 /* OK. We couldn't find a significantly lower energy.
1448                  * If beta==0 this was steepest descent, and then we give up.
1449                  * If not, set beta=0 and restart with steepest descent before quitting.
1450                  */
1451                 if (beta == 0.0)
1452                 {
1453                     /* Converged */
1454                     converged = TRUE;
1455                     break;
1456                 }
1457                 else
1458                 {
1459                     /* Reset memory before giving up */
1460                     beta = 0.0;
1461                     continue;
1462                 }
1463             }
1464
1465             /* Select min energy state of A & C, put the best in B.
1466              */
1467             if (s_c->epot < s_a->epot)
1468             {
1469                 if (debug)
1470                 {
1471                     fprintf(debug, "CGE: C (%f) is lower than A (%f), moving C to B\n",
1472                             s_c->epot, s_a->epot);
1473                 }
1474                 swap_em_state(s_b, s_c);
1475                 gpb = gpc;
1476             }
1477             else
1478             {
1479                 if (debug)
1480                 {
1481                     fprintf(debug, "CGE: A (%f) is lower than C (%f), moving A to B\n",
1482                             s_a->epot, s_c->epot);
1483                 }
1484                 swap_em_state(s_b, s_a);
1485                 gpb = gpa;
1486             }
1487
1488         }
1489         else
1490         {
1491             if (debug)
1492             {
1493                 fprintf(debug, "CGE: Found a lower energy %f, moving C to B\n",
1494                         s_c->epot);
1495             }
1496             swap_em_state(s_b, s_c);
1497             gpb = gpc;
1498         }
1499
1500         /* new search direction */
1501         /* beta = 0 means forget all memory and restart with steepest descents. */
1502         if (nstcg && ((step % nstcg) == 0))
1503         {
1504             beta = 0.0;
1505         }
1506         else
1507         {
1508             /* s_min->fnorm cannot be zero, because then we would have converged
1509              * and broken out.
1510              */
1511
1512             /* Polak-Ribiere update.
1513              * Change to fnorm2/fnorm2_old for Fletcher-Reeves
1514              */
1515             beta = pr_beta(cr, &inputrec->opts, mdatoms, top_global, s_min, s_b);
1516         }
1517         /* Limit beta to prevent oscillations */
1518         if (fabs(beta) > 5.0)
1519         {
1520             beta = 0.0;
1521         }
1522
1523
1524         /* update positions */
1525         swap_em_state(s_min, s_b);
1526         gpa = gpb;
1527
1528         /* Print it if necessary */
1529         if (MASTER(cr))
1530         {
1531             if (bVerbose)
1532             {
1533                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1534                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
1535                         step, s_min->epot, s_min->fnorm/sqrtNumAtoms,
1536                         s_min->fmax, s_min->a_fmax+1);
1537                 fflush(stderr);
1538             }
1539             /* Store the new (lower) energies */
1540             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1541                        mdatoms->tmass, enerd, &s_min->s, inputrec->fepvals, inputrec->expandedvals, s_min->s.box,
1542                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
1543
1544             do_log = do_per_step(step, inputrec->nstlog);
1545             do_ene = do_per_step(step, inputrec->nstenergy);
1546
1547             /* Prepare IMD energy record, if bIMD is TRUE. */
1548             IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, step, TRUE);
1549
1550             if (do_log)
1551             {
1552                 print_ebin_header(fplog, step, step);
1553             }
1554             print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
1555                        do_log ? fplog : NULL, step, step, eprNORMAL,
1556                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1557         }
1558
1559         /* Send energies and positions to the IMD client if bIMD is TRUE. */
1560         if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
1561         {
1562             IMD_send_positions(inputrec->imd);
1563         }
1564
1565         /* Stop when the maximum force lies below tolerance.
1566          * If we have reached machine precision, converged is already set to true.
1567          */
1568         converged = converged || (s_min->fmax < inputrec->em_tol);
1569
1570     }   /* End of the loop */
1571
1572     /* IMD cleanup, if bIMD is TRUE. */
1573     IMD_finalize(inputrec->bIMD, inputrec->imd);
1574
1575     if (converged)
1576     {
1577         step--; /* we never took that last step in this case */
1578
1579     }
1580     if (s_min->fmax > inputrec->em_tol)
1581     {
1582         if (MASTER(cr))
1583         {
1584             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
1585             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
1586         }
1587         converged = FALSE;
1588     }
1589
1590     if (MASTER(cr))
1591     {
1592         /* If we printed energy and/or logfile last step (which was the last step)
1593          * we don't have to do it again, but otherwise print the final values.
1594          */
1595         if (!do_log)
1596         {
1597             /* Write final value to log since we didn't do anything the last step */
1598             print_ebin_header(fplog, step, step);
1599         }
1600         if (!do_ene || !do_log)
1601         {
1602             /* Write final energy file entries */
1603             print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
1604                        !do_log ? fplog : NULL, step, step, eprNORMAL,
1605                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1606         }
1607     }
1608
1609     /* Print some stuff... */
1610     if (MASTER(cr))
1611     {
1612         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
1613     }
1614
1615     /* IMPORTANT!
1616      * For accurate normal mode calculation it is imperative that we
1617      * store the last conformation into the full precision binary trajectory.
1618      *
1619      * However, we should only do it if we did NOT already write this step
1620      * above (which we did if do_x or do_f was true).
1621      */
1622     do_x = !do_per_step(step, inputrec->nstxout);
1623     do_f = (inputrec->nstfout > 0 && !do_per_step(step, inputrec->nstfout));
1624
1625     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
1626                   top_global, inputrec, step,
1627                   s_min, state_global);
1628
1629
1630     if (MASTER(cr))
1631     {
1632         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1633         fnormn = s_min->fnorm/sqrtNumAtoms;
1634         print_converged(stderr, CG, inputrec->em_tol, step, converged, number_steps,
1635                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1636         print_converged(fplog, CG, inputrec->em_tol, step, converged, number_steps,
1637                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
1638
1639         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
1640     }
1641
1642     finish_em(cr, outf, walltime_accounting, wcycle);
1643
1644     /* To print the actual number of steps we needed somewhere */
1645     walltime_accounting_set_nsteps_done(walltime_accounting, step);
1646
1647     return 0;
1648 }   /* That's all folks */
1649
1650
1651 /*! \brief Do L-BFGS conjugate gradients minimization
1652     \copydoc integrator_t (FILE *fplog, t_commrec *cr,
1653                            int nfile, const t_filenm fnm[],
1654                            const gmx_output_env_t *oenv, gmx_bool bVerbose,
1655                            int nstglobalcomm,
1656                            gmx_vsite_t *vsite, gmx_constr_t constr,
1657                            int stepout,
1658                            t_inputrec *inputrec,
1659                            gmx_mtop_t *top_global, t_fcdata *fcd,
1660                            t_state *state_global,
1661                            t_mdatoms *mdatoms,
1662                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1663                            gmx_edsam_t ed,
1664                            t_forcerec *fr,
1665                            int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
1666                            real cpt_period, real max_hours,
1667                            int imdport,
1668                            unsigned long Flags,
1669                            gmx_walltime_accounting_t walltime_accounting)
1670  */
1671 double do_lbfgs(FILE *fplog, t_commrec *cr,
1672                 int nfile, const t_filenm fnm[],
1673                 const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
1674                 int gmx_unused nstglobalcomm,
1675                 gmx_vsite_t *vsite, gmx_constr_t constr,
1676                 int gmx_unused stepout,
1677                 t_inputrec *inputrec,
1678                 gmx_mtop_t *top_global, t_fcdata *fcd,
1679                 t_state *state_global,
1680                 t_mdatoms *mdatoms,
1681                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
1682                 gmx_edsam_t gmx_unused ed,
1683                 t_forcerec *fr,
1684                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
1685                 gmx_membed_t gmx_unused *membed,
1686                 real gmx_unused cpt_period, real gmx_unused max_hours,
1687                 int imdport,
1688                 unsigned long gmx_unused Flags,
1689                 gmx_walltime_accounting_t walltime_accounting)
1690 {
1691     static const char *LBFGS = "Low-Memory BFGS Minimizer";
1692     em_state_t         ems;
1693     gmx_localtop_t    *top;
1694     gmx_enerdata_t    *enerd;
1695     rvec              *f;
1696     gmx_global_stat_t  gstat;
1697     t_graph           *graph;
1698     int                ncorr, nmaxcorr, point, cp, neval, nminstep;
1699     double             stepsize, step_taken, gpa, gpb, gpc, tmp, minstep;
1700     real              *rho, *alpha, *ff, *xx, *p, *s, *lastx, *lastf, **dx, **dg;
1701     real              *xa, *xb, *xc, *fa, *fb, *fc, *xtmp, *ftmp;
1702     real               a, b, c, maxdelta, delta;
1703     real               diag, Epot0, Epot, EpotA, EpotB, EpotC;
1704     real               dgdx, dgdg, sq, yr, beta;
1705     t_mdebin          *mdebin;
1706     gmx_bool           converged;
1707     rvec               mu_tot;
1708     real               fnorm, fmax;
1709     gmx_bool           do_log, do_ene, do_x, do_f, foundlower, *frozen;
1710     tensor             vir, pres;
1711     int                start, end, number_steps;
1712     gmx_mdoutf_t       outf;
1713     int                i, k, m, n, nfmax, gf, step;
1714     int                mdof_flags;
1715
1716     if (PAR(cr))
1717     {
1718         gmx_fatal(FARGS, "Cannot do parallel L-BFGS Minimization - yet.\n");
1719     }
1720
1721     if (NULL != constr)
1722     {
1723         gmx_fatal(FARGS, "The combination of constraints and L-BFGS minimization is not implemented. Either do not use constraints, or use another minimizer (e.g. steepest descent).");
1724     }
1725
1726     n        = 3*state_global->natoms;
1727     nmaxcorr = inputrec->nbfgscorr;
1728
1729     /* Allocate memory */
1730     /* Use pointers to real so we dont have to loop over both atoms and
1731      * dimensions all the time...
1732      * x/f are allocated as rvec *, so make new x0/f0 pointers-to-real
1733      * that point to the same memory.
1734      */
1735     snew(xa, n);
1736     snew(xb, n);
1737     snew(xc, n);
1738     snew(fa, n);
1739     snew(fb, n);
1740     snew(fc, n);
1741     snew(frozen, n);
1742
1743     snew(p, n);
1744     snew(lastx, n);
1745     snew(lastf, n);
1746     snew(rho, nmaxcorr);
1747     snew(alpha, nmaxcorr);
1748
1749     snew(dx, nmaxcorr);
1750     for (i = 0; i < nmaxcorr; i++)
1751     {
1752         snew(dx[i], n);
1753     }
1754
1755     snew(dg, nmaxcorr);
1756     for (i = 0; i < nmaxcorr; i++)
1757     {
1758         snew(dg[i], n);
1759     }
1760
1761     step  = 0;
1762     neval = 0;
1763
1764     /* Init em */
1765     init_em(fplog, LBFGS, cr, inputrec,
1766             state_global, top_global, &ems, &top, &f,
1767             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
1768             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
1769     /* Do_lbfgs is not completely updated like do_steep and do_cg,
1770      * so we free some memory again.
1771      */
1772     sfree(ems.s.x);
1773     sfree(ems.f);
1774
1775     xx = (real *)state_global->x;
1776     ff = (real *)f;
1777
1778     start = 0;
1779     end   = mdatoms->homenr;
1780
1781     /* Print to log file */
1782     print_em_start(fplog, cr, walltime_accounting, wcycle, LBFGS);
1783
1784     do_log = do_ene = do_x = do_f = TRUE;
1785
1786     /* Max number of steps */
1787     number_steps = inputrec->nsteps;
1788
1789     /* Create a 3*natoms index to tell whether each degree of freedom is frozen */
1790     gf = 0;
1791     for (i = start; i < end; i++)
1792     {
1793         if (mdatoms->cFREEZE)
1794         {
1795             gf = mdatoms->cFREEZE[i];
1796         }
1797         for (m = 0; m < DIM; m++)
1798         {
1799             frozen[3*i+m] = inputrec->opts.nFreeze[gf][m];
1800         }
1801     }
1802     if (MASTER(cr))
1803     {
1804         sp_header(stderr, LBFGS, inputrec->em_tol, number_steps);
1805     }
1806     if (fplog)
1807     {
1808         sp_header(fplog, LBFGS, inputrec->em_tol, number_steps);
1809     }
1810
1811     if (vsite)
1812     {
1813         construct_vsites(vsite, state_global->x, 1, NULL,
1814                          top->idef.iparams, top->idef.il,
1815                          fr->ePBC, fr->bMolPBC, cr, state_global->box);
1816     }
1817
1818     /* Call the force routine and some auxiliary (neighboursearching etc.) */
1819     /* do_force always puts the charge groups in the box and shifts again
1820      * We do not unshift, so molecules are always whole
1821      */
1822     neval++;
1823     ems.s.x = state_global->x;
1824     ems.f   = f;
1825     evaluate_energy(fplog, cr,
1826                     top_global, &ems, top,
1827                     inputrec, nrnb, wcycle, gstat,
1828                     vsite, constr, fcd, graph, mdatoms, fr,
1829                     mu_tot, enerd, vir, pres, -1, TRUE);
1830     where();
1831
1832     if (MASTER(cr))
1833     {
1834         /* Copy stuff to the energy bin for easy printing etc. */
1835         upd_mdebin(mdebin, FALSE, FALSE, (double)step,
1836                    mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
1837                    NULL, NULL, vir, pres, NULL, mu_tot, constr);
1838
1839         print_ebin_header(fplog, step, step);
1840         print_ebin(mdoutf_get_fp_ene(outf), TRUE, FALSE, FALSE, fplog, step, step, eprNORMAL,
1841                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
1842     }
1843     where();
1844
1845     /* This is the starting energy */
1846     Epot = enerd->term[F_EPOT];
1847
1848     fnorm = ems.fnorm;
1849     fmax  = ems.fmax;
1850     nfmax = ems.a_fmax;
1851
1852     /* Set the initial step.
1853      * since it will be multiplied by the non-normalized search direction
1854      * vector (force vector the first time), we scale it by the
1855      * norm of the force.
1856      */
1857
1858     if (MASTER(cr))
1859     {
1860         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
1861         fprintf(stderr, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1862         fprintf(stderr, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1863         fprintf(stderr, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
1864         fprintf(stderr, "\n");
1865         /* and copy to the log file too... */
1866         fprintf(fplog, "Using %d BFGS correction steps.\n\n", nmaxcorr);
1867         fprintf(fplog, "   F-max             = %12.5e on atom %d\n", fmax, nfmax+1);
1868         fprintf(fplog, "   F-Norm            = %12.5e\n", fnorm/sqrtNumAtoms);
1869         fprintf(fplog, "\n");
1870     }
1871
1872     // Point is an index to the memory of search directions, where 0 is the first one.
1873     point = 0;
1874
1875     // Set initial search direction to the force (-gradient), or 0 for frozen particles.
1876     for (i = 0; i < n; i++)
1877     {
1878         if (!frozen[i])
1879         {
1880             dx[point][i] = ff[i]; /* Initial search direction */
1881         }
1882         else
1883         {
1884             dx[point][i] = 0;
1885         }
1886     }
1887
1888     // Stepsize will be modified during the search, and actually it is not critical
1889     // (the main efficiency in the algorithm comes from changing directions), but
1890     // we still need an initial value, so estimate it as the inverse of the norm
1891     // so we take small steps where the potential fluctuates a lot.
1892     stepsize  = 1.0/fnorm;
1893
1894     /* Start the loop over BFGS steps.
1895      * Each successful step is counted, and we continue until
1896      * we either converge or reach the max number of steps.
1897      */
1898
1899     ncorr = 0;
1900
1901     /* Set the gradient from the force */
1902     converged = FALSE;
1903     for (step = 0; (number_steps < 0 || step <= number_steps) && !converged; step++)
1904     {
1905
1906         /* Write coordinates if necessary */
1907         do_x = do_per_step(step, inputrec->nstxout);
1908         do_f = do_per_step(step, inputrec->nstfout);
1909
1910         mdof_flags = 0;
1911         if (do_x)
1912         {
1913             mdof_flags |= MDOF_X;
1914         }
1915
1916         if (do_f)
1917         {
1918             mdof_flags |= MDOF_F;
1919         }
1920
1921         if (inputrec->bIMD)
1922         {
1923             mdof_flags |= MDOF_IMD;
1924         }
1925
1926         mdoutf_write_to_trajectory_files(fplog, cr, outf, mdof_flags,
1927                                          top_global, step, (real)step, state_global, state_global, f);
1928
1929         /* Do the linesearching in the direction dx[point][0..(n-1)] */
1930
1931         /* make s a pointer to current search direction - point=0 first time we get here */
1932         s = dx[point];
1933
1934         // calculate line gradient in position A
1935         for (gpa = 0, i = 0; i < n; i++)
1936         {
1937             gpa -= s[i]*ff[i];
1938         }
1939
1940         /* Calculate minimum allowed stepsize along the line, before the average (norm)
1941          * relative change in coordinate is smaller than precision
1942          */
1943         for (minstep = 0, i = 0; i < n; i++)
1944         {
1945             tmp = fabs(xx[i]);
1946             if (tmp < 1.0)
1947             {
1948                 tmp = 1.0;
1949             }
1950             tmp      = s[i]/tmp;
1951             minstep += tmp*tmp;
1952         }
1953         minstep = GMX_REAL_EPS/sqrt(minstep/n);
1954
1955         if (stepsize < minstep)
1956         {
1957             converged = TRUE;
1958             break;
1959         }
1960
1961         // Before taking any steps along the line, store the old position
1962         for (i = 0; i < n; i++)
1963         {
1964             lastx[i] = xx[i];
1965             lastf[i] = ff[i];
1966         }
1967         Epot0 = Epot;
1968
1969         for (i = 0; i < n; i++)
1970         {
1971             xa[i] = xx[i];
1972         }
1973
1974         /* Take a step downhill.
1975          * In theory, we should find the actual minimum of the function in this
1976          * direction, somewhere along the line.
1977          * That is quite possible, but it turns out to take 5-10 function evaluations
1978          * for each line. However, we dont really need to find the exact minimum -
1979          * it is much better to start a new BFGS step in a modified direction as soon
1980          * as we are close to it. This will save a lot of energy evaluations.
1981          *
1982          * In practice, we just try to take a single step.
1983          * If it worked (i.e. lowered the energy), we increase the stepsize but
1984          * continue straight to the next BFGS step without trying to find any minimum,
1985          * i.e. we change the search direction too. If the line was smooth, it is
1986          * likely we are in a smooth region, and then it makes sense to take longer
1987          * steps in the modified search direction too.
1988          *
1989          * If it didn't work (higher energy), there must be a minimum somewhere between
1990          * the old position and the new one. Then we need to start by finding a lower
1991          * value before we change search direction. Since the energy was apparently
1992          * quite rough, we need to decrease the step size.
1993          *
1994          * Due to the finite numerical accuracy, it turns out that it is a good idea
1995          * to accept a SMALL increase in energy, if the derivative is still downhill.
1996          * This leads to lower final energies in the tests I've done. / Erik
1997          */
1998
1999         // State "A" is the first position along the line.
2000         // reference position along line is initially zero
2001         EpotA      = Epot0;
2002         a          = 0.0;
2003
2004         // Check stepsize first. We do not allow displacements
2005         // larger than emstep.
2006         //
2007         do
2008         {
2009             // Pick a new position C by adding stepsize to A.
2010             c        = a + stepsize;
2011
2012             // Calculate what the largest change in any individual coordinate
2013             // would be (translation along line * gradient along line)
2014             maxdelta = 0;
2015             for (i = 0; i < n; i++)
2016             {
2017                 delta = c*s[i];
2018                 if (delta > maxdelta)
2019                 {
2020                     maxdelta = delta;
2021                 }
2022             }
2023             // If any displacement is larger than the stepsize limit, reduce the step
2024             if (maxdelta > inputrec->em_stepsize)
2025             {
2026                 stepsize *= 0.1;
2027             }
2028         }
2029         while (maxdelta > inputrec->em_stepsize);
2030
2031         // Take a trial step and move the coordinate array xc[] to position C
2032         for (i = 0; i < n; i++)
2033         {
2034             xc[i] = lastx[i] + c*s[i];
2035         }
2036
2037         neval++;
2038         // Calculate energy for the trial step in position C
2039         ems.s.x = (rvec *)xc;
2040         ems.f   = (rvec *)fc;
2041         evaluate_energy(fplog, cr,
2042                         top_global, &ems, top,
2043                         inputrec, nrnb, wcycle, gstat,
2044                         vsite, constr, fcd, graph, mdatoms, fr,
2045                         mu_tot, enerd, vir, pres, step, FALSE);
2046         EpotC = ems.epot;
2047
2048         // Calc line gradient in position C
2049         for (gpc = 0, i = 0; i < n; i++)
2050         {
2051             gpc -= s[i]*fc[i]; /* f is negative gradient, thus the sign */
2052         }
2053         /* Sum the gradient along the line across CPUs */
2054         if (PAR(cr))
2055         {
2056             gmx_sumd(1, &gpc, cr);
2057         }
2058
2059         // This is the max amount of increase in energy we tolerate.
2060         // By allowing VERY small changes (close to numerical precision) we
2061         // frequently find even better (lower) final energies.
2062         tmp = sqrt(GMX_REAL_EPS)*fabs(EpotA);
2063
2064         // Accept the step if the energy is lower in the new position C (compared to A),
2065         // or if it is not significantly higher and the line derivative is still negative.
2066         if (EpotC < EpotA || (gpc < 0 && EpotC < (EpotA+tmp)))
2067         {
2068             // Great, we found a better energy. We no longer try to alter the
2069             // stepsize, but simply accept this new better position. The we select a new
2070             // search direction instead, which will be much more efficient than continuing
2071             // to take smaller steps along a line. Set fnorm based on the new C position,
2072             // which will be used to update the stepsize to 1/fnorm further down.
2073             foundlower = TRUE;
2074             fnorm      = ems.fnorm;
2075         }
2076         else
2077         {
2078             // If we got here, the energy is NOT lower in point C, i.e. it will be the same
2079             // or higher than in point A. In this case it is pointless to move to point C,
2080             // so we will have to do more iterations along the same line to find a smaller
2081             // value in the interval [A=0.0,C].
2082             // Here, A is still 0.0, but that will change when we do a search in the interval
2083             // [0.0,C] below. That search we will do by interpolation or bisection rather
2084             // than with the stepsize, so no need to modify it. For the next search direction
2085             // it will be reset to 1/fnorm anyway.
2086             foundlower = FALSE;
2087         }
2088
2089         if (!foundlower)
2090         {
2091             // OK, if we didn't find a lower value we will have to locate one now - there must
2092             // be one in the interval [a,c].
2093             // The same thing is valid here, though: Don't spend dozens of iterations to find
2094             // the line minimum. We try to interpolate based on the derivative at the endpoints,
2095             // and only continue until we find a lower value. In most cases this means 1-2 iterations.
2096             // I also have a safeguard for potentially really pathological functions so we never
2097             // take more than 20 steps before we give up.
2098             // If we already found a lower value we just skip this step and continue to the update.
2099             nminstep = 0;
2100             do
2101             {
2102                 // Select a new trial point B in the interval [A,C].
2103                 // If the derivatives at points a & c have different sign we interpolate to zero,
2104                 // otherwise just do a bisection since there might be multiple minima/maxima
2105                 // inside the interval.
2106                 if (gpa < 0 && gpc > 0)
2107                 {
2108                     b = a + gpa*(a-c)/(gpc-gpa);
2109                 }
2110                 else
2111                 {
2112                     b = 0.5*(a+c);
2113                 }
2114
2115                 /* safeguard if interpolation close to machine accuracy causes errors:
2116                  * never go outside the interval
2117                  */
2118                 if (b <= a || b >= c)
2119                 {
2120                     b = 0.5*(a+c);
2121                 }
2122
2123                 // Take a trial step to point B
2124                 for (i = 0; i < n; i++)
2125                 {
2126                     xb[i] = lastx[i] + b*s[i];
2127                 }
2128
2129                 neval++;
2130                 // Calculate energy for the trial step in point B
2131                 ems.s.x = (rvec *)xb;
2132                 ems.f   = (rvec *)fb;
2133                 evaluate_energy(fplog, cr,
2134                                 top_global, &ems, top,
2135                                 inputrec, nrnb, wcycle, gstat,
2136                                 vsite, constr, fcd, graph, mdatoms, fr,
2137                                 mu_tot, enerd, vir, pres, step, FALSE);
2138                 EpotB = ems.epot;
2139                 fnorm = ems.fnorm;
2140
2141                 // Calculate gradient in point B
2142                 for (gpb = 0, i = 0; i < n; i++)
2143                 {
2144                     gpb -= s[i]*fb[i]; /* f is negative gradient, thus the sign */
2145
2146                 }
2147                 /* Sum the gradient along the line across CPUs */
2148                 if (PAR(cr))
2149                 {
2150                     gmx_sumd(1, &gpb, cr);
2151                 }
2152
2153                 // Keep one of the intervals [A,B] or [B,C] based on the value of the derivative
2154                 // at the new point B, and rename the endpoints of this new interval A and C.
2155                 if (gpb > 0)
2156                 {
2157                     /* Replace c endpoint with b */
2158                     EpotC = EpotB;
2159                     c     = b;
2160                     gpc   = gpb;
2161                     /* swap coord pointers b/c */
2162                     xtmp = xb;
2163                     ftmp = fb;
2164                     xb   = xc;
2165                     fb   = fc;
2166                     xc   = xtmp;
2167                     fc   = ftmp;
2168                 }
2169                 else
2170                 {
2171                     /* Replace a endpoint with b */
2172                     EpotA = EpotB;
2173                     a     = b;
2174                     gpa   = gpb;
2175                     /* swap coord pointers a/b */
2176                     xtmp = xb;
2177                     ftmp = fb;
2178                     xb   = xa;
2179                     fb   = fa;
2180                     xa   = xtmp;
2181                     fa   = ftmp;
2182                 }
2183
2184                 /*
2185                  * Stop search as soon as we find a value smaller than the endpoints,
2186                  * or if the tolerance is below machine precision.
2187                  * Never run more than 20 steps, no matter what.
2188                  */
2189                 nminstep++;
2190             }
2191             while ((EpotB > EpotA || EpotB > EpotC) && (nminstep < 20));
2192
2193             if (fabs(EpotB-Epot0) < GMX_REAL_EPS || nminstep >= 20)
2194             {
2195                 /* OK. We couldn't find a significantly lower energy.
2196                  * If ncorr==0 this was steepest descent, and then we give up.
2197                  * If not, reset memory to restart as steepest descent before quitting.
2198                  */
2199                 if (ncorr == 0)
2200                 {
2201                     /* Converged */
2202                     converged = TRUE;
2203                     break;
2204                 }
2205                 else
2206                 {
2207                     /* Reset memory */
2208                     ncorr = 0;
2209                     /* Search in gradient direction */
2210                     for (i = 0; i < n; i++)
2211                     {
2212                         dx[point][i] = ff[i];
2213                     }
2214                     /* Reset stepsize */
2215                     stepsize = 1.0/fnorm;
2216                     continue;
2217                 }
2218             }
2219
2220             /* Select min energy state of A & C, put the best in xx/ff/Epot
2221              */
2222             if (EpotC < EpotA)
2223             {
2224                 Epot = EpotC;
2225                 /* Use state C */
2226                 for (i = 0; i < n; i++)
2227                 {
2228                     xx[i] = xc[i];
2229                     ff[i] = fc[i];
2230                 }
2231                 step_taken = c;
2232             }
2233             else
2234             {
2235                 Epot = EpotA;
2236                 /* Use state A */
2237                 for (i = 0; i < n; i++)
2238                 {
2239                     xx[i] = xa[i];
2240                     ff[i] = fa[i];
2241                 }
2242                 step_taken = a;
2243             }
2244
2245         }
2246         else
2247         {
2248             /* found lower */
2249             Epot = EpotC;
2250             /* Use state C */
2251             for (i = 0; i < n; i++)
2252             {
2253                 xx[i] = xc[i];
2254                 ff[i] = fc[i];
2255             }
2256             step_taken = c;
2257         }
2258
2259         /* Update the memory information, and calculate a new
2260          * approximation of the inverse hessian
2261          */
2262
2263         /* Have new data in Epot, xx, ff */
2264         if (ncorr < nmaxcorr)
2265         {
2266             ncorr++;
2267         }
2268
2269         for (i = 0; i < n; i++)
2270         {
2271             dg[point][i]  = lastf[i]-ff[i];
2272             dx[point][i] *= step_taken;
2273         }
2274
2275         dgdg = 0;
2276         dgdx = 0;
2277         for (i = 0; i < n; i++)
2278         {
2279             dgdg += dg[point][i]*dg[point][i];
2280             dgdx += dg[point][i]*dx[point][i];
2281         }
2282
2283         diag = dgdx/dgdg;
2284
2285         rho[point] = 1.0/dgdx;
2286         point++;
2287
2288         if (point >= nmaxcorr)
2289         {
2290             point = 0;
2291         }
2292
2293         /* Update */
2294         for (i = 0; i < n; i++)
2295         {
2296             p[i] = ff[i];
2297         }
2298
2299         cp = point;
2300
2301         /* Recursive update. First go back over the memory points */
2302         for (k = 0; k < ncorr; k++)
2303         {
2304             cp--;
2305             if (cp < 0)
2306             {
2307                 cp = ncorr-1;
2308             }
2309
2310             sq = 0;
2311             for (i = 0; i < n; i++)
2312             {
2313                 sq += dx[cp][i]*p[i];
2314             }
2315
2316             alpha[cp] = rho[cp]*sq;
2317
2318             for (i = 0; i < n; i++)
2319             {
2320                 p[i] -= alpha[cp]*dg[cp][i];
2321             }
2322         }
2323
2324         for (i = 0; i < n; i++)
2325         {
2326             p[i] *= diag;
2327         }
2328
2329         /* And then go forward again */
2330         for (k = 0; k < ncorr; k++)
2331         {
2332             yr = 0;
2333             for (i = 0; i < n; i++)
2334             {
2335                 yr += p[i]*dg[cp][i];
2336             }
2337
2338             beta = rho[cp]*yr;
2339             beta = alpha[cp]-beta;
2340
2341             for (i = 0; i < n; i++)
2342             {
2343                 p[i] += beta*dx[cp][i];
2344             }
2345
2346             cp++;
2347             if (cp >= ncorr)
2348             {
2349                 cp = 0;
2350             }
2351         }
2352
2353         for (i = 0; i < n; i++)
2354         {
2355             if (!frozen[i])
2356             {
2357                 dx[point][i] = p[i];
2358             }
2359             else
2360             {
2361                 dx[point][i] = 0;
2362             }
2363         }
2364
2365         /* Test whether the convergence criterion is met */
2366         get_f_norm_max(cr, &(inputrec->opts), mdatoms, f, &fnorm, &fmax, &nfmax);
2367
2368         /* Print it if necessary */
2369         if (MASTER(cr))
2370         {
2371             if (bVerbose)
2372             {
2373                 double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2374                 fprintf(stderr, "\rStep %d, Epot=%12.6e, Fnorm=%9.3e, Fmax=%9.3e (atom %d)\n",
2375                         step, Epot, fnorm/sqrtNumAtoms, fmax, nfmax+1);
2376                 fflush(stderr);
2377             }
2378             /* Store the new (lower) energies */
2379             upd_mdebin(mdebin, FALSE, FALSE, (double)step,
2380                        mdatoms->tmass, enerd, state_global, inputrec->fepvals, inputrec->expandedvals, state_global->box,
2381                        NULL, NULL, vir, pres, NULL, mu_tot, constr);
2382             do_log = do_per_step(step, inputrec->nstlog);
2383             do_ene = do_per_step(step, inputrec->nstenergy);
2384             if (do_log)
2385             {
2386                 print_ebin_header(fplog, step, step);
2387             }
2388             print_ebin(mdoutf_get_fp_ene(outf), do_ene, FALSE, FALSE,
2389                        do_log ? fplog : NULL, step, step, eprNORMAL,
2390                        mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2391         }
2392
2393         /* Send x and E to IMD client, if bIMD is TRUE. */
2394         if (do_IMD(inputrec->bIMD, step, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
2395         {
2396             IMD_send_positions(inputrec->imd);
2397         }
2398
2399         // Reset stepsize in we are doing more iterations
2400         stepsize = 1.0/fnorm;
2401
2402         /* Stop when the maximum force lies below tolerance.
2403          * If we have reached machine precision, converged is already set to true.
2404          */
2405         converged = converged || (fmax < inputrec->em_tol);
2406
2407     }   /* End of the loop */
2408
2409     /* IMD cleanup, if bIMD is TRUE. */
2410     IMD_finalize(inputrec->bIMD, inputrec->imd);
2411
2412     if (converged)
2413     {
2414         step--; /* we never took that last step in this case */
2415
2416     }
2417     if (fmax > inputrec->em_tol)
2418     {
2419         if (MASTER(cr))
2420         {
2421             warn_step(stderr, inputrec->em_tol, step-1 == number_steps, FALSE);
2422             warn_step(fplog, inputrec->em_tol, step-1 == number_steps, FALSE);
2423         }
2424         converged = FALSE;
2425     }
2426
2427     /* If we printed energy and/or logfile last step (which was the last step)
2428      * we don't have to do it again, but otherwise print the final values.
2429      */
2430     if (!do_log) /* Write final value to log since we didn't do anythin last step */
2431     {
2432         print_ebin_header(fplog, step, step);
2433     }
2434     if (!do_ene || !do_log) /* Write final energy file entries */
2435     {
2436         print_ebin(mdoutf_get_fp_ene(outf), !do_ene, FALSE, FALSE,
2437                    !do_log ? fplog : NULL, step, step, eprNORMAL,
2438                    mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2439     }
2440
2441     /* Print some stuff... */
2442     if (MASTER(cr))
2443     {
2444         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2445     }
2446
2447     /* IMPORTANT!
2448      * For accurate normal mode calculation it is imperative that we
2449      * store the last conformation into the full precision binary trajectory.
2450      *
2451      * However, we should only do it if we did NOT already write this step
2452      * above (which we did if do_x or do_f was true).
2453      */
2454     do_x = !do_per_step(step, inputrec->nstxout);
2455     do_f = !do_per_step(step, inputrec->nstfout);
2456     write_em_traj(fplog, cr, outf, do_x, do_f, ftp2fn(efSTO, nfile, fnm),
2457                   top_global, inputrec, step,
2458                   &ems, state_global);
2459
2460     if (MASTER(cr))
2461     {
2462         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2463         print_converged(stderr, LBFGS, inputrec->em_tol, step, converged,
2464                         number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
2465         print_converged(fplog, LBFGS, inputrec->em_tol, step, converged,
2466                         number_steps, Epot, fmax, nfmax, fnorm/sqrtNumAtoms);
2467
2468         fprintf(fplog, "\nPerformed %d energy evaluations in total.\n", neval);
2469     }
2470
2471     finish_em(cr, outf, walltime_accounting, wcycle);
2472
2473     /* To print the actual number of steps we needed somewhere */
2474     walltime_accounting_set_nsteps_done(walltime_accounting, step);
2475
2476     return 0;
2477 }   /* That's all folks */
2478
2479 /*! \brief Do steepest descents minimization
2480     \copydoc integrator_t (FILE *fplog, t_commrec *cr,
2481                            int nfile, const t_filenm fnm[],
2482                            const gmx_output_env_t *oenv, gmx_bool bVerbose,
2483                            int nstglobalcomm,
2484                            gmx_vsite_t *vsite, gmx_constr_t constr,
2485                            int stepout,
2486                            t_inputrec *inputrec,
2487                            gmx_mtop_t *top_global, t_fcdata *fcd,
2488                            t_state *state_global,
2489                            t_mdatoms *mdatoms,
2490                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2491                            gmx_edsam_t ed,
2492                            t_forcerec *fr,
2493                            int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2494                            real cpt_period, real max_hours,
2495                            int imdport,
2496                            unsigned long Flags,
2497                            gmx_walltime_accounting_t walltime_accounting)
2498  */
2499 double do_steep(FILE *fplog, t_commrec *cr,
2500                 int nfile, const t_filenm fnm[],
2501                 const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
2502                 int gmx_unused nstglobalcomm,
2503                 gmx_vsite_t *vsite, gmx_constr_t constr,
2504                 int gmx_unused stepout,
2505                 t_inputrec *inputrec,
2506                 gmx_mtop_t *top_global, t_fcdata *fcd,
2507                 t_state *state_global,
2508                 t_mdatoms *mdatoms,
2509                 t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2510                 gmx_edsam_t gmx_unused  ed,
2511                 t_forcerec *fr,
2512                 int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2513                 gmx_membed_t gmx_unused *membed,
2514                 real gmx_unused cpt_period, real gmx_unused max_hours,
2515                 int imdport,
2516                 unsigned long gmx_unused Flags,
2517                 gmx_walltime_accounting_t walltime_accounting)
2518 {
2519     const char       *SD = "Steepest Descents";
2520     em_state_t       *s_min, *s_try;
2521     gmx_localtop_t   *top;
2522     gmx_enerdata_t   *enerd;
2523     rvec             *f;
2524     gmx_global_stat_t gstat;
2525     t_graph          *graph;
2526     real              stepsize;
2527     real              ustep, fnormn;
2528     gmx_mdoutf_t      outf;
2529     t_mdebin         *mdebin;
2530     gmx_bool          bDone, bAbort, do_x, do_f;
2531     tensor            vir, pres;
2532     rvec              mu_tot;
2533     int               nsteps;
2534     int               count          = 0;
2535     int               steps_accepted = 0;
2536
2537     s_min = init_em_state();
2538     s_try = init_em_state();
2539
2540     /* Init em and store the local state in s_try */
2541     init_em(fplog, SD, cr, inputrec,
2542             state_global, top_global, s_try, &top, &f,
2543             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2544             nfile, fnm, &outf, &mdebin, imdport, Flags, wcycle);
2545
2546     /* Print to log file  */
2547     print_em_start(fplog, cr, walltime_accounting, wcycle, SD);
2548
2549     /* Set variables for stepsize (in nm). This is the largest
2550      * step that we are going to make in any direction.
2551      */
2552     ustep    = inputrec->em_stepsize;
2553     stepsize = 0;
2554
2555     /* Max number of steps  */
2556     nsteps = inputrec->nsteps;
2557
2558     if (MASTER(cr))
2559     {
2560         /* Print to the screen  */
2561         sp_header(stderr, SD, inputrec->em_tol, nsteps);
2562     }
2563     if (fplog)
2564     {
2565         sp_header(fplog, SD, inputrec->em_tol, nsteps);
2566     }
2567
2568     /**** HERE STARTS THE LOOP ****
2569      * count is the counter for the number of steps
2570      * bDone will be TRUE when the minimization has converged
2571      * bAbort will be TRUE when nsteps steps have been performed or when
2572      * the stepsize becomes smaller than is reasonable for machine precision
2573      */
2574     count  = 0;
2575     bDone  = FALSE;
2576     bAbort = FALSE;
2577     while (!bDone && !bAbort)
2578     {
2579         bAbort = (nsteps >= 0) && (count == nsteps);
2580
2581         /* set new coordinates, except for first step */
2582         bool validStep = true;
2583         if (count > 0)
2584         {
2585             validStep =
2586                 do_em_step(cr, inputrec, mdatoms, fr->bMolPBC,
2587                            s_min, stepsize, s_min->f, s_try,
2588                            constr, top, nrnb, wcycle, count);
2589         }
2590
2591         if (validStep)
2592         {
2593             evaluate_energy(fplog, cr,
2594                             top_global, s_try, top,
2595                             inputrec, nrnb, wcycle, gstat,
2596                             vsite, constr, fcd, graph, mdatoms, fr,
2597                             mu_tot, enerd, vir, pres, count, count == 0);
2598         }
2599         else
2600         {
2601             // Signal constraint error during stepping with energy=inf
2602             s_try->epot = std::numeric_limits<real>::infinity();
2603         }
2604
2605         if (MASTER(cr))
2606         {
2607             print_ebin_header(fplog, count, count);
2608         }
2609
2610         if (count == 0)
2611         {
2612             s_min->epot = s_try->epot;
2613         }
2614
2615         /* Print it if necessary  */
2616         if (MASTER(cr))
2617         {
2618             if (bVerbose)
2619             {
2620                 fprintf(stderr, "Step=%5d, Dmax= %6.1e nm, Epot= %12.5e Fmax= %11.5e, atom= %d%c",
2621                         count, ustep, s_try->epot, s_try->fmax, s_try->a_fmax+1,
2622                         ( (count == 0) || (s_try->epot < s_min->epot) ) ? '\n' : '\r');
2623                 fflush(stderr);
2624             }
2625
2626             if ( (count == 0) || (s_try->epot < s_min->epot) )
2627             {
2628                 /* Store the new (lower) energies  */
2629                 upd_mdebin(mdebin, FALSE, FALSE, (double)count,
2630                            mdatoms->tmass, enerd, &s_try->s, inputrec->fepvals, inputrec->expandedvals,
2631                            s_try->s.box, NULL, NULL, vir, pres, NULL, mu_tot, constr);
2632
2633                 /* Prepare IMD energy record, if bIMD is TRUE. */
2634                 IMD_fill_energy_record(inputrec->bIMD, inputrec->imd, enerd, count, TRUE);
2635
2636                 print_ebin(mdoutf_get_fp_ene(outf), TRUE,
2637                            do_per_step(steps_accepted, inputrec->nstdisreout),
2638                            do_per_step(steps_accepted, inputrec->nstorireout),
2639                            fplog, count, count, eprNORMAL,
2640                            mdebin, fcd, &(top_global->groups), &(inputrec->opts));
2641                 fflush(fplog);
2642             }
2643         }
2644
2645         /* Now if the new energy is smaller than the previous...
2646          * or if this is the first step!
2647          * or if we did random steps!
2648          */
2649
2650         if ( (count == 0) || (s_try->epot < s_min->epot) )
2651         {
2652             steps_accepted++;
2653
2654             /* Test whether the convergence criterion is met...  */
2655             bDone = (s_try->fmax < inputrec->em_tol);
2656
2657             /* Copy the arrays for force, positions and energy  */
2658             /* The 'Min' array always holds the coords and forces of the minimal
2659                sampled energy  */
2660             swap_em_state(s_min, s_try);
2661             if (count > 0)
2662             {
2663                 ustep *= 1.2;
2664             }
2665
2666             /* Write to trn, if necessary */
2667             do_x = do_per_step(steps_accepted, inputrec->nstxout);
2668             do_f = do_per_step(steps_accepted, inputrec->nstfout);
2669             write_em_traj(fplog, cr, outf, do_x, do_f, NULL,
2670                           top_global, inputrec, count,
2671                           s_min, state_global);
2672         }
2673         else
2674         {
2675             /* If energy is not smaller make the step smaller...  */
2676             ustep *= 0.5;
2677
2678             if (DOMAINDECOMP(cr) && s_min->s.ddp_count != cr->dd->ddp_count)
2679             {
2680                 /* Reload the old state */
2681                 em_dd_partition_system(fplog, count, cr, top_global, inputrec,
2682                                        s_min, top, mdatoms, fr, vsite, constr,
2683                                        nrnb, wcycle);
2684             }
2685         }
2686
2687         /* Determine new step  */
2688         stepsize = ustep/s_min->fmax;
2689
2690         /* Check if stepsize is too small, with 1 nm as a characteristic length */
2691 #if GMX_DOUBLE
2692         if (count == nsteps || ustep < 1e-12)
2693 #else
2694         if (count == nsteps || ustep < 1e-6)
2695 #endif
2696         {
2697             if (MASTER(cr))
2698             {
2699                 warn_step(stderr, inputrec->em_tol, count == nsteps, constr != NULL);
2700                 warn_step(fplog, inputrec->em_tol, count == nsteps, constr != NULL);
2701             }
2702             bAbort = TRUE;
2703         }
2704
2705         /* Send IMD energies and positions, if bIMD is TRUE. */
2706         if (do_IMD(inputrec->bIMD, count, cr, TRUE, state_global->box, state_global->x, inputrec, 0, wcycle) && MASTER(cr))
2707         {
2708             IMD_send_positions(inputrec->imd);
2709         }
2710
2711         count++;
2712     }   /* End of the loop  */
2713
2714     /* IMD cleanup, if bIMD is TRUE. */
2715     IMD_finalize(inputrec->bIMD, inputrec->imd);
2716
2717     /* Print some data...  */
2718     if (MASTER(cr))
2719     {
2720         fprintf(stderr, "\nwriting lowest energy coordinates.\n");
2721     }
2722     write_em_traj(fplog, cr, outf, TRUE, inputrec->nstfout, ftp2fn(efSTO, nfile, fnm),
2723                   top_global, inputrec, count,
2724                   s_min, state_global);
2725
2726     if (MASTER(cr))
2727     {
2728         double sqrtNumAtoms = sqrt(static_cast<double>(state_global->natoms));
2729         fnormn = s_min->fnorm/sqrtNumAtoms;
2730
2731         print_converged(stderr, SD, inputrec->em_tol, count, bDone, nsteps,
2732                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2733         print_converged(fplog, SD, inputrec->em_tol, count, bDone, nsteps,
2734                         s_min->epot, s_min->fmax, s_min->a_fmax, fnormn);
2735     }
2736
2737     finish_em(cr, outf, walltime_accounting, wcycle);
2738
2739     /* To print the actual number of steps we needed somewhere */
2740     inputrec->nsteps = count;
2741
2742     walltime_accounting_set_nsteps_done(walltime_accounting, count);
2743
2744     return 0;
2745 }   /* That's all folks */
2746
2747 /*! \brief Do normal modes analysis
2748     \copydoc integrator_t (FILE *fplog, t_commrec *cr,
2749                            int nfile, const t_filenm fnm[],
2750                            const gmx_output_env_t *oenv, gmx_bool bVerbose,
2751                            int nstglobalcomm,
2752                            gmx_vsite_t *vsite, gmx_constr_t constr,
2753                            int stepout,
2754                            t_inputrec *inputrec,
2755                            gmx_mtop_t *top_global, t_fcdata *fcd,
2756                            t_state *state_global,
2757                            t_mdatoms *mdatoms,
2758                            t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2759                            gmx_edsam_t ed,
2760                            t_forcerec *fr,
2761                            int repl_ex_nst, int repl_ex_nex, int repl_ex_seed,
2762                            real cpt_period, real max_hours,
2763                            int imdport,
2764                            unsigned long Flags,
2765                            gmx_walltime_accounting_t walltime_accounting)
2766  */
2767 double do_nm(FILE *fplog, t_commrec *cr,
2768              int nfile, const t_filenm fnm[],
2769              const gmx_output_env_t gmx_unused *oenv, gmx_bool bVerbose,
2770              int gmx_unused nstglobalcomm,
2771              gmx_vsite_t *vsite, gmx_constr_t constr,
2772              int gmx_unused stepout,
2773              t_inputrec *inputrec,
2774              gmx_mtop_t *top_global, t_fcdata *fcd,
2775              t_state *state_global,
2776              t_mdatoms *mdatoms,
2777              t_nrnb *nrnb, gmx_wallcycle_t wcycle,
2778              gmx_edsam_t  gmx_unused ed,
2779              t_forcerec *fr,
2780              int gmx_unused repl_ex_nst, int gmx_unused repl_ex_nex, int gmx_unused repl_ex_seed,
2781              gmx_membed_t gmx_unused *membed,
2782              real gmx_unused cpt_period, real gmx_unused max_hours,
2783              int imdport,
2784              unsigned long gmx_unused Flags,
2785              gmx_walltime_accounting_t walltime_accounting)
2786 {
2787     const char          *NM = "Normal Mode Analysis";
2788     gmx_mdoutf_t         outf;
2789     int                  nnodes, node;
2790     gmx_localtop_t      *top;
2791     gmx_enerdata_t      *enerd;
2792     rvec                *f;
2793     gmx_global_stat_t    gstat;
2794     t_graph             *graph;
2795     tensor               vir, pres;
2796     rvec                 mu_tot;
2797     rvec                *fneg, *dfdx;
2798     gmx_bool             bSparse; /* use sparse matrix storage format */
2799     size_t               sz;
2800     gmx_sparsematrix_t * sparse_matrix           = NULL;
2801     real           *     full_matrix             = NULL;
2802     em_state_t       *   state_work;
2803
2804     /* added with respect to mdrun */
2805     int                       row, col;
2806     real                      der_range = 10.0*sqrt(GMX_REAL_EPS);
2807     real                      x_min;
2808     bool                      bIsMaster = MASTER(cr);
2809
2810     if (constr != NULL)
2811     {
2812         gmx_fatal(FARGS, "Constraints present with Normal Mode Analysis, this combination is not supported");
2813     }
2814
2815     state_work = init_em_state();
2816
2817     /* Init em and store the local state in state_minimum */
2818     init_em(fplog, NM, cr, inputrec,
2819             state_global, top_global, state_work, &top,
2820             &f,
2821             nrnb, mu_tot, fr, &enerd, &graph, mdatoms, &gstat, vsite, constr,
2822             nfile, fnm, &outf, NULL, imdport, Flags, wcycle);
2823
2824     gmx_shellfc_t *shellfc = init_shell_flexcon(stdout,
2825                                                 top_global,
2826                                                 n_flexible_constraints(constr),
2827                                                 inputrec->nstcalcenergy,
2828                                                 DOMAINDECOMP(cr));
2829
2830     if (shellfc)
2831     {
2832         make_local_shells(cr, mdatoms, shellfc);
2833     }
2834     std::vector<size_t> atom_index = get_atom_index(top_global);
2835     snew(fneg, atom_index.size());
2836     snew(dfdx, atom_index.size());
2837
2838 #if !GMX_DOUBLE
2839     if (bIsMaster)
2840     {
2841         fprintf(stderr,
2842                 "NOTE: This version of GROMACS has been compiled in single precision,\n"
2843                 "      which MIGHT not be accurate enough for normal mode analysis.\n"
2844                 "      GROMACS now uses sparse matrix storage, so the memory requirements\n"
2845                 "      are fairly modest even if you recompile in double precision.\n\n");
2846     }
2847 #endif
2848
2849     /* Check if we can/should use sparse storage format.
2850      *
2851      * Sparse format is only useful when the Hessian itself is sparse, which it
2852      * will be when we use a cutoff.
2853      * For small systems (n<1000) it is easier to always use full matrix format, though.
2854      */
2855     if (EEL_FULL(fr->eeltype) || fr->rlist == 0.0)
2856     {
2857         md_print_info(cr, fplog, "Non-cutoff electrostatics used, forcing full Hessian format.\n");
2858         bSparse = FALSE;
2859     }
2860     else if (atom_index.size() < 1000)
2861     {
2862         md_print_info(cr, fplog, "Small system size (N=%d), using full Hessian format.\n", atom_index.size());
2863         bSparse = FALSE;
2864     }
2865     else
2866     {
2867         md_print_info(cr, fplog, "Using compressed symmetric sparse Hessian format.\n");
2868         bSparse = TRUE;
2869     }
2870
2871     /* Number of dimensions, based on real atoms, that is not vsites or shell */
2872     sz = DIM*atom_index.size();
2873
2874     fprintf(stderr, "Allocating Hessian memory...\n\n");
2875
2876     if (bSparse)
2877     {
2878         sparse_matrix = gmx_sparsematrix_init(sz);
2879         sparse_matrix->compressed_symmetric = TRUE;
2880     }
2881     else
2882     {
2883         snew(full_matrix, sz*sz);
2884     }
2885
2886     init_nrnb(nrnb);
2887
2888     where();
2889
2890     /* Write start time and temperature */
2891     print_em_start(fplog, cr, walltime_accounting, wcycle, NM);
2892
2893     /* fudge nr of steps to nr of atoms */
2894     inputrec->nsteps = atom_index.size()*2;
2895
2896     if (bIsMaster)
2897     {
2898         fprintf(stderr, "starting normal mode calculation '%s'\n%d steps.\n\n",
2899                 *(top_global->name), (int)inputrec->nsteps);
2900     }
2901
2902     nnodes = cr->nnodes;
2903
2904     /* Make evaluate_energy do a single node force calculation */
2905     cr->nnodes = 1;
2906     evaluate_energy(fplog, cr,
2907                     top_global, state_work, top,
2908                     inputrec, nrnb, wcycle, gstat,
2909                     vsite, constr, fcd, graph, mdatoms, fr,
2910                     mu_tot, enerd, vir, pres, -1, TRUE);
2911     cr->nnodes = nnodes;
2912
2913     /* if forces are not small, warn user */
2914     get_state_f_norm_max(cr, &(inputrec->opts), mdatoms, state_work);
2915
2916     md_print_info(cr, fplog, "Maximum force:%12.5e\n", state_work->fmax);
2917     if (state_work->fmax > 1.0e-3)
2918     {
2919         md_print_info(cr, fplog,
2920                       "The force is probably not small enough to "
2921                       "ensure that you are at a minimum.\n"
2922                       "Be aware that negative eigenvalues may occur\n"
2923                       "when the resulting matrix is diagonalized.\n\n");
2924     }
2925
2926     /***********************************************************
2927      *
2928      *      Loop over all pairs in matrix
2929      *
2930      *      do_force called twice. Once with positive and
2931      *      once with negative displacement
2932      *
2933      ************************************************************/
2934
2935     /* Steps are divided one by one over the nodes */
2936     bool bNS = true;
2937     for (unsigned int aid = cr->nodeid; aid < atom_index.size(); aid += nnodes)
2938     {
2939         size_t atom = atom_index[aid];
2940         for (size_t d = 0; d < DIM; d++)
2941         {
2942             gmx_bool    bBornRadii  = FALSE;
2943             gmx_int64_t step        = 0;
2944             int         force_flags = GMX_FORCE_STATECHANGED | GMX_FORCE_ALLFORCES;
2945             double      t           = 0;
2946
2947             x_min = state_work->s.x[atom][d];
2948
2949             for (unsigned int dx = 0; (dx < 2); dx++)
2950             {
2951                 if (dx == 0)
2952                 {
2953                     state_work->s.x[atom][d] = x_min - der_range;
2954                 }
2955                 else
2956                 {
2957                     state_work->s.x[atom][d] = x_min + der_range;
2958                 }
2959
2960                 /* Make evaluate_energy do a single node force calculation */
2961                 cr->nnodes = 1;
2962                 if (shellfc)
2963                 {
2964                     /* Now is the time to relax the shells */
2965                     (void) relax_shell_flexcon(fplog, cr, bVerbose, step,
2966                                                inputrec, bNS, force_flags,
2967                                                top,
2968                                                constr, enerd, fcd,
2969                                                &state_work->s, state_work->f, vir, mdatoms,
2970                                                nrnb, wcycle, graph, &top_global->groups,
2971                                                shellfc, fr, bBornRadii, t, mu_tot,
2972                                                vsite, NULL);
2973                     bNS = false;
2974                     step++;
2975                 }
2976                 else
2977                 {
2978                     evaluate_energy(fplog, cr,
2979                                     top_global, state_work, top,
2980                                     inputrec, nrnb, wcycle, gstat,
2981                                     vsite, constr, fcd, graph, mdatoms, fr,
2982                                     mu_tot, enerd, vir, pres, atom*2+dx, FALSE);
2983                 }
2984
2985                 cr->nnodes = nnodes;
2986
2987                 if (dx == 0)
2988                 {
2989                     for (size_t i = 0; i < atom_index.size(); i++)
2990                     {
2991                         copy_rvec(state_work->f[atom_index[i]], fneg[i]);
2992                     }
2993                 }
2994             }
2995
2996             /* x is restored to original */
2997             state_work->s.x[atom][d] = x_min;
2998
2999             for (size_t j = 0; j < atom_index.size(); j++)
3000             {
3001                 for (size_t k = 0; (k < DIM); k++)
3002                 {
3003                     dfdx[j][k] =
3004                         -(state_work->f[atom_index[j]][k] - fneg[j][k])/(2*der_range);
3005                 }
3006             }
3007
3008             if (!bIsMaster)
3009             {
3010 #if GMX_MPI
3011 #define mpi_type GMX_MPI_REAL
3012                 MPI_Send(dfdx[0], atom_index.size()*DIM, mpi_type, MASTER(cr),
3013                          cr->nodeid, cr->mpi_comm_mygroup);
3014 #endif
3015             }
3016             else
3017             {
3018                 for (node = 0; (node < nnodes && atom+node < atom_index.size()); node++)
3019                 {
3020                     if (node > 0)
3021                     {
3022 #if GMX_MPI
3023                         MPI_Status stat;
3024                         MPI_Recv(dfdx[0], atom_index.size()*DIM, mpi_type, node, node,
3025                                  cr->mpi_comm_mygroup, &stat);
3026 #undef mpi_type
3027 #endif
3028                     }
3029
3030                     row = (atom + node)*DIM + d;
3031
3032                     for (size_t j = 0; j < atom_index.size(); j++)
3033                     {
3034                         for (size_t k = 0; k < DIM; k++)
3035                         {
3036                             col = j*DIM + k;
3037
3038                             if (bSparse)
3039                             {
3040                                 if (col >= row && dfdx[j][k] != 0.0)
3041                                 {
3042                                     gmx_sparsematrix_increment_value(sparse_matrix,
3043                                                                      row, col, dfdx[j][k]);
3044                                 }
3045                             }
3046                             else
3047                             {
3048                                 full_matrix[row*sz+col] = dfdx[j][k];
3049                             }
3050                         }
3051                     }
3052                 }
3053             }
3054
3055             if (bVerbose && fplog)
3056             {
3057                 fflush(fplog);
3058             }
3059         }
3060         /* write progress */
3061         if (bIsMaster && bVerbose)
3062         {
3063             fprintf(stderr, "\rFinished step %d out of %d",
3064                     static_cast<int>(std::min(atom+nnodes, atom_index.size())),
3065                     static_cast<int>(atom_index.size()));
3066             fflush(stderr);
3067         }
3068     }
3069
3070     if (bIsMaster)
3071     {
3072         fprintf(stderr, "\n\nWriting Hessian...\n");
3073         gmx_mtxio_write(ftp2fn(efMTX, nfile, fnm), sz, sz, full_matrix, sparse_matrix);
3074     }
3075
3076     finish_em(cr, outf, walltime_accounting, wcycle);
3077
3078     walltime_accounting_set_nsteps_done(walltime_accounting, atom_index.size()*2);
3079
3080     return 0;
3081 }
3082
3083 } // namespace gmx