src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "types/commrec.h"
  47 #include "vec.h"
  48 #include "gromacs/math/utilities.h"
  49 #include "macros.h"
  50 #include "gromacs/utility/smalloc.h"
  51 #include "macros.h"
  52 #include "gmx_fatal.h"
  53 #include "physics.h"
  54 #include "force.h"
  55 #include "tables.h"
  56 #include "nonbonded.h"
  57 #include "invblock.h"
  58 #include "names.h"
  59 #include "network.h"
  60 #include "pbc.h"
  61 #include "ns.h"
  62 #include "mshift.h"
  63 #include "txtdump.h"
  64 #include "coulomb.h"
  65 #include "md_support.h"
  66 #include "md_logging.h"
  67 #include "domdec.h"
  68 #include "qmmm.h"
  69 #include "copyrite.h"
  70 #include "mtop_util.h"
  71 #include "nbnxn_simd.h"
  72 #include "nbnxn_search.h"
  73 #include "nbnxn_atomdata.h"
  74 #include "nbnxn_consts.h"
  75 #include "gmx_omp_nthreads.h"
  76 #include "gmx_detect_hardware.h"
  77 #include "inputrec.h"
  78
  79 #include "types/nbnxn_cuda_types_ext.h"
  80 #include "gpu_utils.h"
  81 #include "nbnxn_cuda_data_mgmt.h"
  82 #include "pmalloc_cuda.h"
  83
  84 t_forcerec *mk_forcerec(void)
  85 {
  86     t_forcerec *fr;
  87
  88     snew(fr, 1);
  89
  90     return fr;
  91 }
  92
  93 #ifdef DEBUG
  94 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  95 {
  96     int i, j;
  97
  98     for (i = 0; (i < atnr); i++)
  99     {
 100         for (j = 0; (j < atnr); j++)
 101         {
 102             fprintf(fp, "%2d - %2d", i, j);
 103             if (bBHAM)
 104             {
 105                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 106                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 107             }
 108             else
 109             {
 110                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 111                         C12(nbfp, atnr, i, j)/12.0);
 112             }
 113         }
 114     }
 115 }
 116 #endif
 117
 118 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 119 {
 120     real *nbfp;
 121     int   i, j, k, atnr;
 122
 123     atnr = idef->atnr;
 124     if (bBHAM)
 125     {
 126         snew(nbfp, 3*atnr*atnr);
 127         for (i = k = 0; (i < atnr); i++)
 128         {
 129             for (j = 0; (j < atnr); j++, k++)
 130             {
 131                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 132                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 133                 /* nbfp now includes the 6.0 derivative prefactor */
 134                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 135             }
 136         }
 137     }
 138     else
 139     {
 140         snew(nbfp, 2*atnr*atnr);
 141         for (i = k = 0; (i < atnr); i++)
 142         {
 143             for (j = 0; (j < atnr); j++, k++)
 144             {
 145                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 146                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 147                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 148             }
 149         }
 150     }
 151
 152     return nbfp;
 153 }
 154
 155 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
 156 {
 157     int   i, j, k, atnr;
 158     real  c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 159     real *grid;
 160
 161     /* For LJ-PME simulations, we correct the energies with the reciprocal space
 162      * inside of the cut-off. To do this the non-bonded kernels needs to have
 163      * access to the C6-values used on the reciprocal grid in pme.c
 164      */
 165
 166     atnr = idef->atnr;
 167     snew(grid, 2*atnr*atnr);
 168     for (i = k = 0; (i < atnr); i++)
 169     {
 170         for (j = 0; (j < atnr); j++, k++)
 171         {
 172             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 173             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 174             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 175             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 176             c6   = sqrt(c6i * c6j);
 177             if (fr->ljpme_combination_rule == eljpmeLB
 178                 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
 179             {
 180                 sigmai = pow(c12i / c6i, 1.0/6.0);
 181                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 182                 epsi   = c6i * c6i / c12i;
 183                 epsj   = c6j * c6j / c12j;
 184                 c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 185             }
 186             /* Store the elements at the same relative positions as C6 in nbfp in order
 187              * to simplify access in the kernels
 188              */
 189             grid[2*(atnr*i+j)] = c6*6.0;
 190         }
 191     }
 192     return grid;
 193 }
 194
 195 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 196 {
 197     real *nbfp;
 198     int   i, j, k, atnr;
 199     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 200     real  c6, c12;
 201
 202     atnr = idef->atnr;
 203     snew(nbfp, 2*atnr*atnr);
 204     for (i = 0; i < atnr; ++i)
 205     {
 206         for (j = 0; j < atnr; ++j)
 207         {
 208             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 209             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 210             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 211             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 212             c6   = sqrt(c6i  * c6j);
 213             c12  = sqrt(c12i * c12j);
 214             if (comb_rule == eCOMB_ARITHMETIC
 215                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 216             {
 217                 sigmai = pow(c12i / c6i, 1.0/6.0);
 218                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 219                 epsi   = c6i * c6i / c12i;
 220                 epsj   = c6j * c6j / c12j;
 221                 c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 222                 c12    = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 12);
 223             }
 224             C6(nbfp, atnr, i, j)   = c6*6.0;
 225             C12(nbfp, atnr, i, j)  = c12*12.0;
 226         }
 227     }
 228     return nbfp;
 229 }
 230
 231 /* This routine sets fr->solvent_opt to the most common solvent in the
 232  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 233  * the fr->solvent_type array with the correct type (or esolNO).
 234  *
 235  * Charge groups that fulfill the conditions but are not identical to the
 236  * most common one will be marked as esolNO in the solvent_type array.
 237  *
 238  * TIP3p is identical to SPC for these purposes, so we call it
 239  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 240  *
 241  * NOTE: QM particle should not
 242  * become an optimized solvent. Not even if there is only one charge
 243  * group in the Qm
 244  */
 245
 246 typedef struct
 247 {
 248     int    model;
 249     int    count;
 250     int    vdwtype[4];
 251     real   charge[4];
 252 } solvent_parameters_t;
 253
 254 static void
 255 check_solvent_cg(const gmx_moltype_t    *molt,
 256                  int                     cg0,
 257                  int                     nmol,
 258                  const unsigned char    *qm_grpnr,
 259                  const t_grps           *qm_grps,
 260                  t_forcerec   *          fr,
 261                  int                    *n_solvent_parameters,
 262                  solvent_parameters_t  **solvent_parameters_p,
 263                  int                     cginfo,
 264                  int                    *cg_sp)
 265 {
 266     const t_blocka       *excl;
 267     t_atom               *atom;
 268     int                   j, k;
 269     int                   j0, j1, nj;
 270     gmx_bool              perturbed;
 271     gmx_bool              has_vdw[4];
 272     gmx_bool              match;
 273     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 274     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 275     int                   tjA;
 276     gmx_bool              qm;
 277     solvent_parameters_t *solvent_parameters;
 278
 279     /* We use a list with parameters for each solvent type.
 280      * Every time we discover a new molecule that fulfills the basic
 281      * conditions for a solvent we compare with the previous entries
 282      * in these lists. If the parameters are the same we just increment
 283      * the counter for that type, and otherwise we create a new type
 284      * based on the current molecule.
 285      *
 286      * Once we've finished going through all molecules we check which
 287      * solvent is most common, and mark all those molecules while we
 288      * clear the flag on all others.
 289      */
 290
 291     solvent_parameters = *solvent_parameters_p;
 292
 293     /* Mark the cg first as non optimized */
 294     *cg_sp = -1;
 295
 296     /* Check if this cg has no exclusions with atoms in other charge groups
 297      * and all atoms inside the charge group excluded.
 298      * We only have 3 or 4 atom solvent loops.
 299      */
 300     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 301         !GET_CGINFO_EXCL_INTRA(cginfo))
 302     {
 303         return;
 304     }
 305
 306     /* Get the indices of the first atom in this charge group */
 307     j0     = molt->cgs.index[cg0];
 308     j1     = molt->cgs.index[cg0+1];
 309
 310     /* Number of atoms in our molecule */
 311     nj     = j1 - j0;
 312
 313     if (debug)
 314     {
 315         fprintf(debug,
 316                 "Moltype '%s': there are %d atoms in this charge group\n",
 317                 *molt->name, nj);
 318     }
 319
 320     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 321      * otherwise skip it.
 322      */
 323     if (nj < 3 || nj > 4)
 324     {
 325         return;
 326     }
 327
 328     /* Check if we are doing QM on this group */
 329     qm = FALSE;
 330     if (qm_grpnr != NULL)
 331     {
 332         for (j = j0; j < j1 && !qm; j++)
 333         {
 334             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 335         }
 336     }
 337     /* Cannot use solvent optimization with QM */
 338     if (qm)
 339     {
 340         return;
 341     }
 342
 343     atom = molt->atoms.atom;
 344
 345     /* Still looks like a solvent, time to check parameters */
 346
 347     /* If it is perturbed (free energy) we can't use the solvent loops,
 348      * so then we just skip to the next molecule.
 349      */
 350     perturbed = FALSE;
 351
 352     for (j = j0; j < j1 && !perturbed; j++)
 353     {
 354         perturbed = PERTURBED(atom[j]);
 355     }
 356
 357     if (perturbed)
 358     {
 359         return;
 360     }
 361
 362     /* Now it's only a question if the VdW and charge parameters
 363      * are OK. Before doing the check we compare and see if they are
 364      * identical to a possible previous solvent type.
 365      * First we assign the current types and charges.
 366      */
 367     for (j = 0; j < nj; j++)
 368     {
 369         tmp_vdwtype[j] = atom[j0+j].type;
 370         tmp_charge[j]  = atom[j0+j].q;
 371     }
 372
 373     /* Does it match any previous solvent type? */
 374     for (k = 0; k < *n_solvent_parameters; k++)
 375     {
 376         match = TRUE;
 377
 378
 379         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 380         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 381              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 382         {
 383             match = FALSE;
 384         }
 385
 386         /* Check that types & charges match for all atoms in molecule */
 387         for (j = 0; j < nj && match == TRUE; j++)
 388         {
 389             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 390             {
 391                 match = FALSE;
 392             }
 393             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 394             {
 395                 match = FALSE;
 396             }
 397         }
 398         if (match == TRUE)
 399         {
 400             /* Congratulations! We have a matched solvent.
 401              * Flag it with this type for later processing.
 402              */
 403             *cg_sp = k;
 404             solvent_parameters[k].count += nmol;
 405
 406             /* We are done with this charge group */
 407             return;
 408         }
 409     }
 410
 411     /* If we get here, we have a tentative new solvent type.
 412      * Before we add it we must check that it fulfills the requirements
 413      * of the solvent optimized loops. First determine which atoms have
 414      * VdW interactions.
 415      */
 416     for (j = 0; j < nj; j++)
 417     {
 418         has_vdw[j] = FALSE;
 419         tjA        = tmp_vdwtype[j];
 420
 421         /* Go through all other tpes and see if any have non-zero
 422          * VdW parameters when combined with this one.
 423          */
 424         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 425         {
 426             /* We already checked that the atoms weren't perturbed,
 427              * so we only need to check state A now.
 428              */
 429             if (fr->bBHAM)
 430             {
 431                 has_vdw[j] = (has_vdw[j] ||
 432                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 433                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 434                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 435             }
 436             else
 437             {
 438                 /* Standard LJ */
 439                 has_vdw[j] = (has_vdw[j] ||
 440                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 441                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 442             }
 443         }
 444     }
 445
 446     /* Now we know all we need to make the final check and assignment. */
 447     if (nj == 3)
 448     {
 449         /* So, is it an SPC?
 450          * For this we require thatn all atoms have charge,
 451          * the charges on atom 2 & 3 should be the same, and only
 452          * atom 1 might have VdW.
 453          */
 454         if (has_vdw[1] == FALSE &&
 455             has_vdw[2] == FALSE &&
 456             tmp_charge[0]  != 0 &&
 457             tmp_charge[1]  != 0 &&
 458             tmp_charge[2]  == tmp_charge[1])
 459         {
 460             srenew(solvent_parameters, *n_solvent_parameters+1);
 461             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 462             solvent_parameters[*n_solvent_parameters].count = nmol;
 463             for (k = 0; k < 3; k++)
 464             {
 465                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 466                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 467             }
 468
 469             *cg_sp = *n_solvent_parameters;
 470             (*n_solvent_parameters)++;
 471         }
 472     }
 473     else if (nj == 4)
 474     {
 475         /* Or could it be a TIP4P?
 476          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 477          * Only atom 1 mght have VdW.
 478          */
 479         if (has_vdw[1] == FALSE &&
 480             has_vdw[2] == FALSE &&
 481             has_vdw[3] == FALSE &&
 482             tmp_charge[0]  == 0 &&
 483             tmp_charge[1]  != 0 &&
 484             tmp_charge[2]  == tmp_charge[1] &&
 485             tmp_charge[3]  != 0)
 486         {
 487             srenew(solvent_parameters, *n_solvent_parameters+1);
 488             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 489             solvent_parameters[*n_solvent_parameters].count = nmol;
 490             for (k = 0; k < 4; k++)
 491             {
 492                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 493                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 494             }
 495
 496             *cg_sp = *n_solvent_parameters;
 497             (*n_solvent_parameters)++;
 498         }
 499     }
 500
 501     *solvent_parameters_p = solvent_parameters;
 502 }
 503
 504 static void
 505 check_solvent(FILE  *                fp,
 506               const gmx_mtop_t  *    mtop,
 507               t_forcerec  *          fr,
 508               cginfo_mb_t           *cginfo_mb)
 509 {
 510     const t_block     *   cgs;
 511     const t_block     *   mols;
 512     const gmx_moltype_t  *molt;
 513     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 514     int                   n_solvent_parameters;
 515     solvent_parameters_t *solvent_parameters;
 516     int                 **cg_sp;
 517     int                   bestsp, bestsol;
 518
 519     if (debug)
 520     {
 521         fprintf(debug, "Going to determine what solvent types we have.\n");
 522     }
 523
 524     mols = &mtop->mols;
 525
 526     n_solvent_parameters = 0;
 527     solvent_parameters   = NULL;
 528     /* Allocate temporary array for solvent type */
 529     snew(cg_sp, mtop->nmolblock);
 530
 531     cg_offset = 0;
 532     at_offset = 0;
 533     for (mb = 0; mb < mtop->nmolblock; mb++)
 534     {
 535         molt = &mtop->moltype[mtop->molblock[mb].type];
 536         cgs  = &molt->cgs;
 537         /* Here we have to loop over all individual molecules
 538          * because we need to check for QMMM particles.
 539          */
 540         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 541         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 542         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 543         for (mol = 0; mol < nmol_ch; mol++)
 544         {
 545             cgm = mol*cgs->nr;
 546             am  = mol*cgs->index[cgs->nr];
 547             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 548             {
 549                 check_solvent_cg(molt, cg_mol, nmol,
 550                                  mtop->groups.grpnr[egcQMMM] ?
 551                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 552                                  &mtop->groups.grps[egcQMMM],
 553                                  fr,
 554                                  &n_solvent_parameters, &solvent_parameters,
 555                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 556                                  &cg_sp[mb][cgm+cg_mol]);
 557             }
 558         }
 559         cg_offset += cgs->nr;
 560         at_offset += cgs->index[cgs->nr];
 561     }
 562
 563     /* Puh! We finished going through all charge groups.
 564      * Now find the most common solvent model.
 565      */
 566
 567     /* Most common solvent this far */
 568     bestsp = -2;
 569     for (i = 0; i < n_solvent_parameters; i++)
 570     {
 571         if (bestsp == -2 ||
 572             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 573         {
 574             bestsp = i;
 575         }
 576     }
 577
 578     if (bestsp >= 0)
 579     {
 580         bestsol = solvent_parameters[bestsp].model;
 581     }
 582     else
 583     {
 584         bestsol = esolNO;
 585     }
 586
 587 #ifdef DISABLE_WATER_NLIST
 588     bestsol = esolNO;
 589 #endif
 590
 591     fr->nWatMol = 0;
 592     for (mb = 0; mb < mtop->nmolblock; mb++)
 593     {
 594         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 595         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 596         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 597         {
 598             if (cg_sp[mb][i] == bestsp)
 599             {
 600                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 601                 fr->nWatMol += nmol;
 602             }
 603             else
 604             {
 605                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 606             }
 607         }
 608         sfree(cg_sp[mb]);
 609     }
 610     sfree(cg_sp);
 611
 612     if (bestsol != esolNO && fp != NULL)
 613     {
 614         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 615                 esol_names[bestsol],
 616                 solvent_parameters[bestsp].count);
 617     }
 618
 619     sfree(solvent_parameters);
 620     fr->solvent_opt = bestsol;
 621 }
 622
 623 enum {
 624     acNONE = 0, acCONSTRAINT, acSETTLE
 625 };
 626
 627 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 628                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 629                                    gmx_bool *bFEP_NonBonded,
 630                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 631 {
 632     const t_block        *cgs;
 633     const t_blocka       *excl;
 634     const gmx_moltype_t  *molt;
 635     const gmx_molblock_t *molb;
 636     cginfo_mb_t          *cginfo_mb;
 637     gmx_bool             *type_VDW;
 638     int                  *cginfo;
 639     int                   cg_offset, a_offset, cgm, am;
 640     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 641     int                  *a_con;
 642     int                   ftype;
 643     int                   ia;
 644     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
 645
 646     ncg_tot = ncg_mtop(mtop);
 647     snew(cginfo_mb, mtop->nmolblock);
 648
 649     snew(type_VDW, fr->ntype);
 650     for (ai = 0; ai < fr->ntype; ai++)
 651     {
 652         type_VDW[ai] = FALSE;
 653         for (j = 0; j < fr->ntype; j++)
 654         {
 655             type_VDW[ai] = type_VDW[ai] ||
 656                 fr->bBHAM ||
 657                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 658                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 659         }
 660     }
 661
 662     *bFEP_NonBonded               = FALSE;
 663     *bExcl_IntraCGAll_InterCGNone = TRUE;
 664
 665     excl_nalloc = 10;
 666     snew(bExcl, excl_nalloc);
 667     cg_offset = 0;
 668     a_offset  = 0;
 669     for (mb = 0; mb < mtop->nmolblock; mb++)
 670     {
 671         molb = &mtop->molblock[mb];
 672         molt = &mtop->moltype[molb->type];
 673         cgs  = &molt->cgs;
 674         excl = &molt->excls;
 675
 676         /* Check if the cginfo is identical for all molecules in this block.
 677          * If so, we only need an array of the size of one molecule.
 678          * Otherwise we make an array of #mol times #cgs per molecule.
 679          */
 680         bId = TRUE;
 681         am  = 0;
 682         for (m = 0; m < molb->nmol; m++)
 683         {
 684             am = m*cgs->index[cgs->nr];
 685             for (cg = 0; cg < cgs->nr; cg++)
 686             {
 687                 a0 = cgs->index[cg];
 688                 a1 = cgs->index[cg+1];
 689                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 690                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 691                 {
 692                     bId = FALSE;
 693                 }
 694                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 695                 {
 696                     for (ai = a0; ai < a1; ai++)
 697                     {
 698                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 699                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 700                         {
 701                             bId = FALSE;
 702                         }
 703                     }
 704                 }
 705             }
 706         }
 707
 708         cginfo_mb[mb].cg_start = cg_offset;
 709         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 710         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 711         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 712         cginfo = cginfo_mb[mb].cginfo;
 713
 714         /* Set constraints flags for constrained atoms */
 715         snew(a_con, molt->atoms.nr);
 716         for (ftype = 0; ftype < F_NRE; ftype++)
 717         {
 718             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 719             {
 720                 int nral;
 721
 722                 nral = NRAL(ftype);
 723                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 724                 {
 725                     int a;
 726
 727                     for (a = 0; a < nral; a++)
 728                     {
 729                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 730                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 731                     }
 732                 }
 733             }
 734         }
 735
 736         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 737         {
 738             cgm = m*cgs->nr;
 739             am  = m*cgs->index[cgs->nr];
 740             for (cg = 0; cg < cgs->nr; cg++)
 741             {
 742                 a0 = cgs->index[cg];
 743                 a1 = cgs->index[cg+1];
 744
 745                 /* Store the energy group in cginfo */
 746                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 747                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 748
 749                 /* Check the intra/inter charge group exclusions */
 750                 if (a1-a0 > excl_nalloc)
 751                 {
 752                     excl_nalloc = a1 - a0;
 753                     srenew(bExcl, excl_nalloc);
 754                 }
 755                 /* bExclIntraAll: all intra cg interactions excluded
 756                  * bExclInter:    any inter cg interactions excluded
 757                  */
 758                 bExclIntraAll       = TRUE;
 759                 bExclInter          = FALSE;
 760                 bHaveVDW            = FALSE;
 761                 bHaveQ              = FALSE;
 762                 bHavePerturbedAtoms = FALSE;
 763                 for (ai = a0; ai < a1; ai++)
 764                 {
 765                     /* Check VDW and electrostatic interactions */
 766                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 767                                             type_VDW[molt->atoms.atom[ai].typeB]);
 768                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 769                                             molt->atoms.atom[ai].qB != 0);
 770
 771                     bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
 772
 773                     /* Clear the exclusion list for atom ai */
 774                     for (aj = a0; aj < a1; aj++)
 775                     {
 776                         bExcl[aj-a0] = FALSE;
 777                     }
 778                     /* Loop over all the exclusions of atom ai */
 779                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 780                     {
 781                         aj = excl->a[j];
 782                         if (aj < a0 || aj >= a1)
 783                         {
 784                             bExclInter = TRUE;
 785                         }
 786                         else
 787                         {
 788                             bExcl[aj-a0] = TRUE;
 789                         }
 790                     }
 791                     /* Check if ai excludes a0 to a1 */
 792                     for (aj = a0; aj < a1; aj++)
 793                     {
 794                         if (!bExcl[aj-a0])
 795                         {
 796                             bExclIntraAll = FALSE;
 797                         }
 798                     }
 799
 800                     switch (a_con[ai])
 801                     {
 802                         case acCONSTRAINT:
 803                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 804                             break;
 805                         case acSETTLE:
 806                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 807                             break;
 808                         default:
 809                             break;
 810                     }
 811                 }
 812                 if (bExclIntraAll)
 813                 {
 814                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 815                 }
 816                 if (bExclInter)
 817                 {
 818                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 819                 }
 820                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 821                 {
 822                     /* The size in cginfo is currently only read with DD */
 823                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 824                 }
 825                 if (bHaveVDW)
 826                 {
 827                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 828                 }
 829                 if (bHaveQ)
 830                 {
 831                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 832                 }
 833                 if (bHavePerturbedAtoms && fr->efep != efepNO)
 834                 {
 835                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 836                     *bFEP_NonBonded = TRUE;
 837                 }
 838                 /* Store the charge group size */
 839                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 840
 841                 if (!bExclIntraAll || bExclInter)
 842                 {
 843                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 844                 }
 845             }
 846         }
 847
 848         sfree(a_con);
 849
 850         cg_offset += molb->nmol*cgs->nr;
 851         a_offset  += molb->nmol*cgs->index[cgs->nr];
 852     }
 853     sfree(bExcl);
 854
 855     /* the solvent optimizer is called after the QM is initialized,
 856      * because we don't want to have the QM subsystemto become an
 857      * optimized solvent
 858      */
 859
 860     check_solvent(fplog, mtop, fr, cginfo_mb);
 861
 862     if (getenv("GMX_NO_SOLV_OPT"))
 863     {
 864         if (fplog)
 865         {
 866             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 867                     "Disabling all solvent optimization\n");
 868         }
 869         fr->solvent_opt = esolNO;
 870     }
 871     if (bNoSolvOpt)
 872     {
 873         fr->solvent_opt = esolNO;
 874     }
 875     if (!fr->solvent_opt)
 876     {
 877         for (mb = 0; mb < mtop->nmolblock; mb++)
 878         {
 879             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 880             {
 881                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 882             }
 883         }
 884     }
 885
 886     return cginfo_mb;
 887 }
 888
 889 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 890 {
 891     int  ncg, mb, cg;
 892     int *cginfo;
 893
 894     ncg = cgi_mb[nmb-1].cg_end;
 895     snew(cginfo, ncg);
 896     mb = 0;
 897     for (cg = 0; cg < ncg; cg++)
 898     {
 899         while (cg >= cgi_mb[mb].cg_end)
 900         {
 901             mb++;
 902         }
 903         cginfo[cg] =
 904             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 905     }
 906
 907     return cginfo;
 908 }
 909
 910 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 911 {
 912     /*This now calculates sum for q and c6*/
 913     double         qsum, q2sum, q, c6sum, c6;
 914     int            mb, nmol, i;
 915     const t_atoms *atoms;
 916
 917     qsum   = 0;
 918     q2sum  = 0;
 919     c6sum  = 0;
 920     for (mb = 0; mb < mtop->nmolblock; mb++)
 921     {
 922         nmol  = mtop->molblock[mb].nmol;
 923         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 924         for (i = 0; i < atoms->nr; i++)
 925         {
 926             q       = atoms->atom[i].q;
 927             qsum   += nmol*q;
 928             q2sum  += nmol*q*q;
 929             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 930             c6sum  += nmol*c6;
 931         }
 932     }
 933     fr->qsum[0]   = qsum;
 934     fr->q2sum[0]  = q2sum;
 935     fr->c6sum[0]  = c6sum;
 936
 937     if (fr->efep != efepNO)
 938     {
 939         qsum   = 0;
 940         q2sum  = 0;
 941         c6sum  = 0;
 942         for (mb = 0; mb < mtop->nmolblock; mb++)
 943         {
 944             nmol  = mtop->molblock[mb].nmol;
 945             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 946             for (i = 0; i < atoms->nr; i++)
 947             {
 948                 q       = atoms->atom[i].qB;
 949                 qsum   += nmol*q;
 950                 q2sum  += nmol*q*q;
 951                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 952                 c6sum  += nmol*c6;
 953             }
 954             fr->qsum[1]   = qsum;
 955             fr->q2sum[1]  = q2sum;
 956             fr->c6sum[1]  = c6sum;
 957         }
 958     }
 959     else
 960     {
 961         fr->qsum[1]   = fr->qsum[0];
 962         fr->q2sum[1]  = fr->q2sum[0];
 963         fr->c6sum[1]  = fr->c6sum[0];
 964     }
 965     if (log)
 966     {
 967         if (fr->efep == efepNO)
 968         {
 969             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 970         }
 971         else
 972         {
 973             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 974                     fr->qsum[0], fr->qsum[1]);
 975         }
 976     }
 977 }
 978
 979 void update_forcerec(t_forcerec *fr, matrix box)
 980 {
 981     if (fr->eeltype == eelGRF)
 982     {
 983         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 984                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 985                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 986     }
 987 }
 988
 989 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 990 {
 991     const t_atoms  *atoms, *atoms_tpi;
 992     const t_blocka *excl;
 993     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 994     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 995     double          csix, ctwelve;
 996     int             ntp, *typecount;
 997     gmx_bool        bBHAM;
 998     real           *nbfp;
 999     real           *nbfp_comb = NULL;
1000
1001     ntp   = fr->ntype;
1002     bBHAM = fr->bBHAM;
1003     nbfp  = fr->nbfp;
1004
1005     /* For LJ-PME, we want to correct for the difference between the
1006      * actual C6 values and the C6 values used by the LJ-PME based on
1007      * combination rules. */
1008
1009     if (EVDW_PME(fr->vdwtype))
1010     {
1011         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1012                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1013         for (tpi = 0; tpi < ntp; ++tpi)
1014         {
1015             for (tpj = 0; tpj < ntp; ++tpj)
1016             {
1017                 C6(nbfp_comb, ntp, tpi, tpj) =
1018                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1019                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1020             }
1021         }
1022         nbfp = nbfp_comb;
1023     }
1024     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1025     {
1026         csix    = 0;
1027         ctwelve = 0;
1028         npair   = 0;
1029         nexcl   = 0;
1030         if (!fr->n_tpi)
1031         {
1032             /* Count the types so we avoid natoms^2 operations */
1033             snew(typecount, ntp);
1034             gmx_mtop_count_atomtypes(mtop, q, typecount);
1035
1036             for (tpi = 0; tpi < ntp; tpi++)
1037             {
1038                 for (tpj = tpi; tpj < ntp; tpj++)
1039                 {
1040                     tmpi = typecount[tpi];
1041                     tmpj = typecount[tpj];
1042                     if (tpi != tpj)
1043                     {
1044                         npair_ij = tmpi*tmpj;
1045                     }
1046                     else
1047                     {
1048                         npair_ij = tmpi*(tmpi - 1)/2;
1049                     }
1050                     if (bBHAM)
1051                     {
1052                         /* nbfp now includes the 6.0 derivative prefactor */
1053                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1054                     }
1055                     else
1056                     {
1057                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1058                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1059                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1060                     }
1061                     npair += npair_ij;
1062                 }
1063             }
1064             sfree(typecount);
1065             /* Subtract the excluded pairs.
1066              * The main reason for substracting exclusions is that in some cases
1067              * some combinations might never occur and the parameters could have
1068              * any value. These unused values should not influence the dispersion
1069              * correction.
1070              */
1071             for (mb = 0; mb < mtop->nmolblock; mb++)
1072             {
1073                 nmol  = mtop->molblock[mb].nmol;
1074                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1075                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1076                 for (i = 0; (i < atoms->nr); i++)
1077                 {
1078                     if (q == 0)
1079                     {
1080                         tpi = atoms->atom[i].type;
1081                     }
1082                     else
1083                     {
1084                         tpi = atoms->atom[i].typeB;
1085                     }
1086                     j1  = excl->index[i];
1087                     j2  = excl->index[i+1];
1088                     for (j = j1; j < j2; j++)
1089                     {
1090                         k = excl->a[j];
1091                         if (k > i)
1092                         {
1093                             if (q == 0)
1094                             {
1095                                 tpj = atoms->atom[k].type;
1096                             }
1097                             else
1098                             {
1099                                 tpj = atoms->atom[k].typeB;
1100                             }
1101                             if (bBHAM)
1102                             {
1103                                 /* nbfp now includes the 6.0 derivative prefactor */
1104                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1105                             }
1106                             else
1107                             {
1108                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1109                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1110                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1111                             }
1112                             nexcl += nmol;
1113                         }
1114                     }
1115                 }
1116             }
1117         }
1118         else
1119         {
1120             /* Only correct for the interaction of the test particle
1121              * with the rest of the system.
1122              */
1123             atoms_tpi =
1124                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1125
1126             npair = 0;
1127             for (mb = 0; mb < mtop->nmolblock; mb++)
1128             {
1129                 nmol  = mtop->molblock[mb].nmol;
1130                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1131                 for (j = 0; j < atoms->nr; j++)
1132                 {
1133                     nmolc = nmol;
1134                     /* Remove the interaction of the test charge group
1135                      * with itself.
1136                      */
1137                     if (mb == mtop->nmolblock-1)
1138                     {
1139                         nmolc--;
1140
1141                         if (mb == 0 && nmol == 1)
1142                         {
1143                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1144                         }
1145                     }
1146                     if (q == 0)
1147                     {
1148                         tpj = atoms->atom[j].type;
1149                     }
1150                     else
1151                     {
1152                         tpj = atoms->atom[j].typeB;
1153                     }
1154                     for (i = 0; i < fr->n_tpi; i++)
1155                     {
1156                         if (q == 0)
1157                         {
1158                             tpi = atoms_tpi->atom[i].type;
1159                         }
1160                         else
1161                         {
1162                             tpi = atoms_tpi->atom[i].typeB;
1163                         }
1164                         if (bBHAM)
1165                         {
1166                             /* nbfp now includes the 6.0 derivative prefactor */
1167                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1168                         }
1169                         else
1170                         {
1171                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1172                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1173                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1174                         }
1175                         npair += nmolc;
1176                     }
1177                 }
1178             }
1179         }
1180         if (npair - nexcl <= 0 && fplog)
1181         {
1182             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1183             csix     = 0;
1184             ctwelve  = 0;
1185         }
1186         else
1187         {
1188             csix    /= npair - nexcl;
1189             ctwelve /= npair - nexcl;
1190         }
1191         if (debug)
1192         {
1193             fprintf(debug, "Counted %d exclusions\n", nexcl);
1194             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1195             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1196         }
1197         fr->avcsix[q]    = csix;
1198         fr->avctwelve[q] = ctwelve;
1199     }
1200
1201     if (EVDW_PME(fr->vdwtype))
1202     {
1203         sfree(nbfp_comb);
1204     }
1205
1206     if (fplog != NULL)
1207     {
1208         if (fr->eDispCorr == edispcAllEner ||
1209             fr->eDispCorr == edispcAllEnerPres)
1210         {
1211             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1212                     fr->avcsix[0], fr->avctwelve[0]);
1213         }
1214         else
1215         {
1216             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1217         }
1218     }
1219 }
1220
1221
1222 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1223                            const gmx_mtop_t *mtop)
1224 {
1225     const t_atoms *at1, *at2;
1226     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1227     real           b, bmin;
1228     real          *nbfp;
1229
1230     if (fplog)
1231     {
1232         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1233     }
1234     nbfp   = fr->nbfp;
1235     ntypes = fr->ntype;
1236
1237     bmin           = -1;
1238     fr->bham_b_max = 0;
1239     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1240     {
1241         at1 = &mtop->moltype[mt1].atoms;
1242         for (i = 0; (i < at1->nr); i++)
1243         {
1244             tpi = at1->atom[i].type;
1245             if (tpi >= ntypes)
1246             {
1247                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1248             }
1249
1250             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1251             {
1252                 at2 = &mtop->moltype[mt2].atoms;
1253                 for (j = 0; (j < at2->nr); j++)
1254                 {
1255                     tpj = at2->atom[j].type;
1256                     if (tpj >= ntypes)
1257                     {
1258                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1259                     }
1260                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1261                     if (b > fr->bham_b_max)
1262                     {
1263                         fr->bham_b_max = b;
1264                     }
1265                     if ((b < bmin) || (bmin == -1))
1266                     {
1267                         bmin = b;
1268                     }
1269                 }
1270             }
1271         }
1272     }
1273     if (fplog)
1274     {
1275         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1276                 bmin, fr->bham_b_max);
1277     }
1278 }
1279
1280 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1281                             t_forcerec *fr, real rtab,
1282                             const t_commrec *cr,
1283                             const char *tabfn, char *eg1, char *eg2,
1284                             t_nblists *nbl)
1285 {
1286     char buf[STRLEN];
1287     int  i, j;
1288
1289     if (tabfn == NULL)
1290     {
1291         if (debug)
1292         {
1293             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1294         }
1295         return;
1296     }
1297
1298     sprintf(buf, "%s", tabfn);
1299     if (eg1 && eg2)
1300     {
1301         /* Append the two energy group names */
1302         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1303                 eg1, eg2, ftp2ext(efXVG));
1304     }
1305     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1306     /* Copy the contents of the table to separate coulomb and LJ tables too,
1307      * to improve cache performance.
1308      */
1309     /* For performance reasons we want
1310      * the table data to be aligned to 16-byte. The pointers could be freed
1311      * but currently aren't.
1312      */
1313     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1314     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1315     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1316     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1317     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1318     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1319     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1320     nbl->table_elec.ninteractions = 1;
1321     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1322     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1323
1324     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1325     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1326     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1327     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1328     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1329     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1330     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1331     nbl->table_vdw.ninteractions = 2;
1332     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1333     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1334
1335     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1336     {
1337         for (j = 0; j < 4; j++)
1338         {
1339             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1340         }
1341         for (j = 0; j < 8; j++)
1342         {
1343             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1344         }
1345     }
1346 }
1347
1348 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1349                          int *ncount, int **count)
1350 {
1351     const gmx_moltype_t *molt;
1352     const t_ilist       *il;
1353     int                  mt, ftype, stride, i, j, tabnr;
1354
1355     for (mt = 0; mt < mtop->nmoltype; mt++)
1356     {
1357         molt = &mtop->moltype[mt];
1358         for (ftype = 0; ftype < F_NRE; ftype++)
1359         {
1360             if (ftype == ftype1 || ftype == ftype2)
1361             {
1362                 il     = &molt->ilist[ftype];
1363                 stride = 1 + NRAL(ftype);
1364                 for (i = 0; i < il->nr; i += stride)
1365                 {
1366                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1367                     if (tabnr < 0)
1368                     {
1369                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1370                     }
1371                     if (tabnr >= *ncount)
1372                     {
1373                         srenew(*count, tabnr+1);
1374                         for (j = *ncount; j < tabnr+1; j++)
1375                         {
1376                             (*count)[j] = 0;
1377                         }
1378                         *ncount = tabnr+1;
1379                     }
1380                     (*count)[tabnr]++;
1381                 }
1382             }
1383         }
1384     }
1385 }
1386
1387 static bondedtable_t *make_bonded_tables(FILE *fplog,
1388                                          int ftype1, int ftype2,
1389                                          const gmx_mtop_t *mtop,
1390                                          const char *basefn, const char *tabext)
1391 {
1392     int            i, ncount, *count;
1393     char           tabfn[STRLEN];
1394     bondedtable_t *tab;
1395
1396     tab = NULL;
1397
1398     ncount = 0;
1399     count  = NULL;
1400     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1401
1402     if (ncount > 0)
1403     {
1404         snew(tab, ncount);
1405         for (i = 0; i < ncount; i++)
1406         {
1407             if (count[i] > 0)
1408             {
1409                 sprintf(tabfn, "%s", basefn);
1410                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1411                         tabext, i, ftp2ext(efXVG));
1412                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1413             }
1414         }
1415         sfree(count);
1416     }
1417
1418     return tab;
1419 }
1420
1421 void forcerec_set_ranges(t_forcerec *fr,
1422                          int ncg_home, int ncg_force,
1423                          int natoms_force,
1424                          int natoms_force_constr, int natoms_f_novirsum)
1425 {
1426     fr->cg0 = 0;
1427     fr->hcg = ncg_home;
1428
1429     /* fr->ncg_force is unused in the standard code,
1430      * but it can be useful for modified code dealing with charge groups.
1431      */
1432     fr->ncg_force           = ncg_force;
1433     fr->natoms_force        = natoms_force;
1434     fr->natoms_force_constr = natoms_force_constr;
1435
1436     if (fr->natoms_force_constr > fr->nalloc_force)
1437     {
1438         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1439
1440         if (fr->bTwinRange)
1441         {
1442             srenew(fr->f_twin, fr->nalloc_force);
1443         }
1444     }
1445
1446     if (fr->bF_NoVirSum)
1447     {
1448         fr->f_novirsum_n = natoms_f_novirsum;
1449         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1450         {
1451             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1452             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1453         }
1454     }
1455     else
1456     {
1457         fr->f_novirsum_n = 0;
1458     }
1459 }
1460
1461 static real cutoff_inf(real cutoff)
1462 {
1463     if (cutoff == 0)
1464     {
1465         cutoff = GMX_CUTOFF_INF;
1466     }
1467
1468     return cutoff;
1469 }
1470
1471 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1472                                   t_forcerec *fr, const t_inputrec *ir,
1473                                   const char *tabfn, const gmx_mtop_t *mtop,
1474                                   matrix     box)
1475 {
1476     char buf[STRLEN];
1477     int  i, j;
1478
1479     if (tabfn == NULL)
1480     {
1481         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1482         return;
1483     }
1484
1485     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1486
1487     sprintf(buf, "%s", tabfn);
1488     for (i = 0; i < ir->adress->n_tf_grps; i++)
1489     {
1490         j = ir->adress->tf_table_index[i]; /* get energy group index */
1491         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1492                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1493         if (fp)
1494         {
1495             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1496         }
1497         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1498     }
1499
1500 }
1501
1502 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1503 {
1504     gmx_bool bAllvsAll;
1505
1506     bAllvsAll =
1507         (
1508             ir->rlist == 0            &&
1509             ir->rcoulomb == 0         &&
1510             ir->rvdw == 0             &&
1511             ir->ePBC == epbcNONE      &&
1512             ir->vdwtype == evdwCUT    &&
1513             ir->coulombtype == eelCUT &&
1514             ir->efep == efepNO        &&
1515             (ir->implicit_solvent == eisNO ||
1516              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1517                                                   ir->gb_algorithm == egbHCT   ||
1518                                                   ir->gb_algorithm == egbOBC))) &&
1519             getenv("GMX_NO_ALLVSALL") == NULL
1520         );
1521
1522     if (bAllvsAll && ir->opts.ngener > 1)
1523     {
1524         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1525
1526         if (bPrintNote)
1527         {
1528             if (MASTER(cr))
1529             {
1530                 fprintf(stderr, "\n%s\n", note);
1531             }
1532             if (fp != NULL)
1533             {
1534                 fprintf(fp, "\n%s\n", note);
1535             }
1536         }
1537         bAllvsAll = FALSE;
1538     }
1539
1540     if (bAllvsAll && fp && MASTER(cr))
1541     {
1542         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1543     }
1544
1545     return bAllvsAll;
1546 }
1547
1548
1549 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1550 {
1551     int t, i;
1552
1553     /* These thread local data structures are used for bondeds only */
1554     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1555
1556     if (fr->nthreads > 1)
1557     {
1558         snew(fr->f_t, fr->nthreads);
1559         /* Thread 0 uses the global force and energy arrays */
1560         for (t = 1; t < fr->nthreads; t++)
1561         {
1562             fr->f_t[t].f        = NULL;
1563             fr->f_t[t].f_nalloc = 0;
1564             snew(fr->f_t[t].fshift, SHIFTS);
1565             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1566             for (i = 0; i < egNR; i++)
1567             {
1568                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1569             }
1570         }
1571     }
1572 }
1573
1574
1575 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1576                                       const t_commrec  *cr,
1577                                       const t_inputrec *ir,
1578                                       gmx_bool          bGPU)
1579 {
1580     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1581     {
1582         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1583                       bGPU ? "GPUs" : "SIMD kernels",
1584                       bGPU ? "CPU only" : "plain-C kernels");
1585         return FALSE;
1586     }
1587
1588     return TRUE;
1589 }
1590
1591
1592 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1593                                   int                         *kernel_type,
1594                                   int                         *ewald_excl)
1595 {
1596     *kernel_type = nbnxnk4x4_PlainC;
1597     *ewald_excl  = ewaldexclTable;
1598
1599 #ifdef GMX_NBNXN_SIMD
1600     {
1601 #ifdef GMX_NBNXN_SIMD_4XN
1602         *kernel_type = nbnxnk4xN_SIMD_4xN;
1603 #endif
1604 #ifdef GMX_NBNXN_SIMD_2XNN
1605         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1606 #endif
1607
1608 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1609         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1610          * Currently this is based on the SIMD acceleration choice,
1611          * but it might be better to decide this at runtime based on CPU.
1612          *
1613          * 4xN calculates more (zero) interactions, but has less pair-search
1614          * work and much better kernel instruction scheduling.
1615          *
1616          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1617          * which doesn't have FMA, both the analytical and tabulated Ewald
1618          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1619          * 2x(4+4) because it results in significantly fewer pairs.
1620          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1621          * 10% with HT, 50% without HT. As we currently don't detect the actual
1622          * use of HT, use 4x8 to avoid a potential performance hit.
1623          * On Intel Haswell 4x8 is always faster.
1624          */
1625         *kernel_type = nbnxnk4xN_SIMD_4xN;
1626
1627 #ifndef GMX_SIMD_HAVE_FMA
1628         if (EEL_PME_EWALD(ir->coulombtype) ||
1629             EVDW_PME(ir->vdwtype))
1630         {
1631             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1632              * There are enough instructions to make 2x(4+4) efficient.
1633              */
1634             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1635         }
1636 #endif
1637 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1638
1639
1640         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1641         {
1642 #ifdef GMX_NBNXN_SIMD_4XN
1643             *kernel_type = nbnxnk4xN_SIMD_4xN;
1644 #else
1645             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1646 #endif
1647         }
1648         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1649         {
1650 #ifdef GMX_NBNXN_SIMD_2XNN
1651             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1652 #else
1653             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1654 #endif
1655         }
1656
1657         /* Analytical Ewald exclusion correction is only an option in
1658          * the SIMD kernel.
1659          * Since table lookup's don't parallelize with SIMD, analytical
1660          * will probably always be faster for a SIMD width of 8 or more.
1661          * With FMA analytical is sometimes faster for a width if 4 as well.
1662          * On BlueGene/Q, this is faster regardless of precision.
1663          * In single precision, this is faster on Bulldozer.
1664          */
1665 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1666         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1667         defined GMX_SIMD_IBM_QPX
1668         *ewald_excl = ewaldexclAnalytical;
1669 #endif
1670         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1671         {
1672             *ewald_excl = ewaldexclTable;
1673         }
1674         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1675         {
1676             *ewald_excl = ewaldexclAnalytical;
1677         }
1678
1679     }
1680 #endif /* GMX_NBNXN_SIMD */
1681 }
1682
1683
1684 const char *lookup_nbnxn_kernel_name(int kernel_type)
1685 {
1686     const char *returnvalue = NULL;
1687     switch (kernel_type)
1688     {
1689         case nbnxnkNotSet:
1690             returnvalue = "not set";
1691             break;
1692         case nbnxnk4x4_PlainC:
1693             returnvalue = "plain C";
1694             break;
1695         case nbnxnk4xN_SIMD_4xN:
1696         case nbnxnk4xN_SIMD_2xNN:
1697 #ifdef GMX_NBNXN_SIMD
1698 #if defined GMX_SIMD_X86_SSE2
1699             returnvalue = "SSE2";
1700 #elif defined GMX_SIMD_X86_SSE4_1
1701             returnvalue = "SSE4.1";
1702 #elif defined GMX_SIMD_X86_AVX_128_FMA
1703             returnvalue = "AVX_128_FMA";
1704 #elif defined GMX_SIMD_X86_AVX_256
1705             returnvalue = "AVX_256";
1706 #elif defined GMX_SIMD_X86_AVX2_256
1707             returnvalue = "AVX2_256";
1708 #else
1709             returnvalue = "SIMD";
1710 #endif
1711 #else  /* GMX_NBNXN_SIMD */
1712             returnvalue = "not available";
1713 #endif /* GMX_NBNXN_SIMD */
1714             break;
1715         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1716         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1717
1718         case nbnxnkNR:
1719         default:
1720             gmx_fatal(FARGS, "Illegal kernel type selected");
1721             returnvalue = NULL;
1722             break;
1723     }
1724     return returnvalue;
1725 };
1726
1727 static void pick_nbnxn_kernel(FILE                *fp,
1728                               const t_commrec     *cr,
1729                               gmx_bool             use_simd_kernels,
1730                               gmx_bool             bUseGPU,
1731                               gmx_bool             bEmulateGPU,
1732                               const t_inputrec    *ir,
1733                               int                 *kernel_type,
1734                               int                 *ewald_excl,
1735                               gmx_bool             bDoNonbonded)
1736 {
1737     assert(kernel_type);
1738
1739     *kernel_type = nbnxnkNotSet;
1740     *ewald_excl  = ewaldexclTable;
1741
1742     if (bEmulateGPU)
1743     {
1744         *kernel_type = nbnxnk8x8x8_PlainC;
1745
1746         if (bDoNonbonded)
1747         {
1748             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1749         }
1750     }
1751     else if (bUseGPU)
1752     {
1753         *kernel_type = nbnxnk8x8x8_CUDA;
1754     }
1755
1756     if (*kernel_type == nbnxnkNotSet)
1757     {
1758         /* LJ PME with LB combination rule does 7 mesh operations.
1759          * This so slow that we don't compile SIMD non-bonded kernels for that.
1760          */
1761         if (use_simd_kernels &&
1762             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1763         {
1764             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1765         }
1766         else
1767         {
1768             *kernel_type = nbnxnk4x4_PlainC;
1769         }
1770     }
1771
1772     if (bDoNonbonded && fp != NULL)
1773     {
1774         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1775                 lookup_nbnxn_kernel_name(*kernel_type),
1776                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1777                 nbnxn_kernel_to_cj_size(*kernel_type));
1778
1779         if (nbnxnk4x4_PlainC == *kernel_type ||
1780             nbnxnk8x8x8_PlainC == *kernel_type)
1781         {
1782             md_print_warn(cr, fp,
1783                           "WARNING: Using the slow %s kernels. This should\n"
1784                           "not happen during routine usage on supported platforms.\n\n",
1785                           lookup_nbnxn_kernel_name(*kernel_type));
1786         }
1787     }
1788 }
1789
1790 static void pick_nbnxn_resources(const t_commrec     *cr,
1791                                  const gmx_hw_info_t *hwinfo,
1792                                  gmx_bool             bDoNonbonded,
1793                                  gmx_bool            *bUseGPU,
1794                                  gmx_bool            *bEmulateGPU,
1795                                  const gmx_gpu_opt_t *gpu_opt)
1796 {
1797     gmx_bool bEmulateGPUEnvVarSet;
1798     char     gpu_err_str[STRLEN];
1799
1800     *bUseGPU = FALSE;
1801
1802     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1803
1804     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1805      * GPUs (currently) only handle non-bonded calculations, we will
1806      * automatically switch to emulation if non-bonded calculations are
1807      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1808      * way to turn off GPU initialization, data movement, and cleanup.
1809      *
1810      * GPU emulation can be useful to assess the performance one can expect by
1811      * adding GPU(s) to the machine. The conditional below allows this even
1812      * if mdrun is compiled without GPU acceleration support.
1813      * Note that you should freezing the system as otherwise it will explode.
1814      */
1815     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1816                     (!bDoNonbonded &&
1817                      gpu_opt->ncuda_dev_use > 0));
1818
1819     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1820      */
1821     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1822     {
1823         /* Each PP node will use the intra-node id-th device from the
1824          * list of detected/selected GPUs. */
1825         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1826                       &hwinfo->gpu_info, gpu_opt))
1827         {
1828             /* At this point the init should never fail as we made sure that
1829              * we have all the GPUs we need. If it still does, we'll bail. */
1830             gmx_fatal(FARGS, "On rank %d failed to initialize GPU #%d: %s",
1831                       cr->nodeid,
1832                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1833                                         cr->rank_pp_intranode),
1834                       gpu_err_str);
1835         }
1836
1837         /* Here we actually turn on hardware GPU acceleration */
1838         *bUseGPU = TRUE;
1839     }
1840 }
1841
1842 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1843                             nonbonded_verlet_t *nbv,
1844                             int                 group)
1845 {
1846     gmx_bool bUsesSimpleTables = TRUE;
1847     int      grp_index;
1848
1849     switch (cutoff_scheme)
1850     {
1851         case ecutsGROUP:
1852             bUsesSimpleTables = TRUE;
1853             break;
1854         case ecutsVERLET:
1855             assert(NULL != nbv && NULL != nbv->grp);
1856             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1857             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1858             break;
1859         default:
1860             gmx_incons("unimplemented");
1861     }
1862     return bUsesSimpleTables;
1863 }
1864
1865 static void init_ewald_f_table(interaction_const_t *ic,
1866                                gmx_bool             bUsesSimpleTables,
1867                                real                 rtab)
1868 {
1869     real maxr;
1870
1871     if (bUsesSimpleTables)
1872     {
1873         /* With a spacing of 0.0005 we are at the force summation accuracy
1874          * for the SSE kernels for "normal" atomistic simulations.
1875          */
1876         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1877                                                    ic->rcoulomb);
1878
1879         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1880         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1881     }
1882     else
1883     {
1884         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1885         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1886         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1887     }
1888
1889     sfree_aligned(ic->tabq_coul_FDV0);
1890     sfree_aligned(ic->tabq_coul_F);
1891     sfree_aligned(ic->tabq_coul_V);
1892
1893     sfree_aligned(ic->tabq_vdw_FDV0);
1894     sfree_aligned(ic->tabq_vdw_F);
1895     sfree_aligned(ic->tabq_vdw_V);
1896
1897     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1898     {
1899         /* Create the original table data in FDV0 */
1900         snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1901         snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1902         snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1903         table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1904                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1905     }
1906
1907     if (EVDW_PME(ic->vdwtype))
1908     {
1909         snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1910         snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1911         snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1912         table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1913                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1914     }
1915 }
1916
1917 void init_interaction_const_tables(FILE                *fp,
1918                                    interaction_const_t *ic,
1919                                    gmx_bool             bUsesSimpleTables,
1920                                    real                 rtab)
1921 {
1922     real spacing;
1923
1924     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1925     {
1926         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1927
1928         if (fp != NULL)
1929         {
1930             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1931                     1/ic->tabq_scale, ic->tabq_size);
1932         }
1933     }
1934 }
1935
1936 static void clear_force_switch_constants(shift_consts_t *sc)
1937 {
1938     sc->c2   = 0;
1939     sc->c3   = 0;
1940     sc->cpot = 0;
1941 }
1942
1943 static void force_switch_constants(real p,
1944                                    real rsw, real rc,
1945                                    shift_consts_t *sc)
1946 {
1947     /* Here we determine the coefficient for shifting the force to zero
1948      * between distance rsw and the cut-off rc.
1949      * For a potential of r^-p, we have force p*r^-(p+1).
1950      * But to save flops we absorb p in the coefficient.
1951      * Thus we get:
1952      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1953      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1954      */
1955     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1956     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1957     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1958 }
1959
1960 static void potential_switch_constants(real rsw, real rc,
1961                                        switch_consts_t *sc)
1962 {
1963     /* The switch function is 1 at rsw and 0 at rc.
1964      * The derivative and second derivate are zero at both ends.
1965      * rsw        = max(r - r_switch, 0)
1966      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1967      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1968      * force      = force*dsw - potential*sw
1969      * potential *= sw
1970      */
1971     sc->c3 = -10*pow(rc - rsw, -3);
1972     sc->c4 =  15*pow(rc - rsw, -4);
1973     sc->c5 =  -6*pow(rc - rsw, -5);
1974 }
1975
1976 static void
1977 init_interaction_const(FILE                       *fp,
1978                        const t_commrec gmx_unused *cr,
1979                        interaction_const_t       **interaction_const,
1980                        const t_forcerec           *fr,
1981                        real                        rtab)
1982 {
1983     interaction_const_t *ic;
1984     gmx_bool             bUsesSimpleTables = TRUE;
1985
1986     snew(ic, 1);
1987
1988     /* Just allocate something so we can free it */
1989     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1990     snew_aligned(ic->tabq_coul_F, 16, 32);
1991     snew_aligned(ic->tabq_coul_V, 16, 32);
1992
1993     ic->rlist           = fr->rlist;
1994     ic->rlistlong       = fr->rlistlong;
1995
1996     /* Lennard-Jones */
1997     ic->vdwtype         = fr->vdwtype;
1998     ic->vdw_modifier    = fr->vdw_modifier;
1999     ic->rvdw            = fr->rvdw;
2000     ic->rvdw_switch     = fr->rvdw_switch;
2001     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
2002     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
2003     ic->sh_lj_ewald     = 0;
2004     clear_force_switch_constants(&ic->dispersion_shift);
2005     clear_force_switch_constants(&ic->repulsion_shift);
2006
2007     switch (ic->vdw_modifier)
2008     {
2009         case eintmodPOTSHIFT:
2010             /* Only shift the potential, don't touch the force */
2011             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2012             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
2013             if (EVDW_PME(ic->vdwtype))
2014             {
2015                 real crc2;
2016
2017                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2018                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2019             }
2020             break;
2021         case eintmodFORCESWITCH:
2022             /* Switch the force, switch and shift the potential */
2023             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2024                                    &ic->dispersion_shift);
2025             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2026                                    &ic->repulsion_shift);
2027             break;
2028         case eintmodPOTSWITCH:
2029             /* Switch the potential and force */
2030             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2031                                        &ic->vdw_switch);
2032             break;
2033         case eintmodNONE:
2034         case eintmodEXACTCUTOFF:
2035             /* Nothing to do here */
2036             break;
2037         default:
2038             gmx_incons("unimplemented potential modifier");
2039     }
2040
2041     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2042
2043     /* Electrostatics */
2044     ic->eeltype          = fr->eeltype;
2045     ic->coulomb_modifier = fr->coulomb_modifier;
2046     ic->rcoulomb         = fr->rcoulomb;
2047     ic->epsilon_r        = fr->epsilon_r;
2048     ic->epsfac           = fr->epsfac;
2049     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
2050
2051     if (fr->coulomb_modifier == eintmodPOTSHIFT)
2052     {
2053         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2054     }
2055     else
2056     {
2057         ic->sh_ewald = 0;
2058     }
2059
2060     /* Reaction-field */
2061     if (EEL_RF(ic->eeltype))
2062     {
2063         ic->epsilon_rf = fr->epsilon_rf;
2064         ic->k_rf       = fr->k_rf;
2065         ic->c_rf       = fr->c_rf;
2066     }
2067     else
2068     {
2069         /* For plain cut-off we might use the reaction-field kernels */
2070         ic->epsilon_rf = ic->epsilon_r;
2071         ic->k_rf       = 0;
2072         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2073         {
2074             ic->c_rf   = 1/ic->rcoulomb;
2075         }
2076         else
2077         {
2078             ic->c_rf   = 0;
2079         }
2080     }
2081
2082     if (fp != NULL)
2083     {
2084         real dispersion_shift;
2085
2086         dispersion_shift = ic->dispersion_shift.cpot;
2087         if (EVDW_PME(ic->vdwtype))
2088         {
2089             dispersion_shift -= ic->sh_lj_ewald;
2090         }
2091         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2092                 ic->repulsion_shift.cpot, dispersion_shift);
2093
2094         if (ic->eeltype == eelCUT)
2095         {
2096             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2097         }
2098         else if (EEL_PME(ic->eeltype))
2099         {
2100             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2101         }
2102         fprintf(fp, "\n");
2103     }
2104
2105     *interaction_const = ic;
2106
2107     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2108     {
2109         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2110
2111         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2112          * also sharing texture references. To keep the code simple, we don't
2113          * treat texture references as shared resources, but this means that
2114          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2115          * Hence, to ensure that the non-bonded kernels don't start before all
2116          * texture binding operations are finished, we need to wait for all ranks
2117          * to arrive here before continuing.
2118          *
2119          * Note that we could omit this barrier if GPUs are not shared (or
2120          * texture objects are used), but as this is initialization code, there
2121          * is not point in complicating things.
2122          */
2123 #ifdef GMX_THREAD_MPI
2124         if (PAR(cr))
2125         {
2126             gmx_barrier(cr);
2127         }
2128 #endif  /* GMX_THREAD_MPI */
2129     }
2130
2131     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2132     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2133 }
2134
2135 static void init_nb_verlet(FILE                *fp,
2136                            nonbonded_verlet_t **nb_verlet,
2137                            gmx_bool             bFEP_NonBonded,
2138                            const t_inputrec    *ir,
2139                            const t_forcerec    *fr,
2140                            const t_commrec     *cr,
2141                            const char          *nbpu_opt)
2142 {
2143     nonbonded_verlet_t *nbv;
2144     int                 i;
2145     char               *env;
2146     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2147
2148     nbnxn_alloc_t      *nb_alloc;
2149     nbnxn_free_t       *nb_free;
2150
2151     snew(nbv, 1);
2152
2153     pick_nbnxn_resources(cr, fr->hwinfo,
2154                          fr->bNonbonded,
2155                          &nbv->bUseGPU,
2156                          &bEmulateGPU,
2157                          fr->gpu_opt);
2158
2159     nbv->nbs = NULL;
2160
2161     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2162     for (i = 0; i < nbv->ngrp; i++)
2163     {
2164         nbv->grp[i].nbl_lists.nnbl = 0;
2165         nbv->grp[i].nbat           = NULL;
2166         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2167
2168         if (i == 0) /* local */
2169         {
2170             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2171                               nbv->bUseGPU, bEmulateGPU, ir,
2172                               &nbv->grp[i].kernel_type,
2173                               &nbv->grp[i].ewald_excl,
2174                               fr->bNonbonded);
2175         }
2176         else /* non-local */
2177         {
2178             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2179             {
2180                 /* Use GPU for local, select a CPU kernel for non-local */
2181                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2182                                   FALSE, FALSE, ir,
2183                                   &nbv->grp[i].kernel_type,
2184                                   &nbv->grp[i].ewald_excl,
2185                                   fr->bNonbonded);
2186
2187                 bHybridGPURun = TRUE;
2188             }
2189             else
2190             {
2191                 /* Use the same kernel for local and non-local interactions */
2192                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2193                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2194             }
2195         }
2196     }
2197
2198     if (nbv->bUseGPU)
2199     {
2200         /* init the NxN GPU data; the last argument tells whether we'll have
2201          * both local and non-local NB calculation on GPU */
2202         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2203                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2204                         cr->rank_pp_intranode,
2205                         (nbv->ngrp > 1) && !bHybridGPURun);
2206
2207         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2208         {
2209             char *end;
2210
2211             nbv->min_ci_balanced = strtol(env, &end, 10);
2212             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2213             {
2214                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2215             }
2216
2217             if (debug)
2218             {
2219                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2220                         nbv->min_ci_balanced);
2221             }
2222         }
2223         else
2224         {
2225             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2226             if (debug)
2227             {
2228                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2229                         nbv->min_ci_balanced);
2230             }
2231         }
2232     }
2233     else
2234     {
2235         nbv->min_ci_balanced = 0;
2236     }
2237
2238     *nb_verlet = nbv;
2239
2240     nbnxn_init_search(&nbv->nbs,
2241                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2242                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2243                       bFEP_NonBonded,
2244                       gmx_omp_nthreads_get(emntNonbonded));
2245
2246     for (i = 0; i < nbv->ngrp; i++)
2247     {
2248         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2249         {
2250             nb_alloc = &pmalloc;
2251             nb_free  = &pfree;
2252         }
2253         else
2254         {
2255             nb_alloc = NULL;
2256             nb_free  = NULL;
2257         }
2258
2259         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2260                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2261                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2262                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2263                                 nb_alloc, nb_free);
2264
2265         if (i == 0 ||
2266             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2267         {
2268             gmx_bool bSimpleList;
2269             int      enbnxninitcombrule;
2270
2271             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2272
2273             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2274             {
2275                 /* Plain LJ cut-off: we can optimize with combination rules */
2276                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2277             }
2278             else if (fr->vdwtype == evdwPME)
2279             {
2280                 /* LJ-PME: we need to use a combination rule for the grid */
2281                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2282                 {
2283                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2284                 }
2285                 else
2286                 {
2287                     enbnxninitcombrule = enbnxninitcombruleLB;
2288                 }
2289             }
2290             else
2291             {
2292                 /* We use a full combination matrix: no rule required */
2293                 enbnxninitcombrule = enbnxninitcombruleNONE;
2294             }
2295
2296
2297             snew(nbv->grp[i].nbat, 1);
2298             nbnxn_atomdata_init(fp,
2299                                 nbv->grp[i].nbat,
2300                                 nbv->grp[i].kernel_type,
2301                                 enbnxninitcombrule,
2302                                 fr->ntype, fr->nbfp,
2303                                 ir->opts.ngener,
2304                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2305                                 nb_alloc, nb_free);
2306         }
2307         else
2308         {
2309             nbv->grp[i].nbat = nbv->grp[0].nbat;
2310         }
2311     }
2312 }
2313
2314 void init_forcerec(FILE              *fp,
2315                    const output_env_t oenv,
2316                    t_forcerec        *fr,
2317                    t_fcdata          *fcd,
2318                    const t_inputrec  *ir,
2319                    const gmx_mtop_t  *mtop,
2320                    const t_commrec   *cr,
2321                    matrix             box,
2322                    const char        *tabfn,
2323                    const char        *tabafn,
2324                    const char        *tabpfn,
2325                    const char        *tabbfn,
2326                    const char        *nbpu_opt,
2327                    gmx_bool           bNoSolvOpt,
2328                    real               print_force)
2329 {
2330     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2331     real           rtab;
2332     char          *env;
2333     double         dbl;
2334     const t_block *cgs;
2335     gmx_bool       bGenericKernelOnly;
2336     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2337     gmx_bool       bFEP_NonBonded;
2338     t_nblists     *nbl;
2339     int           *nm_ind, egp_flags;
2340
2341     if (fr->hwinfo == NULL)
2342     {
2343         /* Detect hardware, gather information.
2344          * In mdrun, hwinfo has already been set before calling init_forcerec.
2345          * Here we ignore GPUs, as tools will not use them anyhow.
2346          */
2347         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2348     }
2349
2350     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2351     fr->use_simd_kernels = TRUE;
2352
2353     fr->bDomDec = DOMAINDECOMP(cr);
2354
2355     natoms = mtop->natoms;
2356
2357     if (check_box(ir->ePBC, box))
2358     {
2359         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2360     }
2361
2362     /* Test particle insertion ? */
2363     if (EI_TPI(ir->eI))
2364     {
2365         /* Set to the size of the molecule to be inserted (the last one) */
2366         /* Because of old style topologies, we have to use the last cg
2367          * instead of the last molecule type.
2368          */
2369         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2370         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2371         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2372         {
2373             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2374         }
2375     }
2376     else
2377     {
2378         fr->n_tpi = 0;
2379     }
2380
2381     /* Copy AdResS parameters */
2382     if (ir->bAdress)
2383     {
2384         fr->adress_type           = ir->adress->type;
2385         fr->adress_const_wf       = ir->adress->const_wf;
2386         fr->adress_ex_width       = ir->adress->ex_width;
2387         fr->adress_hy_width       = ir->adress->hy_width;
2388         fr->adress_icor           = ir->adress->icor;
2389         fr->adress_site           = ir->adress->site;
2390         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2391         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2392
2393
2394         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2395         for (i = 0; i < ir->adress->n_energy_grps; i++)
2396         {
2397             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2398         }
2399
2400         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2401         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2402         for (i = 0; i < fr->n_adress_tf_grps; i++)
2403         {
2404             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2405         }
2406         copy_rvec(ir->adress->refs, fr->adress_refs);
2407     }
2408     else
2409     {
2410         fr->adress_type           = eAdressOff;
2411         fr->adress_do_hybridpairs = FALSE;
2412     }
2413
2414     /* Copy the user determined parameters */
2415     fr->userint1  = ir->userint1;
2416     fr->userint2  = ir->userint2;
2417     fr->userint3  = ir->userint3;
2418     fr->userint4  = ir->userint4;
2419     fr->userreal1 = ir->userreal1;
2420     fr->userreal2 = ir->userreal2;
2421     fr->userreal3 = ir->userreal3;
2422     fr->userreal4 = ir->userreal4;
2423
2424     /* Shell stuff */
2425     fr->fc_stepsize = ir->fc_stepsize;
2426
2427     /* Free energy */
2428     fr->efep        = ir->efep;
2429     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2430     if (ir->fepvals->bScCoul)
2431     {
2432         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2433         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2434     }
2435     else
2436     {
2437         fr->sc_alphacoul  = 0;
2438         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2439     }
2440     fr->sc_power      = ir->fepvals->sc_power;
2441     fr->sc_r_power    = ir->fepvals->sc_r_power;
2442     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2443
2444     env = getenv("GMX_SCSIGMA_MIN");
2445     if (env != NULL)
2446     {
2447         dbl = 0;
2448         sscanf(env, "%lf", &dbl);
2449         fr->sc_sigma6_min = pow(dbl, 6);
2450         if (fp)
2451         {
2452             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2453         }
2454     }
2455
2456     fr->bNonbonded = TRUE;
2457     if (getenv("GMX_NO_NONBONDED") != NULL)
2458     {
2459         /* turn off non-bonded calculations */
2460         fr->bNonbonded = FALSE;
2461         md_print_warn(cr, fp,
2462                       "Found environment variable GMX_NO_NONBONDED.\n"
2463                       "Disabling nonbonded calculations.\n");
2464     }
2465
2466     bGenericKernelOnly = FALSE;
2467
2468     /* We now check in the NS code whether a particular combination of interactions
2469      * can be used with water optimization, and disable it if that is not the case.
2470      */
2471
2472     if (getenv("GMX_NB_GENERIC") != NULL)
2473     {
2474         if (fp != NULL)
2475         {
2476             fprintf(fp,
2477                     "Found environment variable GMX_NB_GENERIC.\n"
2478                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2479                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2480         }
2481         bGenericKernelOnly = TRUE;
2482     }
2483
2484     if (bGenericKernelOnly == TRUE)
2485     {
2486         bNoSolvOpt         = TRUE;
2487     }
2488
2489     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2490     {
2491         fr->use_simd_kernels = FALSE;
2492         if (fp != NULL)
2493         {
2494             fprintf(fp,
2495                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2496                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2497         }
2498     }
2499
2500     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2501
2502     /* Check if we can/should do all-vs-all kernels */
2503     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2504     fr->AllvsAll_work   = NULL;
2505     fr->AllvsAll_workgb = NULL;
2506
2507     /* All-vs-all kernels have not been implemented in 4.6, and
2508      * the SIMD group kernels are also buggy in this case. Non-SIMD
2509      * group kernels are OK. See Redmine #1249. */
2510     if (fr->bAllvsAll)
2511     {
2512         fr->bAllvsAll            = FALSE;
2513         fr->use_simd_kernels     = FALSE;
2514         if (fp != NULL)
2515         {
2516             fprintf(fp,
2517                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2518                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2519                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2520                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2521                     "we are proceeding by disabling all CPU architecture-specific\n"
2522                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2523                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2524         }
2525     }
2526
2527     /* Neighbour searching stuff */
2528     fr->cutoff_scheme = ir->cutoff_scheme;
2529     fr->bGrid         = (ir->ns_type == ensGRID);
2530     fr->ePBC          = ir->ePBC;
2531
2532     if (fr->cutoff_scheme == ecutsGROUP)
2533     {
2534         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2535             "removed in a future release when 'verlet' supports all interaction forms.\n";
2536
2537         if (MASTER(cr))
2538         {
2539             fprintf(stderr, "\n%s\n", note);
2540         }
2541         if (fp != NULL)
2542         {
2543             fprintf(fp, "\n%s\n", note);
2544         }
2545     }
2546
2547     /* Determine if we will do PBC for distances in bonded interactions */
2548     if (fr->ePBC == epbcNONE)
2549     {
2550         fr->bMolPBC = FALSE;
2551     }
2552     else
2553     {
2554         if (!DOMAINDECOMP(cr))
2555         {
2556             /* The group cut-off scheme and SHAKE assume charge groups
2557              * are whole, but not using molpbc is faster in most cases.
2558              */
2559             if (fr->cutoff_scheme == ecutsGROUP ||
2560                 (ir->eConstrAlg == econtSHAKE &&
2561                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2562                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2563             {
2564                 fr->bMolPBC = ir->bPeriodicMols;
2565             }
2566             else
2567             {
2568                 fr->bMolPBC = TRUE;
2569                 if (getenv("GMX_USE_GRAPH") != NULL)
2570                 {
2571                     fr->bMolPBC = FALSE;
2572                     if (fp)
2573                     {
2574                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2575                     }
2576                 }
2577             }
2578         }
2579         else
2580         {
2581             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2582         }
2583     }
2584     fr->bGB = (ir->implicit_solvent == eisGBSA);
2585
2586     fr->rc_scaling = ir->refcoord_scaling;
2587     copy_rvec(ir->posres_com, fr->posres_com);
2588     copy_rvec(ir->posres_comB, fr->posres_comB);
2589     fr->rlist                    = cutoff_inf(ir->rlist);
2590     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2591     fr->eeltype                  = ir->coulombtype;
2592     fr->vdwtype                  = ir->vdwtype;
2593     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2594
2595     fr->coulomb_modifier = ir->coulomb_modifier;
2596     fr->vdw_modifier     = ir->vdw_modifier;
2597
2598     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2599     switch (fr->eeltype)
2600     {
2601         case eelCUT:
2602             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2603             break;
2604
2605         case eelRF:
2606         case eelGRF:
2607         case eelRF_NEC:
2608             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2609             break;
2610
2611         case eelRF_ZERO:
2612             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2613             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2614             break;
2615
2616         case eelSWITCH:
2617         case eelSHIFT:
2618         case eelUSER:
2619         case eelENCADSHIFT:
2620         case eelPMESWITCH:
2621         case eelPMEUSER:
2622         case eelPMEUSERSWITCH:
2623             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2624             break;
2625
2626         case eelPME:
2627         case eelEWALD:
2628             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2629             break;
2630
2631         default:
2632             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2633             break;
2634     }
2635
2636     /* Vdw: Translate from mdp settings to kernel format */
2637     switch (fr->vdwtype)
2638     {
2639         case evdwCUT:
2640             if (fr->bBHAM)
2641             {
2642                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2643             }
2644             else
2645             {
2646                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2647             }
2648             break;
2649         case evdwPME:
2650             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2651             break;
2652
2653         case evdwSWITCH:
2654         case evdwSHIFT:
2655         case evdwUSER:
2656         case evdwENCADSHIFT:
2657             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2658             break;
2659
2660         default:
2661             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2662             break;
2663     }
2664
2665     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2666     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2667     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2668
2669     fr->rvdw             = cutoff_inf(ir->rvdw);
2670     fr->rvdw_switch      = ir->rvdw_switch;
2671     fr->rcoulomb         = cutoff_inf(ir->rcoulomb);
2672     fr->rcoulomb_switch  = ir->rcoulomb_switch;
2673
2674     fr->bTwinRange = fr->rlistlong > fr->rlist;
2675     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2676
2677     fr->reppow     = mtop->ffparams.reppow;
2678
2679     if (ir->cutoff_scheme == ecutsGROUP)
2680     {
2681         fr->bvdwtab    = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2682                           && !EVDW_PME(fr->vdwtype));
2683         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2684         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2685                            fr->eeltype == eelEWALD ||
2686                            fr->eeltype == eelPME ||
2687                            fr->eeltype == eelRF ||
2688                            fr->eeltype == eelRF_ZERO);
2689
2690         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2691          * going to be faster to tabulate the interaction than calling the generic kernel.
2692          * However, if generic kernels have been requested we keep things analytically.
2693          */
2694         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH &&
2695             fr->nbkernel_vdw_modifier == eintmodPOTSWITCH &&
2696             bGenericKernelOnly == FALSE)
2697         {
2698             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2699             {
2700                 fr->bcoultab = TRUE;
2701                 /* Once we tabulate electrostatics, we can use the switch function for LJ,
2702                  * which would otherwise need two tables.
2703                  */
2704             }
2705         }
2706         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2707                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2708                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2709                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2710         {
2711             if ((fr->rcoulomb != fr->rvdw) && (bGenericKernelOnly == FALSE))
2712             {
2713                 fr->bcoultab = TRUE;
2714             }
2715         }
2716
2717         if (fr->nbkernel_elec_modifier == eintmodFORCESWITCH)
2718         {
2719             fr->bcoultab = TRUE;
2720         }
2721         if (fr->nbkernel_vdw_modifier == eintmodFORCESWITCH)
2722         {
2723             fr->bvdwtab = TRUE;
2724         }
2725
2726         if (getenv("GMX_REQUIRE_TABLES"))
2727         {
2728             fr->bvdwtab  = TRUE;
2729             fr->bcoultab = TRUE;
2730         }
2731
2732         if (fp)
2733         {
2734             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2735             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2736         }
2737
2738         if (fr->bvdwtab == TRUE)
2739         {
2740             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2741             fr->nbkernel_vdw_modifier    = eintmodNONE;
2742         }
2743         if (fr->bcoultab == TRUE)
2744         {
2745             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2746             fr->nbkernel_elec_modifier    = eintmodNONE;
2747         }
2748     }
2749
2750     if (ir->cutoff_scheme == ecutsVERLET)
2751     {
2752         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2753         {
2754             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2755         }
2756         fr->bvdwtab  = FALSE;
2757         fr->bcoultab = FALSE;
2758     }
2759
2760     /* Tables are used for direct ewald sum */
2761     if (fr->bEwald)
2762     {
2763         if (EEL_PME(ir->coulombtype))
2764         {
2765             if (fp)
2766             {
2767                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2768             }
2769             if (ir->coulombtype == eelP3M_AD)
2770             {
2771                 please_cite(fp, "Hockney1988");
2772                 please_cite(fp, "Ballenegger2012");
2773             }
2774             else
2775             {
2776                 please_cite(fp, "Essmann95a");
2777             }
2778
2779             if (ir->ewald_geometry == eewg3DC)
2780             {
2781                 if (fp)
2782                 {
2783                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2784                 }
2785                 please_cite(fp, "In-Chul99a");
2786             }
2787         }
2788         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2789         init_ewald_tab(&(fr->ewald_table), ir, fp);
2790         if (fp)
2791         {
2792             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2793                     1/fr->ewaldcoeff_q);
2794         }
2795     }
2796
2797     if (EVDW_PME(ir->vdwtype))
2798     {
2799         if (fp)
2800         {
2801             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2802         }
2803         please_cite(fp, "Essmann95a");
2804         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2805         if (fp)
2806         {
2807             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2808                     1/fr->ewaldcoeff_lj);
2809         }
2810     }
2811
2812     /* Electrostatics */
2813     fr->epsilon_r       = ir->epsilon_r;
2814     fr->epsilon_rf      = ir->epsilon_rf;
2815     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2816
2817     /* Parameters for generalized RF */
2818     fr->zsquare = 0.0;
2819     fr->temp    = 0.0;
2820
2821     if (fr->eeltype == eelGRF)
2822     {
2823         init_generalized_rf(fp, mtop, ir, fr);
2824     }
2825
2826     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2827                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2828                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2829                        IR_ELEC_FIELD(*ir) ||
2830                        (fr->adress_icor != eAdressICOff)
2831                        );
2832
2833     if (fr->cutoff_scheme == ecutsGROUP &&
2834         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2835     {
2836         /* Count the total number of charge groups */
2837         fr->cg_nalloc = ncg_mtop(mtop);
2838         srenew(fr->cg_cm, fr->cg_nalloc);
2839     }
2840     if (fr->shift_vec == NULL)
2841     {
2842         snew(fr->shift_vec, SHIFTS);
2843     }
2844
2845     if (fr->fshift == NULL)
2846     {
2847         snew(fr->fshift, SHIFTS);
2848     }
2849
2850     if (fr->nbfp == NULL)
2851     {
2852         fr->ntype = mtop->ffparams.atnr;
2853         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2854         if (EVDW_PME(fr->vdwtype))
2855         {
2856             fr->ljpme_c6grid  = make_ljpme_c6grid(&mtop->ffparams, fr);
2857         }
2858     }
2859
2860     /* Copy the energy group exclusions */
2861     fr->egp_flags = ir->opts.egp_flags;
2862
2863     /* Van der Waals stuff */
2864     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2865     {
2866         if (fr->rvdw_switch >= fr->rvdw)
2867         {
2868             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2869                       fr->rvdw_switch, fr->rvdw);
2870         }
2871         if (fp)
2872         {
2873             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2874                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2875                     fr->rvdw_switch, fr->rvdw);
2876         }
2877     }
2878
2879     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2880     {
2881         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2882     }
2883
2884     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2885     {
2886         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2887     }
2888
2889     if (fr->bBHAM && fr->cutoff_scheme == ecutsVERLET)
2890     {
2891         gmx_fatal(FARGS, "Verlet cutoff-scheme is not supported with Buckingham");
2892     }
2893
2894     if (fp)
2895     {
2896         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2897                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2898     }
2899
2900     fr->eDispCorr = ir->eDispCorr;
2901     if (ir->eDispCorr != edispcNO)
2902     {
2903         set_avcsixtwelve(fp, fr, mtop);
2904     }
2905
2906     if (fr->bBHAM)
2907     {
2908         set_bham_b_max(fp, fr, mtop);
2909     }
2910
2911     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2912
2913     /* Copy the GBSA data (radius, volume and surftens for each
2914      * atomtype) from the topology atomtype section to forcerec.
2915      */
2916     snew(fr->atype_radius, fr->ntype);
2917     snew(fr->atype_vol, fr->ntype);
2918     snew(fr->atype_surftens, fr->ntype);
2919     snew(fr->atype_gb_radius, fr->ntype);
2920     snew(fr->atype_S_hct, fr->ntype);
2921
2922     if (mtop->atomtypes.nr > 0)
2923     {
2924         for (i = 0; i < fr->ntype; i++)
2925         {
2926             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2927         }
2928         for (i = 0; i < fr->ntype; i++)
2929         {
2930             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2931         }
2932         for (i = 0; i < fr->ntype; i++)
2933         {
2934             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2935         }
2936         for (i = 0; i < fr->ntype; i++)
2937         {
2938             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2939         }
2940         for (i = 0; i < fr->ntype; i++)
2941         {
2942             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2943         }
2944     }
2945
2946     /* Generate the GB table if needed */
2947     if (fr->bGB)
2948     {
2949 #ifdef GMX_DOUBLE
2950         fr->gbtabscale = 2000;
2951 #else
2952         fr->gbtabscale = 500;
2953 #endif
2954
2955         fr->gbtabr = 100;
2956         fr->gbtab  = make_gb_table(oenv, fr);
2957
2958         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2959
2960         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2961         if (!DOMAINDECOMP(cr))
2962         {
2963             make_local_gb(cr, fr->born, ir->gb_algorithm);
2964         }
2965     }
2966
2967     /* Set the charge scaling */
2968     if (fr->epsilon_r != 0)
2969     {
2970         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2971     }
2972     else
2973     {
2974         /* eps = 0 is infinite dieletric: no coulomb interactions */
2975         fr->epsfac = 0;
2976     }
2977
2978     /* Reaction field constants */
2979     if (EEL_RF(fr->eeltype))
2980     {
2981         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2982                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2983                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2984     }
2985
2986     /*This now calculates sum for q and c6*/
2987     set_chargesum(fp, fr, mtop);
2988
2989     /* if we are using LR electrostatics, and they are tabulated,
2990      * the tables will contain modified coulomb interactions.
2991      * Since we want to use the non-shifted ones for 1-4
2992      * coulombic interactions, we must have an extra set of tables.
2993      */
2994
2995     /* Construct tables.
2996      * A little unnecessary to make both vdw and coul tables sometimes,
2997      * but what the heck... */
2998
2999     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
3000         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
3001
3002     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
3003                              fr->coulomb_modifier != eintmodNONE ||
3004                              fr->vdw_modifier != eintmodNONE ||
3005                              fr->bBHAM || fr->bEwald) &&
3006                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
3007                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
3008                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
3009
3010     negp_pp   = ir->opts.ngener - ir->nwall;
3011     negptable = 0;
3012     if (!bMakeTables)
3013     {
3014         bSomeNormalNbListsAreInUse = TRUE;
3015         fr->nnblists               = 1;
3016     }
3017     else
3018     {
3019         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
3020         for (egi = 0; egi < negp_pp; egi++)
3021         {
3022             for (egj = egi; egj < negp_pp; egj++)
3023             {
3024                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3025                 if (!(egp_flags & EGP_EXCL))
3026                 {
3027                     if (egp_flags & EGP_TABLE)
3028                     {
3029                         negptable++;
3030                     }
3031                     else
3032                     {
3033                         bSomeNormalNbListsAreInUse = TRUE;
3034                     }
3035                 }
3036             }
3037         }
3038         if (bSomeNormalNbListsAreInUse)
3039         {
3040             fr->nnblists = negptable + 1;
3041         }
3042         else
3043         {
3044             fr->nnblists = negptable;
3045         }
3046         if (fr->nnblists > 1)
3047         {
3048             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3049         }
3050     }
3051
3052     if (ir->adress)
3053     {
3054         fr->nnblists *= 2;
3055     }
3056
3057     snew(fr->nblists, fr->nnblists);
3058
3059     /* This code automatically gives table length tabext without cut-off's,
3060      * in that case grompp should already have checked that we do not need
3061      * normal tables and we only generate tables for 1-4 interactions.
3062      */
3063     rtab = ir->rlistlong + ir->tabext;
3064
3065     if (bMakeTables)
3066     {
3067         /* make tables for ordinary interactions */
3068         if (bSomeNormalNbListsAreInUse)
3069         {
3070             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3071             if (ir->adress)
3072             {
3073                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3074             }
3075             if (!bMakeSeparate14Table)
3076             {
3077                 fr->tab14 = fr->nblists[0].table_elec_vdw;
3078             }
3079             m = 1;
3080         }
3081         else
3082         {
3083             m = 0;
3084         }
3085         if (negptable > 0)
3086         {
3087             /* Read the special tables for certain energy group pairs */
3088             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3089             for (egi = 0; egi < negp_pp; egi++)
3090             {
3091                 for (egj = egi; egj < negp_pp; egj++)
3092                 {
3093                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3094                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3095                     {
3096                         nbl = &(fr->nblists[m]);
3097                         if (fr->nnblists > 1)
3098                         {
3099                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3100                         }
3101                         /* Read the table file with the two energy groups names appended */
3102                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3103                                         *mtop->groups.grpname[nm_ind[egi]],
3104                                         *mtop->groups.grpname[nm_ind[egj]],
3105                                         &fr->nblists[m]);
3106                         if (ir->adress)
3107                         {
3108                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3109                                             *mtop->groups.grpname[nm_ind[egi]],
3110                                             *mtop->groups.grpname[nm_ind[egj]],
3111                                             &fr->nblists[fr->nnblists/2+m]);
3112                         }
3113                         m++;
3114                     }
3115                     else if (fr->nnblists > 1)
3116                     {
3117                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3118                     }
3119                 }
3120             }
3121         }
3122     }
3123     else if ((fr->eDispCorr != edispcNO) &&
3124              ((fr->vdw_modifier == eintmodPOTSWITCH) ||
3125               (fr->vdw_modifier == eintmodFORCESWITCH) ||
3126               (fr->vdw_modifier == eintmodPOTSHIFT)))
3127     {
3128         /* Tables might not be used for the potential modifier interactions per se, but
3129          * we still need them to evaluate switch/shift dispersion corrections in this case.
3130          */
3131         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3132     }
3133
3134     if (bMakeSeparate14Table)
3135     {
3136         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3137         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3138                                 GMX_MAKETABLES_14ONLY);
3139     }
3140
3141     /* Read AdResS Thermo Force table if needed */
3142     if (fr->adress_icor == eAdressICThermoForce)
3143     {
3144         /* old todo replace */
3145
3146         if (ir->adress->n_tf_grps > 0)
3147         {
3148             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3149
3150         }
3151         else
3152         {
3153             /* load the default table */
3154             snew(fr->atf_tabs, 1);
3155             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3156         }
3157     }
3158
3159     /* Wall stuff */
3160     fr->nwall = ir->nwall;
3161     if (ir->nwall && ir->wall_type == ewtTABLE)
3162     {
3163         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3164     }
3165
3166     if (fcd && tabbfn)
3167     {
3168         fcd->bondtab  = make_bonded_tables(fp,
3169                                            F_TABBONDS, F_TABBONDSNC,
3170                                            mtop, tabbfn, "b");
3171         fcd->angletab = make_bonded_tables(fp,
3172                                            F_TABANGLES, -1,
3173                                            mtop, tabbfn, "a");
3174         fcd->dihtab   = make_bonded_tables(fp,
3175                                            F_TABDIHS, -1,
3176                                            mtop, tabbfn, "d");
3177     }
3178     else
3179     {
3180         if (debug)
3181         {
3182             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3183         }
3184     }
3185
3186     /* QM/MM initialization if requested
3187      */
3188     if (ir->bQMMM)
3189     {
3190         fprintf(stderr, "QM/MM calculation requested.\n");
3191     }
3192
3193     fr->bQMMM      = ir->bQMMM;
3194     fr->qr         = mk_QMMMrec();
3195
3196     /* Set all the static charge group info */
3197     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3198                                    &bFEP_NonBonded,
3199                                    &fr->bExcl_IntraCGAll_InterCGNone);
3200     if (DOMAINDECOMP(cr))
3201     {
3202         fr->cginfo = NULL;
3203     }
3204     else
3205     {
3206         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3207     }
3208
3209     if (!DOMAINDECOMP(cr))
3210     {
3211         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3212                             mtop->natoms, mtop->natoms, mtop->natoms);
3213     }
3214
3215     fr->print_force = print_force;
3216
3217
3218     /* coarse load balancing vars */
3219     fr->t_fnbf    = 0.;
3220     fr->t_wait    = 0.;
3221     fr->timesteps = 0;
3222
3223     /* Initialize neighbor search */
3224     init_ns(fp, cr, &fr->ns, fr, mtop);
3225
3226     if (cr->duty & DUTY_PP)
3227     {
3228         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3229         /*
3230            if (ir->bAdress)
3231             {
3232                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3233             }
3234          */
3235     }
3236
3237     /* Initialize the thread working data for bonded interactions */
3238     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3239
3240     snew(fr->excl_load, fr->nthreads+1);
3241
3242     if (fr->cutoff_scheme == ecutsVERLET)
3243     {
3244         if (ir->rcoulomb != ir->rvdw)
3245         {
3246             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3247         }
3248
3249         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3250     }
3251
3252     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3253     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3254
3255     if (ir->eDispCorr != edispcNO)
3256     {
3257         calc_enervirdiff(fp, ir->eDispCorr, fr);
3258     }
3259 }
3260
3261 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3262 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3263 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3264
3265 void pr_forcerec(FILE *fp, t_forcerec *fr)
3266 {
3267     int i;
3268
3269     pr_real(fp, fr->rlist);
3270     pr_real(fp, fr->rcoulomb);
3271     pr_real(fp, fr->fudgeQQ);
3272     pr_bool(fp, fr->bGrid);
3273     pr_bool(fp, fr->bTwinRange);
3274     /*pr_int(fp,fr->cg0);
3275        pr_int(fp,fr->hcg);*/
3276     for (i = 0; i < fr->nnblists; i++)
3277     {
3278         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3279     }
3280     pr_real(fp, fr->rcoulomb_switch);
3281     pr_real(fp, fr->rcoulomb);
3282
3283     fflush(fp);
3284 }
3285
3286 void forcerec_set_excl_load(t_forcerec           *fr,
3287                             const gmx_localtop_t *top)
3288 {
3289     const int *ind, *a;
3290     int        t, i, j, ntot, n, ntarget;
3291
3292     ind = top->excls.index;
3293     a   = top->excls.a;
3294
3295     ntot = 0;
3296     for (i = 0; i < top->excls.nr; i++)
3297     {
3298         for (j = ind[i]; j < ind[i+1]; j++)
3299         {
3300             if (a[j] > i)
3301             {
3302                 ntot++;
3303             }
3304         }
3305     }
3306
3307     fr->excl_load[0] = 0;
3308     n                = 0;
3309     i                = 0;
3310     for (t = 1; t <= fr->nthreads; t++)
3311     {
3312         ntarget = (ntot*t)/fr->nthreads;
3313         while (i < top->excls.nr && n < ntarget)
3314         {
3315             for (j = ind[i]; j < ind[i+1]; j++)
3316             {
3317                 if (a[j] > i)
3318                 {
3319                     n++;
3320                 }
3321             }
3322             i++;
3323         }
3324         fr->excl_load[t] = i;
3325     }
3326 }