src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "typedefs.h"
  45 #include "types/commrec.h"
  46 #include "gromacs/math/vec.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "macros.h"
  49 #include "gromacs/utility/smalloc.h"
  50 #include "macros.h"
  51 #include "gromacs/utility/fatalerror.h"
  52 #include "gromacs/math/units.h"
  53 #include "force.h"
  54 #include "tables.h"
  55 #include "nonbonded.h"
  56 #include "names.h"
  57 #include "network.h"
  58 #include "gromacs/pbcutil/pbc.h"
  59 #include "ns.h"
  60 #include "txtdump.h"
  61 #include "coulomb.h"
  62 #include "md_support.h"
  63 #include "md_logging.h"
  64 #include "domdec.h"
  65 #include "qmmm.h"
  66 #include "copyrite.h"
  67 #include "mtop_util.h"
  68 #include "nbnxn_simd.h"
  69 #include "nbnxn_search.h"
  70 #include "nbnxn_atomdata.h"
  71 #include "nbnxn_consts.h"
  72 #include "gmx_omp_nthreads.h"
  73 #include "gmx_detect_hardware.h"
  74 #include "inputrec.h"
  75
  76 #include "types/nbnxn_cuda_types_ext.h"
  77 #include "gpu_utils.h"
  78 #include "nbnxn_cuda_data_mgmt.h"
  79 #include "pmalloc_cuda.h"
  80
  81 t_forcerec *mk_forcerec(void)
  82 {
  83     t_forcerec *fr;
  84
  85     snew(fr, 1);
  86
  87     return fr;
  88 }
  89
  90 #ifdef DEBUG
  91 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  92 {
  93     int i, j;
  94
  95     for (i = 0; (i < atnr); i++)
  96     {
  97         for (j = 0; (j < atnr); j++)
  98         {
  99             fprintf(fp, "%2d - %2d", i, j);
 100             if (bBHAM)
 101             {
 102                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 103                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 104             }
 105             else
 106             {
 107                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 108                         C12(nbfp, atnr, i, j)/12.0);
 109             }
 110         }
 111     }
 112 }
 113 #endif
 114
 115 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 116 {
 117     real *nbfp;
 118     int   i, j, k, atnr;
 119
 120     atnr = idef->atnr;
 121     if (bBHAM)
 122     {
 123         snew(nbfp, 3*atnr*atnr);
 124         for (i = k = 0; (i < atnr); i++)
 125         {
 126             for (j = 0; (j < atnr); j++, k++)
 127             {
 128                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 129                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 130                 /* nbfp now includes the 6.0 derivative prefactor */
 131                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 132             }
 133         }
 134     }
 135     else
 136     {
 137         snew(nbfp, 2*atnr*atnr);
 138         for (i = k = 0; (i < atnr); i++)
 139         {
 140             for (j = 0; (j < atnr); j++, k++)
 141             {
 142                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 143                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 144                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 145             }
 146         }
 147     }
 148
 149     return nbfp;
 150 }
 151
 152 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
 153 {
 154     int   i, j, k, atnr;
 155     real  c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 156     real *grid;
 157
 158     /* For LJ-PME simulations, we correct the energies with the reciprocal space
 159      * inside of the cut-off. To do this the non-bonded kernels needs to have
 160      * access to the C6-values used on the reciprocal grid in pme.c
 161      */
 162
 163     atnr = idef->atnr;
 164     snew(grid, 2*atnr*atnr);
 165     for (i = k = 0; (i < atnr); i++)
 166     {
 167         for (j = 0; (j < atnr); j++, k++)
 168         {
 169             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 170             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 171             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 172             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 173             c6   = sqrt(c6i * c6j);
 174             if (fr->ljpme_combination_rule == eljpmeLB
 175                 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
 176             {
 177                 sigmai = pow(c12i / c6i, 1.0/6.0);
 178                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 179                 epsi   = c6i * c6i / c12i;
 180                 epsj   = c6j * c6j / c12j;
 181                 c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 182             }
 183             /* Store the elements at the same relative positions as C6 in nbfp in order
 184              * to simplify access in the kernels
 185              */
 186             grid[2*(atnr*i+j)] = c6*6.0;
 187         }
 188     }
 189     return grid;
 190 }
 191
 192 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 193 {
 194     real *nbfp;
 195     int   i, j, k, atnr;
 196     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 197     real  c6, c12;
 198
 199     atnr = idef->atnr;
 200     snew(nbfp, 2*atnr*atnr);
 201     for (i = 0; i < atnr; ++i)
 202     {
 203         for (j = 0; j < atnr; ++j)
 204         {
 205             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 206             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 207             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 208             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 209             c6   = sqrt(c6i  * c6j);
 210             c12  = sqrt(c12i * c12j);
 211             if (comb_rule == eCOMB_ARITHMETIC
 212                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 213             {
 214                 sigmai = pow(c12i / c6i, 1.0/6.0);
 215                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 216                 epsi   = c6i * c6i / c12i;
 217                 epsj   = c6j * c6j / c12j;
 218                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 219                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 220             }
 221             C6(nbfp, atnr, i, j)   = c6*6.0;
 222             C12(nbfp, atnr, i, j)  = c12*12.0;
 223         }
 224     }
 225     return nbfp;
 226 }
 227
 228 /* This routine sets fr->solvent_opt to the most common solvent in the
 229  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 230  * the fr->solvent_type array with the correct type (or esolNO).
 231  *
 232  * Charge groups that fulfill the conditions but are not identical to the
 233  * most common one will be marked as esolNO in the solvent_type array.
 234  *
 235  * TIP3p is identical to SPC for these purposes, so we call it
 236  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 237  *
 238  * NOTE: QM particle should not
 239  * become an optimized solvent. Not even if there is only one charge
 240  * group in the Qm
 241  */
 242
 243 typedef struct
 244 {
 245     int    model;
 246     int    count;
 247     int    vdwtype[4];
 248     real   charge[4];
 249 } solvent_parameters_t;
 250
 251 static void
 252 check_solvent_cg(const gmx_moltype_t    *molt,
 253                  int                     cg0,
 254                  int                     nmol,
 255                  const unsigned char    *qm_grpnr,
 256                  const t_grps           *qm_grps,
 257                  t_forcerec   *          fr,
 258                  int                    *n_solvent_parameters,
 259                  solvent_parameters_t  **solvent_parameters_p,
 260                  int                     cginfo,
 261                  int                    *cg_sp)
 262 {
 263     const t_blocka       *excl;
 264     t_atom               *atom;
 265     int                   j, k;
 266     int                   j0, j1, nj;
 267     gmx_bool              perturbed;
 268     gmx_bool              has_vdw[4];
 269     gmx_bool              match;
 270     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 271     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 272     int                   tjA;
 273     gmx_bool              qm;
 274     solvent_parameters_t *solvent_parameters;
 275
 276     /* We use a list with parameters for each solvent type.
 277      * Every time we discover a new molecule that fulfills the basic
 278      * conditions for a solvent we compare with the previous entries
 279      * in these lists. If the parameters are the same we just increment
 280      * the counter for that type, and otherwise we create a new type
 281      * based on the current molecule.
 282      *
 283      * Once we've finished going through all molecules we check which
 284      * solvent is most common, and mark all those molecules while we
 285      * clear the flag on all others.
 286      */
 287
 288     solvent_parameters = *solvent_parameters_p;
 289
 290     /* Mark the cg first as non optimized */
 291     *cg_sp = -1;
 292
 293     /* Check if this cg has no exclusions with atoms in other charge groups
 294      * and all atoms inside the charge group excluded.
 295      * We only have 3 or 4 atom solvent loops.
 296      */
 297     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 298         !GET_CGINFO_EXCL_INTRA(cginfo))
 299     {
 300         return;
 301     }
 302
 303     /* Get the indices of the first atom in this charge group */
 304     j0     = molt->cgs.index[cg0];
 305     j1     = molt->cgs.index[cg0+1];
 306
 307     /* Number of atoms in our molecule */
 308     nj     = j1 - j0;
 309
 310     if (debug)
 311     {
 312         fprintf(debug,
 313                 "Moltype '%s': there are %d atoms in this charge group\n",
 314                 *molt->name, nj);
 315     }
 316
 317     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 318      * otherwise skip it.
 319      */
 320     if (nj < 3 || nj > 4)
 321     {
 322         return;
 323     }
 324
 325     /* Check if we are doing QM on this group */
 326     qm = FALSE;
 327     if (qm_grpnr != NULL)
 328     {
 329         for (j = j0; j < j1 && !qm; j++)
 330         {
 331             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 332         }
 333     }
 334     /* Cannot use solvent optimization with QM */
 335     if (qm)
 336     {
 337         return;
 338     }
 339
 340     atom = molt->atoms.atom;
 341
 342     /* Still looks like a solvent, time to check parameters */
 343
 344     /* If it is perturbed (free energy) we can't use the solvent loops,
 345      * so then we just skip to the next molecule.
 346      */
 347     perturbed = FALSE;
 348
 349     for (j = j0; j < j1 && !perturbed; j++)
 350     {
 351         perturbed = PERTURBED(atom[j]);
 352     }
 353
 354     if (perturbed)
 355     {
 356         return;
 357     }
 358
 359     /* Now it's only a question if the VdW and charge parameters
 360      * are OK. Before doing the check we compare and see if they are
 361      * identical to a possible previous solvent type.
 362      * First we assign the current types and charges.
 363      */
 364     for (j = 0; j < nj; j++)
 365     {
 366         tmp_vdwtype[j] = atom[j0+j].type;
 367         tmp_charge[j]  = atom[j0+j].q;
 368     }
 369
 370     /* Does it match any previous solvent type? */
 371     for (k = 0; k < *n_solvent_parameters; k++)
 372     {
 373         match = TRUE;
 374
 375
 376         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 377         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 378              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 379         {
 380             match = FALSE;
 381         }
 382
 383         /* Check that types & charges match for all atoms in molecule */
 384         for (j = 0; j < nj && match == TRUE; j++)
 385         {
 386             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 387             {
 388                 match = FALSE;
 389             }
 390             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 391             {
 392                 match = FALSE;
 393             }
 394         }
 395         if (match == TRUE)
 396         {
 397             /* Congratulations! We have a matched solvent.
 398              * Flag it with this type for later processing.
 399              */
 400             *cg_sp = k;
 401             solvent_parameters[k].count += nmol;
 402
 403             /* We are done with this charge group */
 404             return;
 405         }
 406     }
 407
 408     /* If we get here, we have a tentative new solvent type.
 409      * Before we add it we must check that it fulfills the requirements
 410      * of the solvent optimized loops. First determine which atoms have
 411      * VdW interactions.
 412      */
 413     for (j = 0; j < nj; j++)
 414     {
 415         has_vdw[j] = FALSE;
 416         tjA        = tmp_vdwtype[j];
 417
 418         /* Go through all other tpes and see if any have non-zero
 419          * VdW parameters when combined with this one.
 420          */
 421         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 422         {
 423             /* We already checked that the atoms weren't perturbed,
 424              * so we only need to check state A now.
 425              */
 426             if (fr->bBHAM)
 427             {
 428                 has_vdw[j] = (has_vdw[j] ||
 429                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 430                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 431                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 432             }
 433             else
 434             {
 435                 /* Standard LJ */
 436                 has_vdw[j] = (has_vdw[j] ||
 437                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 438                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 439             }
 440         }
 441     }
 442
 443     /* Now we know all we need to make the final check and assignment. */
 444     if (nj == 3)
 445     {
 446         /* So, is it an SPC?
 447          * For this we require thatn all atoms have charge,
 448          * the charges on atom 2 & 3 should be the same, and only
 449          * atom 1 might have VdW.
 450          */
 451         if (has_vdw[1] == FALSE &&
 452             has_vdw[2] == FALSE &&
 453             tmp_charge[0]  != 0 &&
 454             tmp_charge[1]  != 0 &&
 455             tmp_charge[2]  == tmp_charge[1])
 456         {
 457             srenew(solvent_parameters, *n_solvent_parameters+1);
 458             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 459             solvent_parameters[*n_solvent_parameters].count = nmol;
 460             for (k = 0; k < 3; k++)
 461             {
 462                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 463                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 464             }
 465
 466             *cg_sp = *n_solvent_parameters;
 467             (*n_solvent_parameters)++;
 468         }
 469     }
 470     else if (nj == 4)
 471     {
 472         /* Or could it be a TIP4P?
 473          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 474          * Only atom 1 mght have VdW.
 475          */
 476         if (has_vdw[1] == FALSE &&
 477             has_vdw[2] == FALSE &&
 478             has_vdw[3] == FALSE &&
 479             tmp_charge[0]  == 0 &&
 480             tmp_charge[1]  != 0 &&
 481             tmp_charge[2]  == tmp_charge[1] &&
 482             tmp_charge[3]  != 0)
 483         {
 484             srenew(solvent_parameters, *n_solvent_parameters+1);
 485             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 486             solvent_parameters[*n_solvent_parameters].count = nmol;
 487             for (k = 0; k < 4; k++)
 488             {
 489                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 490                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 491             }
 492
 493             *cg_sp = *n_solvent_parameters;
 494             (*n_solvent_parameters)++;
 495         }
 496     }
 497
 498     *solvent_parameters_p = solvent_parameters;
 499 }
 500
 501 static void
 502 check_solvent(FILE  *                fp,
 503               const gmx_mtop_t  *    mtop,
 504               t_forcerec  *          fr,
 505               cginfo_mb_t           *cginfo_mb)
 506 {
 507     const t_block     *   cgs;
 508     const t_block     *   mols;
 509     const gmx_moltype_t  *molt;
 510     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 511     int                   n_solvent_parameters;
 512     solvent_parameters_t *solvent_parameters;
 513     int                 **cg_sp;
 514     int                   bestsp, bestsol;
 515
 516     if (debug)
 517     {
 518         fprintf(debug, "Going to determine what solvent types we have.\n");
 519     }
 520
 521     mols = &mtop->mols;
 522
 523     n_solvent_parameters = 0;
 524     solvent_parameters   = NULL;
 525     /* Allocate temporary array for solvent type */
 526     snew(cg_sp, mtop->nmolblock);
 527
 528     cg_offset = 0;
 529     at_offset = 0;
 530     for (mb = 0; mb < mtop->nmolblock; mb++)
 531     {
 532         molt = &mtop->moltype[mtop->molblock[mb].type];
 533         cgs  = &molt->cgs;
 534         /* Here we have to loop over all individual molecules
 535          * because we need to check for QMMM particles.
 536          */
 537         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 538         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 539         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 540         for (mol = 0; mol < nmol_ch; mol++)
 541         {
 542             cgm = mol*cgs->nr;
 543             am  = mol*cgs->index[cgs->nr];
 544             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 545             {
 546                 check_solvent_cg(molt, cg_mol, nmol,
 547                                  mtop->groups.grpnr[egcQMMM] ?
 548                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 549                                  &mtop->groups.grps[egcQMMM],
 550                                  fr,
 551                                  &n_solvent_parameters, &solvent_parameters,
 552                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 553                                  &cg_sp[mb][cgm+cg_mol]);
 554             }
 555         }
 556         cg_offset += cgs->nr;
 557         at_offset += cgs->index[cgs->nr];
 558     }
 559
 560     /* Puh! We finished going through all charge groups.
 561      * Now find the most common solvent model.
 562      */
 563
 564     /* Most common solvent this far */
 565     bestsp = -2;
 566     for (i = 0; i < n_solvent_parameters; i++)
 567     {
 568         if (bestsp == -2 ||
 569             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 570         {
 571             bestsp = i;
 572         }
 573     }
 574
 575     if (bestsp >= 0)
 576     {
 577         bestsol = solvent_parameters[bestsp].model;
 578     }
 579     else
 580     {
 581         bestsol = esolNO;
 582     }
 583
 584 #ifdef DISABLE_WATER_NLIST
 585     bestsol = esolNO;
 586 #endif
 587
 588     fr->nWatMol = 0;
 589     for (mb = 0; mb < mtop->nmolblock; mb++)
 590     {
 591         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 592         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 593         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 594         {
 595             if (cg_sp[mb][i] == bestsp)
 596             {
 597                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 598                 fr->nWatMol += nmol;
 599             }
 600             else
 601             {
 602                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 603             }
 604         }
 605         sfree(cg_sp[mb]);
 606     }
 607     sfree(cg_sp);
 608
 609     if (bestsol != esolNO && fp != NULL)
 610     {
 611         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 612                 esol_names[bestsol],
 613                 solvent_parameters[bestsp].count);
 614     }
 615
 616     sfree(solvent_parameters);
 617     fr->solvent_opt = bestsol;
 618 }
 619
 620 enum {
 621     acNONE = 0, acCONSTRAINT, acSETTLE
 622 };
 623
 624 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 625                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 626                                    gmx_bool *bFEP_NonBonded,
 627                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 628 {
 629     const t_block        *cgs;
 630     const t_blocka       *excl;
 631     const gmx_moltype_t  *molt;
 632     const gmx_molblock_t *molb;
 633     cginfo_mb_t          *cginfo_mb;
 634     gmx_bool             *type_VDW;
 635     int                  *cginfo;
 636     int                   cg_offset, a_offset, cgm, am;
 637     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 638     int                  *a_con;
 639     int                   ftype;
 640     int                   ia;
 641     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
 642
 643     ncg_tot = ncg_mtop(mtop);
 644     snew(cginfo_mb, mtop->nmolblock);
 645
 646     snew(type_VDW, fr->ntype);
 647     for (ai = 0; ai < fr->ntype; ai++)
 648     {
 649         type_VDW[ai] = FALSE;
 650         for (j = 0; j < fr->ntype; j++)
 651         {
 652             type_VDW[ai] = type_VDW[ai] ||
 653                 fr->bBHAM ||
 654                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 655                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 656         }
 657     }
 658
 659     *bFEP_NonBonded               = FALSE;
 660     *bExcl_IntraCGAll_InterCGNone = TRUE;
 661
 662     excl_nalloc = 10;
 663     snew(bExcl, excl_nalloc);
 664     cg_offset = 0;
 665     a_offset  = 0;
 666     for (mb = 0; mb < mtop->nmolblock; mb++)
 667     {
 668         molb = &mtop->molblock[mb];
 669         molt = &mtop->moltype[molb->type];
 670         cgs  = &molt->cgs;
 671         excl = &molt->excls;
 672
 673         /* Check if the cginfo is identical for all molecules in this block.
 674          * If so, we only need an array of the size of one molecule.
 675          * Otherwise we make an array of #mol times #cgs per molecule.
 676          */
 677         bId = TRUE;
 678         am  = 0;
 679         for (m = 0; m < molb->nmol; m++)
 680         {
 681             am = m*cgs->index[cgs->nr];
 682             for (cg = 0; cg < cgs->nr; cg++)
 683             {
 684                 a0 = cgs->index[cg];
 685                 a1 = cgs->index[cg+1];
 686                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 687                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 688                 {
 689                     bId = FALSE;
 690                 }
 691                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 692                 {
 693                     for (ai = a0; ai < a1; ai++)
 694                     {
 695                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 696                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 697                         {
 698                             bId = FALSE;
 699                         }
 700                     }
 701                 }
 702             }
 703         }
 704
 705         cginfo_mb[mb].cg_start = cg_offset;
 706         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 707         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 708         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 709         cginfo = cginfo_mb[mb].cginfo;
 710
 711         /* Set constraints flags for constrained atoms */
 712         snew(a_con, molt->atoms.nr);
 713         for (ftype = 0; ftype < F_NRE; ftype++)
 714         {
 715             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 716             {
 717                 int nral;
 718
 719                 nral = NRAL(ftype);
 720                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 721                 {
 722                     int a;
 723
 724                     for (a = 0; a < nral; a++)
 725                     {
 726                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 727                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 728                     }
 729                 }
 730             }
 731         }
 732
 733         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 734         {
 735             cgm = m*cgs->nr;
 736             am  = m*cgs->index[cgs->nr];
 737             for (cg = 0; cg < cgs->nr; cg++)
 738             {
 739                 a0 = cgs->index[cg];
 740                 a1 = cgs->index[cg+1];
 741
 742                 /* Store the energy group in cginfo */
 743                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 744                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 745
 746                 /* Check the intra/inter charge group exclusions */
 747                 if (a1-a0 > excl_nalloc)
 748                 {
 749                     excl_nalloc = a1 - a0;
 750                     srenew(bExcl, excl_nalloc);
 751                 }
 752                 /* bExclIntraAll: all intra cg interactions excluded
 753                  * bExclInter:    any inter cg interactions excluded
 754                  */
 755                 bExclIntraAll       = TRUE;
 756                 bExclInter          = FALSE;
 757                 bHaveVDW            = FALSE;
 758                 bHaveQ              = FALSE;
 759                 bHavePerturbedAtoms = FALSE;
 760                 for (ai = a0; ai < a1; ai++)
 761                 {
 762                     /* Check VDW and electrostatic interactions */
 763                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 764                                             type_VDW[molt->atoms.atom[ai].typeB]);
 765                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 766                                             molt->atoms.atom[ai].qB != 0);
 767
 768                     bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
 769
 770                     /* Clear the exclusion list for atom ai */
 771                     for (aj = a0; aj < a1; aj++)
 772                     {
 773                         bExcl[aj-a0] = FALSE;
 774                     }
 775                     /* Loop over all the exclusions of atom ai */
 776                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 777                     {
 778                         aj = excl->a[j];
 779                         if (aj < a0 || aj >= a1)
 780                         {
 781                             bExclInter = TRUE;
 782                         }
 783                         else
 784                         {
 785                             bExcl[aj-a0] = TRUE;
 786                         }
 787                     }
 788                     /* Check if ai excludes a0 to a1 */
 789                     for (aj = a0; aj < a1; aj++)
 790                     {
 791                         if (!bExcl[aj-a0])
 792                         {
 793                             bExclIntraAll = FALSE;
 794                         }
 795                     }
 796
 797                     switch (a_con[ai])
 798                     {
 799                         case acCONSTRAINT:
 800                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 801                             break;
 802                         case acSETTLE:
 803                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 804                             break;
 805                         default:
 806                             break;
 807                     }
 808                 }
 809                 if (bExclIntraAll)
 810                 {
 811                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 812                 }
 813                 if (bExclInter)
 814                 {
 815                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 816                 }
 817                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 818                 {
 819                     /* The size in cginfo is currently only read with DD */
 820                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 821                 }
 822                 if (bHaveVDW)
 823                 {
 824                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 825                 }
 826                 if (bHaveQ)
 827                 {
 828                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 829                 }
 830                 if (bHavePerturbedAtoms && fr->efep != efepNO)
 831                 {
 832                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 833                     *bFEP_NonBonded = TRUE;
 834                 }
 835                 /* Store the charge group size */
 836                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 837
 838                 if (!bExclIntraAll || bExclInter)
 839                 {
 840                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 841                 }
 842             }
 843         }
 844
 845         sfree(a_con);
 846
 847         cg_offset += molb->nmol*cgs->nr;
 848         a_offset  += molb->nmol*cgs->index[cgs->nr];
 849     }
 850     sfree(bExcl);
 851
 852     /* the solvent optimizer is called after the QM is initialized,
 853      * because we don't want to have the QM subsystemto become an
 854      * optimized solvent
 855      */
 856
 857     check_solvent(fplog, mtop, fr, cginfo_mb);
 858
 859     if (getenv("GMX_NO_SOLV_OPT"))
 860     {
 861         if (fplog)
 862         {
 863             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 864                     "Disabling all solvent optimization\n");
 865         }
 866         fr->solvent_opt = esolNO;
 867     }
 868     if (bNoSolvOpt)
 869     {
 870         fr->solvent_opt = esolNO;
 871     }
 872     if (!fr->solvent_opt)
 873     {
 874         for (mb = 0; mb < mtop->nmolblock; mb++)
 875         {
 876             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 877             {
 878                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 879             }
 880         }
 881     }
 882
 883     return cginfo_mb;
 884 }
 885
 886 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 887 {
 888     int  ncg, mb, cg;
 889     int *cginfo;
 890
 891     ncg = cgi_mb[nmb-1].cg_end;
 892     snew(cginfo, ncg);
 893     mb = 0;
 894     for (cg = 0; cg < ncg; cg++)
 895     {
 896         while (cg >= cgi_mb[mb].cg_end)
 897         {
 898             mb++;
 899         }
 900         cginfo[cg] =
 901             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 902     }
 903
 904     return cginfo;
 905 }
 906
 907 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 908 {
 909     /*This now calculates sum for q and c6*/
 910     double         qsum, q2sum, q, c6sum, c6;
 911     int            mb, nmol, i;
 912     const t_atoms *atoms;
 913
 914     qsum   = 0;
 915     q2sum  = 0;
 916     c6sum  = 0;
 917     for (mb = 0; mb < mtop->nmolblock; mb++)
 918     {
 919         nmol  = mtop->molblock[mb].nmol;
 920         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 921         for (i = 0; i < atoms->nr; i++)
 922         {
 923             q       = atoms->atom[i].q;
 924             qsum   += nmol*q;
 925             q2sum  += nmol*q*q;
 926             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 927             c6sum  += nmol*c6;
 928         }
 929     }
 930     fr->qsum[0]   = qsum;
 931     fr->q2sum[0]  = q2sum;
 932     fr->c6sum[0]  = c6sum;
 933
 934     if (fr->efep != efepNO)
 935     {
 936         qsum   = 0;
 937         q2sum  = 0;
 938         c6sum  = 0;
 939         for (mb = 0; mb < mtop->nmolblock; mb++)
 940         {
 941             nmol  = mtop->molblock[mb].nmol;
 942             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 943             for (i = 0; i < atoms->nr; i++)
 944             {
 945                 q       = atoms->atom[i].qB;
 946                 qsum   += nmol*q;
 947                 q2sum  += nmol*q*q;
 948                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 949                 c6sum  += nmol*c6;
 950             }
 951             fr->qsum[1]   = qsum;
 952             fr->q2sum[1]  = q2sum;
 953             fr->c6sum[1]  = c6sum;
 954         }
 955     }
 956     else
 957     {
 958         fr->qsum[1]   = fr->qsum[0];
 959         fr->q2sum[1]  = fr->q2sum[0];
 960         fr->c6sum[1]  = fr->c6sum[0];
 961     }
 962     if (log)
 963     {
 964         if (fr->efep == efepNO)
 965         {
 966             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 967         }
 968         else
 969         {
 970             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 971                     fr->qsum[0], fr->qsum[1]);
 972         }
 973     }
 974 }
 975
 976 void update_forcerec(t_forcerec *fr, matrix box)
 977 {
 978     if (fr->eeltype == eelGRF)
 979     {
 980         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 981                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 982                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 983     }
 984 }
 985
 986 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 987 {
 988     const t_atoms  *atoms, *atoms_tpi;
 989     const t_blocka *excl;
 990     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 991     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 992     double          csix, ctwelve;
 993     int             ntp, *typecount;
 994     gmx_bool        bBHAM;
 995     real           *nbfp;
 996     real           *nbfp_comb = NULL;
 997
 998     ntp   = fr->ntype;
 999     bBHAM = fr->bBHAM;
1000     nbfp  = fr->nbfp;
1001
1002     /* For LJ-PME, we want to correct for the difference between the
1003      * actual C6 values and the C6 values used by the LJ-PME based on
1004      * combination rules. */
1005
1006     if (EVDW_PME(fr->vdwtype))
1007     {
1008         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1009                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1010         for (tpi = 0; tpi < ntp; ++tpi)
1011         {
1012             for (tpj = 0; tpj < ntp; ++tpj)
1013             {
1014                 C6(nbfp_comb, ntp, tpi, tpj) =
1015                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1016                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1017             }
1018         }
1019         nbfp = nbfp_comb;
1020     }
1021     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1022     {
1023         csix    = 0;
1024         ctwelve = 0;
1025         npair   = 0;
1026         nexcl   = 0;
1027         if (!fr->n_tpi)
1028         {
1029             /* Count the types so we avoid natoms^2 operations */
1030             snew(typecount, ntp);
1031             gmx_mtop_count_atomtypes(mtop, q, typecount);
1032
1033             for (tpi = 0; tpi < ntp; tpi++)
1034             {
1035                 for (tpj = tpi; tpj < ntp; tpj++)
1036                 {
1037                     tmpi = typecount[tpi];
1038                     tmpj = typecount[tpj];
1039                     if (tpi != tpj)
1040                     {
1041                         npair_ij = tmpi*tmpj;
1042                     }
1043                     else
1044                     {
1045                         npair_ij = tmpi*(tmpi - 1)/2;
1046                     }
1047                     if (bBHAM)
1048                     {
1049                         /* nbfp now includes the 6.0 derivative prefactor */
1050                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1051                     }
1052                     else
1053                     {
1054                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1055                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1056                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1057                     }
1058                     npair += npair_ij;
1059                 }
1060             }
1061             sfree(typecount);
1062             /* Subtract the excluded pairs.
1063              * The main reason for substracting exclusions is that in some cases
1064              * some combinations might never occur and the parameters could have
1065              * any value. These unused values should not influence the dispersion
1066              * correction.
1067              */
1068             for (mb = 0; mb < mtop->nmolblock; mb++)
1069             {
1070                 nmol  = mtop->molblock[mb].nmol;
1071                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1072                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1073                 for (i = 0; (i < atoms->nr); i++)
1074                 {
1075                     if (q == 0)
1076                     {
1077                         tpi = atoms->atom[i].type;
1078                     }
1079                     else
1080                     {
1081                         tpi = atoms->atom[i].typeB;
1082                     }
1083                     j1  = excl->index[i];
1084                     j2  = excl->index[i+1];
1085                     for (j = j1; j < j2; j++)
1086                     {
1087                         k = excl->a[j];
1088                         if (k > i)
1089                         {
1090                             if (q == 0)
1091                             {
1092                                 tpj = atoms->atom[k].type;
1093                             }
1094                             else
1095                             {
1096                                 tpj = atoms->atom[k].typeB;
1097                             }
1098                             if (bBHAM)
1099                             {
1100                                 /* nbfp now includes the 6.0 derivative prefactor */
1101                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1102                             }
1103                             else
1104                             {
1105                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1106                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1107                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1108                             }
1109                             nexcl += nmol;
1110                         }
1111                     }
1112                 }
1113             }
1114         }
1115         else
1116         {
1117             /* Only correct for the interaction of the test particle
1118              * with the rest of the system.
1119              */
1120             atoms_tpi =
1121                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1122
1123             npair = 0;
1124             for (mb = 0; mb < mtop->nmolblock; mb++)
1125             {
1126                 nmol  = mtop->molblock[mb].nmol;
1127                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1128                 for (j = 0; j < atoms->nr; j++)
1129                 {
1130                     nmolc = nmol;
1131                     /* Remove the interaction of the test charge group
1132                      * with itself.
1133                      */
1134                     if (mb == mtop->nmolblock-1)
1135                     {
1136                         nmolc--;
1137
1138                         if (mb == 0 && nmol == 1)
1139                         {
1140                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1141                         }
1142                     }
1143                     if (q == 0)
1144                     {
1145                         tpj = atoms->atom[j].type;
1146                     }
1147                     else
1148                     {
1149                         tpj = atoms->atom[j].typeB;
1150                     }
1151                     for (i = 0; i < fr->n_tpi; i++)
1152                     {
1153                         if (q == 0)
1154                         {
1155                             tpi = atoms_tpi->atom[i].type;
1156                         }
1157                         else
1158                         {
1159                             tpi = atoms_tpi->atom[i].typeB;
1160                         }
1161                         if (bBHAM)
1162                         {
1163                             /* nbfp now includes the 6.0 derivative prefactor */
1164                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1165                         }
1166                         else
1167                         {
1168                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1169                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1170                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1171                         }
1172                         npair += nmolc;
1173                     }
1174                 }
1175             }
1176         }
1177         if (npair - nexcl <= 0 && fplog)
1178         {
1179             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1180             csix     = 0;
1181             ctwelve  = 0;
1182         }
1183         else
1184         {
1185             csix    /= npair - nexcl;
1186             ctwelve /= npair - nexcl;
1187         }
1188         if (debug)
1189         {
1190             fprintf(debug, "Counted %d exclusions\n", nexcl);
1191             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1192             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1193         }
1194         fr->avcsix[q]    = csix;
1195         fr->avctwelve[q] = ctwelve;
1196     }
1197
1198     if (EVDW_PME(fr->vdwtype))
1199     {
1200         sfree(nbfp_comb);
1201     }
1202
1203     if (fplog != NULL)
1204     {
1205         if (fr->eDispCorr == edispcAllEner ||
1206             fr->eDispCorr == edispcAllEnerPres)
1207         {
1208             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1209                     fr->avcsix[0], fr->avctwelve[0]);
1210         }
1211         else
1212         {
1213             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1214         }
1215     }
1216 }
1217
1218
1219 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1220                            const gmx_mtop_t *mtop)
1221 {
1222     const t_atoms *at1, *at2;
1223     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1224     real           b, bmin;
1225     real          *nbfp;
1226
1227     if (fplog)
1228     {
1229         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1230     }
1231     nbfp   = fr->nbfp;
1232     ntypes = fr->ntype;
1233
1234     bmin           = -1;
1235     fr->bham_b_max = 0;
1236     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1237     {
1238         at1 = &mtop->moltype[mt1].atoms;
1239         for (i = 0; (i < at1->nr); i++)
1240         {
1241             tpi = at1->atom[i].type;
1242             if (tpi >= ntypes)
1243             {
1244                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1245             }
1246
1247             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1248             {
1249                 at2 = &mtop->moltype[mt2].atoms;
1250                 for (j = 0; (j < at2->nr); j++)
1251                 {
1252                     tpj = at2->atom[j].type;
1253                     if (tpj >= ntypes)
1254                     {
1255                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1256                     }
1257                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1258                     if (b > fr->bham_b_max)
1259                     {
1260                         fr->bham_b_max = b;
1261                     }
1262                     if ((b < bmin) || (bmin == -1))
1263                     {
1264                         bmin = b;
1265                     }
1266                 }
1267             }
1268         }
1269     }
1270     if (fplog)
1271     {
1272         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1273                 bmin, fr->bham_b_max);
1274     }
1275 }
1276
1277 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1278                             t_forcerec *fr, real rtab,
1279                             const t_commrec *cr,
1280                             const char *tabfn, char *eg1, char *eg2,
1281                             t_nblists *nbl)
1282 {
1283     char buf[STRLEN];
1284     int  i, j;
1285
1286     if (tabfn == NULL)
1287     {
1288         if (debug)
1289         {
1290             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1291         }
1292         return;
1293     }
1294
1295     sprintf(buf, "%s", tabfn);
1296     if (eg1 && eg2)
1297     {
1298         /* Append the two energy group names */
1299         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1300                 eg1, eg2, ftp2ext(efXVG));
1301     }
1302     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1303     /* Copy the contents of the table to separate coulomb and LJ tables too,
1304      * to improve cache performance.
1305      */
1306     /* For performance reasons we want
1307      * the table data to be aligned to 16-byte. The pointers could be freed
1308      * but currently aren't.
1309      */
1310     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1311     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1312     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1313     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1314     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1315     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1316     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1317     nbl->table_elec.ninteractions = 1;
1318     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1319     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1320
1321     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1322     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1323     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1324     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1325     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1326     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1327     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1328     nbl->table_vdw.ninteractions = 2;
1329     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1330     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1331
1332     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1333     {
1334         for (j = 0; j < 4; j++)
1335         {
1336             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1337         }
1338         for (j = 0; j < 8; j++)
1339         {
1340             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1341         }
1342     }
1343 }
1344
1345 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1346                          int *ncount, int **count)
1347 {
1348     const gmx_moltype_t *molt;
1349     const t_ilist       *il;
1350     int                  mt, ftype, stride, i, j, tabnr;
1351
1352     for (mt = 0; mt < mtop->nmoltype; mt++)
1353     {
1354         molt = &mtop->moltype[mt];
1355         for (ftype = 0; ftype < F_NRE; ftype++)
1356         {
1357             if (ftype == ftype1 || ftype == ftype2)
1358             {
1359                 il     = &molt->ilist[ftype];
1360                 stride = 1 + NRAL(ftype);
1361                 for (i = 0; i < il->nr; i += stride)
1362                 {
1363                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1364                     if (tabnr < 0)
1365                     {
1366                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1367                     }
1368                     if (tabnr >= *ncount)
1369                     {
1370                         srenew(*count, tabnr+1);
1371                         for (j = *ncount; j < tabnr+1; j++)
1372                         {
1373                             (*count)[j] = 0;
1374                         }
1375                         *ncount = tabnr+1;
1376                     }
1377                     (*count)[tabnr]++;
1378                 }
1379             }
1380         }
1381     }
1382 }
1383
1384 static bondedtable_t *make_bonded_tables(FILE *fplog,
1385                                          int ftype1, int ftype2,
1386                                          const gmx_mtop_t *mtop,
1387                                          const char *basefn, const char *tabext)
1388 {
1389     int            i, ncount, *count;
1390     char           tabfn[STRLEN];
1391     bondedtable_t *tab;
1392
1393     tab = NULL;
1394
1395     ncount = 0;
1396     count  = NULL;
1397     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1398
1399     if (ncount > 0)
1400     {
1401         snew(tab, ncount);
1402         for (i = 0; i < ncount; i++)
1403         {
1404             if (count[i] > 0)
1405             {
1406                 sprintf(tabfn, "%s", basefn);
1407                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1408                         tabext, i, ftp2ext(efXVG));
1409                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1410             }
1411         }
1412         sfree(count);
1413     }
1414
1415     return tab;
1416 }
1417
1418 void forcerec_set_ranges(t_forcerec *fr,
1419                          int ncg_home, int ncg_force,
1420                          int natoms_force,
1421                          int natoms_force_constr, int natoms_f_novirsum)
1422 {
1423     fr->cg0 = 0;
1424     fr->hcg = ncg_home;
1425
1426     /* fr->ncg_force is unused in the standard code,
1427      * but it can be useful for modified code dealing with charge groups.
1428      */
1429     fr->ncg_force           = ncg_force;
1430     fr->natoms_force        = natoms_force;
1431     fr->natoms_force_constr = natoms_force_constr;
1432
1433     if (fr->natoms_force_constr > fr->nalloc_force)
1434     {
1435         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1436
1437         if (fr->bTwinRange)
1438         {
1439             srenew(fr->f_twin, fr->nalloc_force);
1440         }
1441     }
1442
1443     if (fr->bF_NoVirSum)
1444     {
1445         fr->f_novirsum_n = natoms_f_novirsum;
1446         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1447         {
1448             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1449             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1450         }
1451     }
1452     else
1453     {
1454         fr->f_novirsum_n = 0;
1455     }
1456 }
1457
1458 static real cutoff_inf(real cutoff)
1459 {
1460     if (cutoff == 0)
1461     {
1462         cutoff = GMX_CUTOFF_INF;
1463     }
1464
1465     return cutoff;
1466 }
1467
1468 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1469                                   t_forcerec *fr, const t_inputrec *ir,
1470                                   const char *tabfn, const gmx_mtop_t *mtop,
1471                                   matrix     box)
1472 {
1473     char buf[STRLEN];
1474     int  i, j;
1475
1476     if (tabfn == NULL)
1477     {
1478         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1479         return;
1480     }
1481
1482     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1483
1484     sprintf(buf, "%s", tabfn);
1485     for (i = 0; i < ir->adress->n_tf_grps; i++)
1486     {
1487         j = ir->adress->tf_table_index[i]; /* get energy group index */
1488         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1489                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1490         if (fp)
1491         {
1492             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1493         }
1494         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1495     }
1496
1497 }
1498
1499 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1500 {
1501     gmx_bool bAllvsAll;
1502
1503     bAllvsAll =
1504         (
1505             ir->rlist == 0            &&
1506             ir->rcoulomb == 0         &&
1507             ir->rvdw == 0             &&
1508             ir->ePBC == epbcNONE      &&
1509             ir->vdwtype == evdwCUT    &&
1510             ir->coulombtype == eelCUT &&
1511             ir->efep == efepNO        &&
1512             (ir->implicit_solvent == eisNO ||
1513              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1514                                                   ir->gb_algorithm == egbHCT   ||
1515                                                   ir->gb_algorithm == egbOBC))) &&
1516             getenv("GMX_NO_ALLVSALL") == NULL
1517         );
1518
1519     if (bAllvsAll && ir->opts.ngener > 1)
1520     {
1521         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1522
1523         if (bPrintNote)
1524         {
1525             if (MASTER(cr))
1526             {
1527                 fprintf(stderr, "\n%s\n", note);
1528             }
1529             if (fp != NULL)
1530             {
1531                 fprintf(fp, "\n%s\n", note);
1532             }
1533         }
1534         bAllvsAll = FALSE;
1535     }
1536
1537     if (bAllvsAll && fp && MASTER(cr))
1538     {
1539         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1540     }
1541
1542     return bAllvsAll;
1543 }
1544
1545
1546 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1547 {
1548     int t, i;
1549
1550     /* These thread local data structures are used for bondeds only */
1551     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1552
1553     if (fr->nthreads > 1)
1554     {
1555         snew(fr->f_t, fr->nthreads);
1556         /* Thread 0 uses the global force and energy arrays */
1557         for (t = 1; t < fr->nthreads; t++)
1558         {
1559             fr->f_t[t].f        = NULL;
1560             fr->f_t[t].f_nalloc = 0;
1561             snew(fr->f_t[t].fshift, SHIFTS);
1562             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1563             for (i = 0; i < egNR; i++)
1564             {
1565                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1566             }
1567         }
1568     }
1569 }
1570
1571
1572 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1573                                       const t_commrec  *cr,
1574                                       const t_inputrec *ir,
1575                                       gmx_bool          bGPU)
1576 {
1577     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1578     {
1579         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1580                       bGPU ? "GPUs" : "SIMD kernels",
1581                       bGPU ? "CPU only" : "plain-C kernels");
1582         return FALSE;
1583     }
1584
1585     return TRUE;
1586 }
1587
1588
1589 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1590                                   int                         *kernel_type,
1591                                   int                         *ewald_excl)
1592 {
1593     *kernel_type = nbnxnk4x4_PlainC;
1594     *ewald_excl  = ewaldexclTable;
1595
1596 #ifdef GMX_NBNXN_SIMD
1597     {
1598 #ifdef GMX_NBNXN_SIMD_4XN
1599         *kernel_type = nbnxnk4xN_SIMD_4xN;
1600 #endif
1601 #ifdef GMX_NBNXN_SIMD_2XNN
1602         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1603 #endif
1604
1605 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1606         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1607          * Currently this is based on the SIMD acceleration choice,
1608          * but it might be better to decide this at runtime based on CPU.
1609          *
1610          * 4xN calculates more (zero) interactions, but has less pair-search
1611          * work and much better kernel instruction scheduling.
1612          *
1613          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1614          * which doesn't have FMA, both the analytical and tabulated Ewald
1615          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1616          * 2x(4+4) because it results in significantly fewer pairs.
1617          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1618          * 10% with HT, 50% without HT. As we currently don't detect the actual
1619          * use of HT, use 4x8 to avoid a potential performance hit.
1620          * On Intel Haswell 4x8 is always faster.
1621          */
1622         *kernel_type = nbnxnk4xN_SIMD_4xN;
1623
1624 #ifndef GMX_SIMD_HAVE_FMA
1625         if (EEL_PME_EWALD(ir->coulombtype) ||
1626             EVDW_PME(ir->vdwtype))
1627         {
1628             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1629              * There are enough instructions to make 2x(4+4) efficient.
1630              */
1631             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1632         }
1633 #endif
1634 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1635
1636
1637         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1638         {
1639 #ifdef GMX_NBNXN_SIMD_4XN
1640             *kernel_type = nbnxnk4xN_SIMD_4xN;
1641 #else
1642             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1643 #endif
1644         }
1645         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1646         {
1647 #ifdef GMX_NBNXN_SIMD_2XNN
1648             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1649 #else
1650             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1651 #endif
1652         }
1653
1654         /* Analytical Ewald exclusion correction is only an option in
1655          * the SIMD kernel.
1656          * Since table lookup's don't parallelize with SIMD, analytical
1657          * will probably always be faster for a SIMD width of 8 or more.
1658          * With FMA analytical is sometimes faster for a width if 4 as well.
1659          * On BlueGene/Q, this is faster regardless of precision.
1660          * In single precision, this is faster on Bulldozer.
1661          */
1662 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1663         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1664         defined GMX_SIMD_IBM_QPX
1665         *ewald_excl = ewaldexclAnalytical;
1666 #endif
1667         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1668         {
1669             *ewald_excl = ewaldexclTable;
1670         }
1671         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1672         {
1673             *ewald_excl = ewaldexclAnalytical;
1674         }
1675
1676     }
1677 #endif /* GMX_NBNXN_SIMD */
1678 }
1679
1680
1681 const char *lookup_nbnxn_kernel_name(int kernel_type)
1682 {
1683     const char *returnvalue = NULL;
1684     switch (kernel_type)
1685     {
1686         case nbnxnkNotSet:
1687             returnvalue = "not set";
1688             break;
1689         case nbnxnk4x4_PlainC:
1690             returnvalue = "plain C";
1691             break;
1692         case nbnxnk4xN_SIMD_4xN:
1693         case nbnxnk4xN_SIMD_2xNN:
1694 #ifdef GMX_NBNXN_SIMD
1695 #if defined GMX_SIMD_X86_SSE2
1696             returnvalue = "SSE2";
1697 #elif defined GMX_SIMD_X86_SSE4_1
1698             returnvalue = "SSE4.1";
1699 #elif defined GMX_SIMD_X86_AVX_128_FMA
1700             returnvalue = "AVX_128_FMA";
1701 #elif defined GMX_SIMD_X86_AVX_256
1702             returnvalue = "AVX_256";
1703 #elif defined GMX_SIMD_X86_AVX2_256
1704             returnvalue = "AVX2_256";
1705 #else
1706             returnvalue = "SIMD";
1707 #endif
1708 #else  /* GMX_NBNXN_SIMD */
1709             returnvalue = "not available";
1710 #endif /* GMX_NBNXN_SIMD */
1711             break;
1712         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1713         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1714
1715         case nbnxnkNR:
1716         default:
1717             gmx_fatal(FARGS, "Illegal kernel type selected");
1718             returnvalue = NULL;
1719             break;
1720     }
1721     return returnvalue;
1722 };
1723
1724 static void pick_nbnxn_kernel(FILE                *fp,
1725                               const t_commrec     *cr,
1726                               gmx_bool             use_simd_kernels,
1727                               gmx_bool             bUseGPU,
1728                               gmx_bool             bEmulateGPU,
1729                               const t_inputrec    *ir,
1730                               int                 *kernel_type,
1731                               int                 *ewald_excl,
1732                               gmx_bool             bDoNonbonded)
1733 {
1734     assert(kernel_type);
1735
1736     *kernel_type = nbnxnkNotSet;
1737     *ewald_excl  = ewaldexclTable;
1738
1739     if (bEmulateGPU)
1740     {
1741         *kernel_type = nbnxnk8x8x8_PlainC;
1742
1743         if (bDoNonbonded)
1744         {
1745             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1746         }
1747     }
1748     else if (bUseGPU)
1749     {
1750         *kernel_type = nbnxnk8x8x8_CUDA;
1751     }
1752
1753     if (*kernel_type == nbnxnkNotSet)
1754     {
1755         /* LJ PME with LB combination rule does 7 mesh operations.
1756          * This so slow that we don't compile SIMD non-bonded kernels for that.
1757          */
1758         if (use_simd_kernels &&
1759             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1760         {
1761             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1762         }
1763         else
1764         {
1765             *kernel_type = nbnxnk4x4_PlainC;
1766         }
1767     }
1768
1769     if (bDoNonbonded && fp != NULL)
1770     {
1771         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1772                 lookup_nbnxn_kernel_name(*kernel_type),
1773                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1774                 nbnxn_kernel_to_cj_size(*kernel_type));
1775     }
1776 }
1777
1778 static void pick_nbnxn_resources(const t_commrec     *cr,
1779                                  const gmx_hw_info_t *hwinfo,
1780                                  gmx_bool             bDoNonbonded,
1781                                  gmx_bool            *bUseGPU,
1782                                  gmx_bool            *bEmulateGPU,
1783                                  const gmx_gpu_opt_t *gpu_opt)
1784 {
1785     gmx_bool bEmulateGPUEnvVarSet;
1786     char     gpu_err_str[STRLEN];
1787
1788     *bUseGPU = FALSE;
1789
1790     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1791
1792     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1793      * GPUs (currently) only handle non-bonded calculations, we will
1794      * automatically switch to emulation if non-bonded calculations are
1795      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1796      * way to turn off GPU initialization, data movement, and cleanup.
1797      *
1798      * GPU emulation can be useful to assess the performance one can expect by
1799      * adding GPU(s) to the machine. The conditional below allows this even
1800      * if mdrun is compiled without GPU acceleration support.
1801      * Note that you should freezing the system as otherwise it will explode.
1802      */
1803     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1804                     (!bDoNonbonded &&
1805                      gpu_opt->ncuda_dev_use > 0));
1806
1807     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1808      */
1809     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1810     {
1811         /* Each PP node will use the intra-node id-th device from the
1812          * list of detected/selected GPUs. */
1813         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1814                       &hwinfo->gpu_info, gpu_opt))
1815         {
1816             /* At this point the init should never fail as we made sure that
1817              * we have all the GPUs we need. If it still does, we'll bail. */
1818             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1819                       cr->nodeid,
1820                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1821                                         cr->rank_pp_intranode),
1822                       gpu_err_str);
1823         }
1824
1825         /* Here we actually turn on hardware GPU acceleration */
1826         *bUseGPU = TRUE;
1827     }
1828 }
1829
1830 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1831                             nonbonded_verlet_t *nbv,
1832                             int                 group)
1833 {
1834     gmx_bool bUsesSimpleTables = TRUE;
1835     int      grp_index;
1836
1837     switch (cutoff_scheme)
1838     {
1839         case ecutsGROUP:
1840             bUsesSimpleTables = TRUE;
1841             break;
1842         case ecutsVERLET:
1843             assert(NULL != nbv && NULL != nbv->grp);
1844             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1845             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1846             break;
1847         default:
1848             gmx_incons("unimplemented");
1849     }
1850     return bUsesSimpleTables;
1851 }
1852
1853 static void init_ewald_f_table(interaction_const_t *ic,
1854                                gmx_bool             bUsesSimpleTables,
1855                                real                 rtab)
1856 {
1857     real maxr;
1858
1859     if (bUsesSimpleTables)
1860     {
1861         /* With a spacing of 0.0005 we are at the force summation accuracy
1862          * for the SSE kernels for "normal" atomistic simulations.
1863          */
1864         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1865                                                    ic->rcoulomb);
1866
1867         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1868         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1869     }
1870     else
1871     {
1872         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1873         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1874         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1875     }
1876
1877     sfree_aligned(ic->tabq_coul_FDV0);
1878     sfree_aligned(ic->tabq_coul_F);
1879     sfree_aligned(ic->tabq_coul_V);
1880
1881     sfree_aligned(ic->tabq_vdw_FDV0);
1882     sfree_aligned(ic->tabq_vdw_F);
1883     sfree_aligned(ic->tabq_vdw_V);
1884
1885     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1886     {
1887         /* Create the original table data in FDV0 */
1888         snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1889         snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1890         snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1891         table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1892                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1893     }
1894
1895     if (EVDW_PME(ic->vdwtype))
1896     {
1897         snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1898         snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1899         snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1900         table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1901                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1902     }
1903 }
1904
1905 void init_interaction_const_tables(FILE                *fp,
1906                                    interaction_const_t *ic,
1907                                    gmx_bool             bUsesSimpleTables,
1908                                    real                 rtab)
1909 {
1910     real spacing;
1911
1912     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1913     {
1914         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1915
1916         if (fp != NULL)
1917         {
1918             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1919                     1/ic->tabq_scale, ic->tabq_size);
1920         }
1921     }
1922 }
1923
1924 static void clear_force_switch_constants(shift_consts_t *sc)
1925 {
1926     sc->c2   = 0;
1927     sc->c3   = 0;
1928     sc->cpot = 0;
1929 }
1930
1931 static void force_switch_constants(real p,
1932                                    real rsw, real rc,
1933                                    shift_consts_t *sc)
1934 {
1935     /* Here we determine the coefficient for shifting the force to zero
1936      * between distance rsw and the cut-off rc.
1937      * For a potential of r^-p, we have force p*r^-(p+1).
1938      * But to save flops we absorb p in the coefficient.
1939      * Thus we get:
1940      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1941      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1942      */
1943     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1944     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1945     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1946 }
1947
1948 static void potential_switch_constants(real rsw, real rc,
1949                                        switch_consts_t *sc)
1950 {
1951     /* The switch function is 1 at rsw and 0 at rc.
1952      * The derivative and second derivate are zero at both ends.
1953      * rsw        = max(r - r_switch, 0)
1954      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1955      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1956      * force      = force*dsw - potential*sw
1957      * potential *= sw
1958      */
1959     sc->c3 = -10*pow(rc - rsw, -3);
1960     sc->c4 =  15*pow(rc - rsw, -4);
1961     sc->c5 =  -6*pow(rc - rsw, -5);
1962 }
1963
1964 static void
1965 init_interaction_const(FILE                       *fp,
1966                        const t_commrec gmx_unused *cr,
1967                        interaction_const_t       **interaction_const,
1968                        const t_forcerec           *fr,
1969                        real                        rtab)
1970 {
1971     interaction_const_t *ic;
1972     gmx_bool             bUsesSimpleTables = TRUE;
1973
1974     snew(ic, 1);
1975
1976     /* Just allocate something so we can free it */
1977     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1978     snew_aligned(ic->tabq_coul_F, 16, 32);
1979     snew_aligned(ic->tabq_coul_V, 16, 32);
1980
1981     ic->rlist           = fr->rlist;
1982     ic->rlistlong       = fr->rlistlong;
1983
1984     /* Lennard-Jones */
1985     ic->vdwtype         = fr->vdwtype;
1986     ic->vdw_modifier    = fr->vdw_modifier;
1987     ic->rvdw            = fr->rvdw;
1988     ic->rvdw_switch     = fr->rvdw_switch;
1989     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
1990     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1991     ic->sh_lj_ewald     = 0;
1992     clear_force_switch_constants(&ic->dispersion_shift);
1993     clear_force_switch_constants(&ic->repulsion_shift);
1994
1995     switch (ic->vdw_modifier)
1996     {
1997         case eintmodPOTSHIFT:
1998             /* Only shift the potential, don't touch the force */
1999             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2000             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
2001             if (EVDW_PME(ic->vdwtype))
2002             {
2003                 real crc2;
2004
2005                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2006                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2007             }
2008             break;
2009         case eintmodFORCESWITCH:
2010             /* Switch the force, switch and shift the potential */
2011             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2012                                    &ic->dispersion_shift);
2013             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2014                                    &ic->repulsion_shift);
2015             break;
2016         case eintmodPOTSWITCH:
2017             /* Switch the potential and force */
2018             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2019                                        &ic->vdw_switch);
2020             break;
2021         case eintmodNONE:
2022         case eintmodEXACTCUTOFF:
2023             /* Nothing to do here */
2024             break;
2025         default:
2026             gmx_incons("unimplemented potential modifier");
2027     }
2028
2029     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2030
2031     /* Electrostatics */
2032     ic->eeltype          = fr->eeltype;
2033     ic->coulomb_modifier = fr->coulomb_modifier;
2034     ic->rcoulomb         = fr->rcoulomb;
2035     ic->epsilon_r        = fr->epsilon_r;
2036     ic->epsfac           = fr->epsfac;
2037     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
2038
2039     if (fr->coulomb_modifier == eintmodPOTSHIFT)
2040     {
2041         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2042     }
2043     else
2044     {
2045         ic->sh_ewald = 0;
2046     }
2047
2048     /* Reaction-field */
2049     if (EEL_RF(ic->eeltype))
2050     {
2051         ic->epsilon_rf = fr->epsilon_rf;
2052         ic->k_rf       = fr->k_rf;
2053         ic->c_rf       = fr->c_rf;
2054     }
2055     else
2056     {
2057         /* For plain cut-off we might use the reaction-field kernels */
2058         ic->epsilon_rf = ic->epsilon_r;
2059         ic->k_rf       = 0;
2060         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2061         {
2062             ic->c_rf   = 1/ic->rcoulomb;
2063         }
2064         else
2065         {
2066             ic->c_rf   = 0;
2067         }
2068     }
2069
2070     if (fp != NULL)
2071     {
2072         real dispersion_shift;
2073
2074         dispersion_shift = ic->dispersion_shift.cpot;
2075         if (EVDW_PME(ic->vdwtype))
2076         {
2077             dispersion_shift -= ic->sh_lj_ewald;
2078         }
2079         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2080                 ic->repulsion_shift.cpot, dispersion_shift);
2081
2082         if (ic->eeltype == eelCUT)
2083         {
2084             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2085         }
2086         else if (EEL_PME(ic->eeltype))
2087         {
2088             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2089         }
2090         fprintf(fp, "\n");
2091     }
2092
2093     *interaction_const = ic;
2094
2095     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2096     {
2097         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2098
2099         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2100          * also sharing texture references. To keep the code simple, we don't
2101          * treat texture references as shared resources, but this means that
2102          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2103          * Hence, to ensure that the non-bonded kernels don't start before all
2104          * texture binding operations are finished, we need to wait for all ranks
2105          * to arrive here before continuing.
2106          *
2107          * Note that we could omit this barrier if GPUs are not shared (or
2108          * texture objects are used), but as this is initialization code, there
2109          * is not point in complicating things.
2110          */
2111 #ifdef GMX_THREAD_MPI
2112         if (PAR(cr))
2113         {
2114             gmx_barrier(cr);
2115         }
2116 #endif  /* GMX_THREAD_MPI */
2117     }
2118
2119     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2120     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2121 }
2122
2123 static void init_nb_verlet(FILE                *fp,
2124                            nonbonded_verlet_t **nb_verlet,
2125                            gmx_bool             bFEP_NonBonded,
2126                            const t_inputrec    *ir,
2127                            const t_forcerec    *fr,
2128                            const t_commrec     *cr,
2129                            const char          *nbpu_opt)
2130 {
2131     nonbonded_verlet_t *nbv;
2132     int                 i;
2133     char               *env;
2134     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2135
2136     nbnxn_alloc_t      *nb_alloc;
2137     nbnxn_free_t       *nb_free;
2138
2139     snew(nbv, 1);
2140
2141     pick_nbnxn_resources(cr, fr->hwinfo,
2142                          fr->bNonbonded,
2143                          &nbv->bUseGPU,
2144                          &bEmulateGPU,
2145                          fr->gpu_opt);
2146
2147     nbv->nbs = NULL;
2148
2149     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2150     for (i = 0; i < nbv->ngrp; i++)
2151     {
2152         nbv->grp[i].nbl_lists.nnbl = 0;
2153         nbv->grp[i].nbat           = NULL;
2154         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2155
2156         if (i == 0) /* local */
2157         {
2158             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2159                               nbv->bUseGPU, bEmulateGPU, ir,
2160                               &nbv->grp[i].kernel_type,
2161                               &nbv->grp[i].ewald_excl,
2162                               fr->bNonbonded);
2163         }
2164         else /* non-local */
2165         {
2166             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2167             {
2168                 /* Use GPU for local, select a CPU kernel for non-local */
2169                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2170                                   FALSE, FALSE, ir,
2171                                   &nbv->grp[i].kernel_type,
2172                                   &nbv->grp[i].ewald_excl,
2173                                   fr->bNonbonded);
2174
2175                 bHybridGPURun = TRUE;
2176             }
2177             else
2178             {
2179                 /* Use the same kernel for local and non-local interactions */
2180                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2181                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2182             }
2183         }
2184     }
2185
2186     if (nbv->bUseGPU)
2187     {
2188         /* init the NxN GPU data; the last argument tells whether we'll have
2189          * both local and non-local NB calculation on GPU */
2190         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2191                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2192                         cr->rank_pp_intranode,
2193                         (nbv->ngrp > 1) && !bHybridGPURun);
2194
2195         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2196         {
2197             char *end;
2198
2199             nbv->min_ci_balanced = strtol(env, &end, 10);
2200             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2201             {
2202                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2203             }
2204
2205             if (debug)
2206             {
2207                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2208                         nbv->min_ci_balanced);
2209             }
2210         }
2211         else
2212         {
2213             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2214             if (debug)
2215             {
2216                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2217                         nbv->min_ci_balanced);
2218             }
2219         }
2220     }
2221     else
2222     {
2223         nbv->min_ci_balanced = 0;
2224     }
2225
2226     *nb_verlet = nbv;
2227
2228     nbnxn_init_search(&nbv->nbs,
2229                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2230                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2231                       bFEP_NonBonded,
2232                       gmx_omp_nthreads_get(emntNonbonded));
2233
2234     for (i = 0; i < nbv->ngrp; i++)
2235     {
2236         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2237         {
2238             nb_alloc = &pmalloc;
2239             nb_free  = &pfree;
2240         }
2241         else
2242         {
2243             nb_alloc = NULL;
2244             nb_free  = NULL;
2245         }
2246
2247         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2248                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2249                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2250                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2251                                 nb_alloc, nb_free);
2252
2253         if (i == 0 ||
2254             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2255         {
2256             gmx_bool bSimpleList;
2257             int      enbnxninitcombrule;
2258
2259             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2260
2261             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2262             {
2263                 /* Plain LJ cut-off: we can optimize with combination rules */
2264                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2265             }
2266             else if (fr->vdwtype == evdwPME)
2267             {
2268                 /* LJ-PME: we need to use a combination rule for the grid */
2269                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2270                 {
2271                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2272                 }
2273                 else
2274                 {
2275                     enbnxninitcombrule = enbnxninitcombruleLB;
2276                 }
2277             }
2278             else
2279             {
2280                 /* We use a full combination matrix: no rule required */
2281                 enbnxninitcombrule = enbnxninitcombruleNONE;
2282             }
2283
2284
2285             snew(nbv->grp[i].nbat, 1);
2286             nbnxn_atomdata_init(fp,
2287                                 nbv->grp[i].nbat,
2288                                 nbv->grp[i].kernel_type,
2289                                 enbnxninitcombrule,
2290                                 fr->ntype, fr->nbfp,
2291                                 ir->opts.ngener,
2292                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2293                                 nb_alloc, nb_free);
2294         }
2295         else
2296         {
2297             nbv->grp[i].nbat = nbv->grp[0].nbat;
2298         }
2299     }
2300 }
2301
2302 void init_forcerec(FILE              *fp,
2303                    const output_env_t oenv,
2304                    t_forcerec        *fr,
2305                    t_fcdata          *fcd,
2306                    const t_inputrec  *ir,
2307                    const gmx_mtop_t  *mtop,
2308                    const t_commrec   *cr,
2309                    matrix             box,
2310                    const char        *tabfn,
2311                    const char        *tabafn,
2312                    const char        *tabpfn,
2313                    const char        *tabbfn,
2314                    const char        *nbpu_opt,
2315                    gmx_bool           bNoSolvOpt,
2316                    real               print_force)
2317 {
2318     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2319     real           rtab;
2320     char          *env;
2321     double         dbl;
2322     const t_block *cgs;
2323     gmx_bool       bGenericKernelOnly;
2324     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2325     gmx_bool       bFEP_NonBonded;
2326     t_nblists     *nbl;
2327     int           *nm_ind, egp_flags;
2328
2329     if (fr->hwinfo == NULL)
2330     {
2331         /* Detect hardware, gather information.
2332          * In mdrun, hwinfo has already been set before calling init_forcerec.
2333          * Here we ignore GPUs, as tools will not use them anyhow.
2334          */
2335         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2336     }
2337
2338     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2339     fr->use_simd_kernels = TRUE;
2340
2341     fr->bDomDec = DOMAINDECOMP(cr);
2342
2343     natoms = mtop->natoms;
2344
2345     if (check_box(ir->ePBC, box))
2346     {
2347         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2348     }
2349
2350     /* Test particle insertion ? */
2351     if (EI_TPI(ir->eI))
2352     {
2353         /* Set to the size of the molecule to be inserted (the last one) */
2354         /* Because of old style topologies, we have to use the last cg
2355          * instead of the last molecule type.
2356          */
2357         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2358         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2359         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2360         {
2361             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2362         }
2363     }
2364     else
2365     {
2366         fr->n_tpi = 0;
2367     }
2368
2369     /* Copy AdResS parameters */
2370     if (ir->bAdress)
2371     {
2372         fr->adress_type           = ir->adress->type;
2373         fr->adress_const_wf       = ir->adress->const_wf;
2374         fr->adress_ex_width       = ir->adress->ex_width;
2375         fr->adress_hy_width       = ir->adress->hy_width;
2376         fr->adress_icor           = ir->adress->icor;
2377         fr->adress_site           = ir->adress->site;
2378         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2379         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2380
2381
2382         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2383         for (i = 0; i < ir->adress->n_energy_grps; i++)
2384         {
2385             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2386         }
2387
2388         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2389         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2390         for (i = 0; i < fr->n_adress_tf_grps; i++)
2391         {
2392             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2393         }
2394         copy_rvec(ir->adress->refs, fr->adress_refs);
2395     }
2396     else
2397     {
2398         fr->adress_type           = eAdressOff;
2399         fr->adress_do_hybridpairs = FALSE;
2400     }
2401
2402     /* Copy the user determined parameters */
2403     fr->userint1  = ir->userint1;
2404     fr->userint2  = ir->userint2;
2405     fr->userint3  = ir->userint3;
2406     fr->userint4  = ir->userint4;
2407     fr->userreal1 = ir->userreal1;
2408     fr->userreal2 = ir->userreal2;
2409     fr->userreal3 = ir->userreal3;
2410     fr->userreal4 = ir->userreal4;
2411
2412     /* Shell stuff */
2413     fr->fc_stepsize = ir->fc_stepsize;
2414
2415     /* Free energy */
2416     fr->efep        = ir->efep;
2417     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2418     if (ir->fepvals->bScCoul)
2419     {
2420         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2421         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2422     }
2423     else
2424     {
2425         fr->sc_alphacoul  = 0;
2426         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2427     }
2428     fr->sc_power      = ir->fepvals->sc_power;
2429     fr->sc_r_power    = ir->fepvals->sc_r_power;
2430     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2431
2432     env = getenv("GMX_SCSIGMA_MIN");
2433     if (env != NULL)
2434     {
2435         dbl = 0;
2436         sscanf(env, "%lf", &dbl);
2437         fr->sc_sigma6_min = pow(dbl, 6);
2438         if (fp)
2439         {
2440             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2441         }
2442     }
2443
2444     fr->bNonbonded = TRUE;
2445     if (getenv("GMX_NO_NONBONDED") != NULL)
2446     {
2447         /* turn off non-bonded calculations */
2448         fr->bNonbonded = FALSE;
2449         md_print_warn(cr, fp,
2450                       "Found environment variable GMX_NO_NONBONDED.\n"
2451                       "Disabling nonbonded calculations.\n");
2452     }
2453
2454     bGenericKernelOnly = FALSE;
2455
2456     /* We now check in the NS code whether a particular combination of interactions
2457      * can be used with water optimization, and disable it if that is not the case.
2458      */
2459
2460     if (getenv("GMX_NB_GENERIC") != NULL)
2461     {
2462         if (fp != NULL)
2463         {
2464             fprintf(fp,
2465                     "Found environment variable GMX_NB_GENERIC.\n"
2466                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2467                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2468         }
2469         bGenericKernelOnly = TRUE;
2470     }
2471
2472     if (bGenericKernelOnly == TRUE)
2473     {
2474         bNoSolvOpt         = TRUE;
2475     }
2476
2477     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2478     {
2479         fr->use_simd_kernels = FALSE;
2480         if (fp != NULL)
2481         {
2482             fprintf(fp,
2483                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2484                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2485         }
2486     }
2487
2488     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2489
2490     /* Check if we can/should do all-vs-all kernels */
2491     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2492     fr->AllvsAll_work   = NULL;
2493     fr->AllvsAll_workgb = NULL;
2494
2495     /* All-vs-all kernels have not been implemented in 4.6, and
2496      * the SIMD group kernels are also buggy in this case. Non-SIMD
2497      * group kernels are OK. See Redmine #1249. */
2498     if (fr->bAllvsAll)
2499     {
2500         fr->bAllvsAll            = FALSE;
2501         fr->use_simd_kernels     = FALSE;
2502         if (fp != NULL)
2503         {
2504             fprintf(fp,
2505                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2506                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2507                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2508                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2509                     "we are proceeding by disabling all CPU architecture-specific\n"
2510                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2511                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2512         }
2513     }
2514
2515     /* Neighbour searching stuff */
2516     fr->cutoff_scheme = ir->cutoff_scheme;
2517     fr->bGrid         = (ir->ns_type == ensGRID);
2518     fr->ePBC          = ir->ePBC;
2519
2520     if (fr->cutoff_scheme == ecutsGROUP)
2521     {
2522         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2523             "removed in a future release when 'verlet' supports all interaction forms.\n";
2524
2525         if (MASTER(cr))
2526         {
2527             fprintf(stderr, "\n%s\n", note);
2528         }
2529         if (fp != NULL)
2530         {
2531             fprintf(fp, "\n%s\n", note);
2532         }
2533     }
2534
2535     /* Determine if we will do PBC for distances in bonded interactions */
2536     if (fr->ePBC == epbcNONE)
2537     {
2538         fr->bMolPBC = FALSE;
2539     }
2540     else
2541     {
2542         if (!DOMAINDECOMP(cr))
2543         {
2544             /* The group cut-off scheme and SHAKE assume charge groups
2545              * are whole, but not using molpbc is faster in most cases.
2546              */
2547             if (fr->cutoff_scheme == ecutsGROUP ||
2548                 (ir->eConstrAlg == econtSHAKE &&
2549                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2550                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2551             {
2552                 fr->bMolPBC = ir->bPeriodicMols;
2553             }
2554             else
2555             {
2556                 fr->bMolPBC = TRUE;
2557                 if (getenv("GMX_USE_GRAPH") != NULL)
2558                 {
2559                     fr->bMolPBC = FALSE;
2560                     if (fp)
2561                     {
2562                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2563                     }
2564                 }
2565             }
2566         }
2567         else
2568         {
2569             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2570         }
2571     }
2572     fr->bGB = (ir->implicit_solvent == eisGBSA);
2573
2574     fr->rc_scaling = ir->refcoord_scaling;
2575     copy_rvec(ir->posres_com, fr->posres_com);
2576     copy_rvec(ir->posres_comB, fr->posres_comB);
2577     fr->rlist                    = cutoff_inf(ir->rlist);
2578     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2579     fr->eeltype                  = ir->coulombtype;
2580     fr->vdwtype                  = ir->vdwtype;
2581     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2582
2583     fr->coulomb_modifier = ir->coulomb_modifier;
2584     fr->vdw_modifier     = ir->vdw_modifier;
2585
2586     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2587     switch (fr->eeltype)
2588     {
2589         case eelCUT:
2590             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2591             break;
2592
2593         case eelRF:
2594         case eelGRF:
2595         case eelRF_NEC:
2596             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2597             break;
2598
2599         case eelRF_ZERO:
2600             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2601             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2602             break;
2603
2604         case eelSWITCH:
2605         case eelSHIFT:
2606         case eelUSER:
2607         case eelENCADSHIFT:
2608         case eelPMESWITCH:
2609         case eelPMEUSER:
2610         case eelPMEUSERSWITCH:
2611             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2612             break;
2613
2614         case eelPME:
2615         case eelEWALD:
2616             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2617             break;
2618
2619         default:
2620             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2621             break;
2622     }
2623
2624     /* Vdw: Translate from mdp settings to kernel format */
2625     switch (fr->vdwtype)
2626     {
2627         case evdwCUT:
2628             if (fr->bBHAM)
2629             {
2630                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2631             }
2632             else
2633             {
2634                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2635             }
2636             break;
2637         case evdwPME:
2638             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2639             break;
2640
2641         case evdwSWITCH:
2642         case evdwSHIFT:
2643         case evdwUSER:
2644         case evdwENCADSHIFT:
2645             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2646             break;
2647
2648         default:
2649             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2650             break;
2651     }
2652
2653     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2654     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2655     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2656
2657     fr->bTwinRange = fr->rlistlong > fr->rlist;
2658     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2659
2660     fr->reppow     = mtop->ffparams.reppow;
2661
2662     if (ir->cutoff_scheme == ecutsGROUP)
2663     {
2664         fr->bvdwtab    = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2665                           && !EVDW_PME(fr->vdwtype));
2666         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2667         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2668                            fr->eeltype == eelEWALD ||
2669                            fr->eeltype == eelPME ||
2670                            fr->eeltype == eelRF ||
2671                            fr->eeltype == eelRF_ZERO);
2672
2673         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2674          * going to be faster to tabulate the interaction than calling the generic kernel.
2675          */
2676         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2677         {
2678             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2679             {
2680                 fr->bcoultab = TRUE;
2681             }
2682         }
2683         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2684                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2685                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2686                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2687         {
2688             if (fr->rcoulomb != fr->rvdw)
2689             {
2690                 fr->bcoultab = TRUE;
2691             }
2692         }
2693
2694         if (getenv("GMX_REQUIRE_TABLES"))
2695         {
2696             fr->bvdwtab  = TRUE;
2697             fr->bcoultab = TRUE;
2698         }
2699
2700         if (fp)
2701         {
2702             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2703             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2704         }
2705
2706         if (fr->bvdwtab == TRUE)
2707         {
2708             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2709             fr->nbkernel_vdw_modifier    = eintmodNONE;
2710         }
2711         if (fr->bcoultab == TRUE)
2712         {
2713             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2714             fr->nbkernel_elec_modifier    = eintmodNONE;
2715         }
2716     }
2717
2718     if (ir->cutoff_scheme == ecutsVERLET)
2719     {
2720         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2721         {
2722             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2723         }
2724         fr->bvdwtab  = FALSE;
2725         fr->bcoultab = FALSE;
2726     }
2727
2728     /* Tables are used for direct ewald sum */
2729     if (fr->bEwald)
2730     {
2731         if (EEL_PME(ir->coulombtype))
2732         {
2733             if (fp)
2734             {
2735                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2736             }
2737             if (ir->coulombtype == eelP3M_AD)
2738             {
2739                 please_cite(fp, "Hockney1988");
2740                 please_cite(fp, "Ballenegger2012");
2741             }
2742             else
2743             {
2744                 please_cite(fp, "Essmann95a");
2745             }
2746
2747             if (ir->ewald_geometry == eewg3DC)
2748             {
2749                 if (fp)
2750                 {
2751                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2752                 }
2753                 please_cite(fp, "In-Chul99a");
2754             }
2755         }
2756         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2757         init_ewald_tab(&(fr->ewald_table), ir, fp);
2758         if (fp)
2759         {
2760             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2761                     1/fr->ewaldcoeff_q);
2762         }
2763     }
2764
2765     if (EVDW_PME(ir->vdwtype))
2766     {
2767         if (fp)
2768         {
2769             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2770         }
2771         please_cite(fp, "Essmann95a");
2772         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2773         if (fp)
2774         {
2775             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2776                     1/fr->ewaldcoeff_lj);
2777         }
2778     }
2779
2780     /* Electrostatics */
2781     fr->epsilon_r       = ir->epsilon_r;
2782     fr->epsilon_rf      = ir->epsilon_rf;
2783     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2784     fr->rcoulomb_switch = ir->rcoulomb_switch;
2785     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2786
2787     /* Parameters for generalized RF */
2788     fr->zsquare = 0.0;
2789     fr->temp    = 0.0;
2790
2791     if (fr->eeltype == eelGRF)
2792     {
2793         init_generalized_rf(fp, mtop, ir, fr);
2794     }
2795
2796     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2797                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2798                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2799                        IR_ELEC_FIELD(*ir) ||
2800                        (fr->adress_icor != eAdressICOff)
2801                        );
2802
2803     if (fr->cutoff_scheme == ecutsGROUP &&
2804         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2805     {
2806         /* Count the total number of charge groups */
2807         fr->cg_nalloc = ncg_mtop(mtop);
2808         srenew(fr->cg_cm, fr->cg_nalloc);
2809     }
2810     if (fr->shift_vec == NULL)
2811     {
2812         snew(fr->shift_vec, SHIFTS);
2813     }
2814
2815     if (fr->fshift == NULL)
2816     {
2817         snew(fr->fshift, SHIFTS);
2818     }
2819
2820     if (fr->nbfp == NULL)
2821     {
2822         fr->ntype = mtop->ffparams.atnr;
2823         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2824         if (EVDW_PME(fr->vdwtype))
2825         {
2826             fr->ljpme_c6grid  = make_ljpme_c6grid(&mtop->ffparams, fr);
2827         }
2828     }
2829
2830     /* Copy the energy group exclusions */
2831     fr->egp_flags = ir->opts.egp_flags;
2832
2833     /* Van der Waals stuff */
2834     fr->rvdw        = cutoff_inf(ir->rvdw);
2835     fr->rvdw_switch = ir->rvdw_switch;
2836     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2837     {
2838         if (fr->rvdw_switch >= fr->rvdw)
2839         {
2840             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2841                       fr->rvdw_switch, fr->rvdw);
2842         }
2843         if (fp)
2844         {
2845             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2846                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2847                     fr->rvdw_switch, fr->rvdw);
2848         }
2849     }
2850
2851     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2852     {
2853         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2854     }
2855
2856     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2857     {
2858         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2859     }
2860
2861     if (fp)
2862     {
2863         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2864                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2865     }
2866
2867     fr->eDispCorr = ir->eDispCorr;
2868     if (ir->eDispCorr != edispcNO)
2869     {
2870         set_avcsixtwelve(fp, fr, mtop);
2871     }
2872
2873     if (fr->bBHAM)
2874     {
2875         set_bham_b_max(fp, fr, mtop);
2876     }
2877
2878     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2879
2880     /* Copy the GBSA data (radius, volume and surftens for each
2881      * atomtype) from the topology atomtype section to forcerec.
2882      */
2883     snew(fr->atype_radius, fr->ntype);
2884     snew(fr->atype_vol, fr->ntype);
2885     snew(fr->atype_surftens, fr->ntype);
2886     snew(fr->atype_gb_radius, fr->ntype);
2887     snew(fr->atype_S_hct, fr->ntype);
2888
2889     if (mtop->atomtypes.nr > 0)
2890     {
2891         for (i = 0; i < fr->ntype; i++)
2892         {
2893             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2894         }
2895         for (i = 0; i < fr->ntype; i++)
2896         {
2897             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2898         }
2899         for (i = 0; i < fr->ntype; i++)
2900         {
2901             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2902         }
2903         for (i = 0; i < fr->ntype; i++)
2904         {
2905             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2906         }
2907         for (i = 0; i < fr->ntype; i++)
2908         {
2909             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2910         }
2911     }
2912
2913     /* Generate the GB table if needed */
2914     if (fr->bGB)
2915     {
2916 #ifdef GMX_DOUBLE
2917         fr->gbtabscale = 2000;
2918 #else
2919         fr->gbtabscale = 500;
2920 #endif
2921
2922         fr->gbtabr = 100;
2923         fr->gbtab  = make_gb_table(oenv, fr);
2924
2925         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2926
2927         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2928         if (!DOMAINDECOMP(cr))
2929         {
2930             make_local_gb(cr, fr->born, ir->gb_algorithm);
2931         }
2932     }
2933
2934     /* Set the charge scaling */
2935     if (fr->epsilon_r != 0)
2936     {
2937         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2938     }
2939     else
2940     {
2941         /* eps = 0 is infinite dieletric: no coulomb interactions */
2942         fr->epsfac = 0;
2943     }
2944
2945     /* Reaction field constants */
2946     if (EEL_RF(fr->eeltype))
2947     {
2948         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2949                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2950                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2951     }
2952
2953     /*This now calculates sum for q and c6*/
2954     set_chargesum(fp, fr, mtop);
2955
2956     /* if we are using LR electrostatics, and they are tabulated,
2957      * the tables will contain modified coulomb interactions.
2958      * Since we want to use the non-shifted ones for 1-4
2959      * coulombic interactions, we must have an extra set of tables.
2960      */
2961
2962     /* Construct tables.
2963      * A little unnecessary to make both vdw and coul tables sometimes,
2964      * but what the heck... */
2965
2966     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2967         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2968
2969     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2970                              fr->bBHAM || fr->bEwald) &&
2971                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2972                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2973                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2974
2975     negp_pp   = ir->opts.ngener - ir->nwall;
2976     negptable = 0;
2977     if (!bMakeTables)
2978     {
2979         bSomeNormalNbListsAreInUse = TRUE;
2980         fr->nnblists               = 1;
2981     }
2982     else
2983     {
2984         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2985         for (egi = 0; egi < negp_pp; egi++)
2986         {
2987             for (egj = egi; egj < negp_pp; egj++)
2988             {
2989                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2990                 if (!(egp_flags & EGP_EXCL))
2991                 {
2992                     if (egp_flags & EGP_TABLE)
2993                     {
2994                         negptable++;
2995                     }
2996                     else
2997                     {
2998                         bSomeNormalNbListsAreInUse = TRUE;
2999                     }
3000                 }
3001             }
3002         }
3003         if (bSomeNormalNbListsAreInUse)
3004         {
3005             fr->nnblists = negptable + 1;
3006         }
3007         else
3008         {
3009             fr->nnblists = negptable;
3010         }
3011         if (fr->nnblists > 1)
3012         {
3013             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3014         }
3015     }
3016
3017     if (ir->adress)
3018     {
3019         fr->nnblists *= 2;
3020     }
3021
3022     snew(fr->nblists, fr->nnblists);
3023
3024     /* This code automatically gives table length tabext without cut-off's,
3025      * in that case grompp should already have checked that we do not need
3026      * normal tables and we only generate tables for 1-4 interactions.
3027      */
3028     rtab = ir->rlistlong + ir->tabext;
3029
3030     if (bMakeTables)
3031     {
3032         /* make tables for ordinary interactions */
3033         if (bSomeNormalNbListsAreInUse)
3034         {
3035             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3036             if (ir->adress)
3037             {
3038                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3039             }
3040             if (!bMakeSeparate14Table)
3041             {
3042                 fr->tab14 = fr->nblists[0].table_elec_vdw;
3043             }
3044             m = 1;
3045         }
3046         else
3047         {
3048             m = 0;
3049         }
3050         if (negptable > 0)
3051         {
3052             /* Read the special tables for certain energy group pairs */
3053             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3054             for (egi = 0; egi < negp_pp; egi++)
3055             {
3056                 for (egj = egi; egj < negp_pp; egj++)
3057                 {
3058                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3059                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3060                     {
3061                         nbl = &(fr->nblists[m]);
3062                         if (fr->nnblists > 1)
3063                         {
3064                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3065                         }
3066                         /* Read the table file with the two energy groups names appended */
3067                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3068                                         *mtop->groups.grpname[nm_ind[egi]],
3069                                         *mtop->groups.grpname[nm_ind[egj]],
3070                                         &fr->nblists[m]);
3071                         if (ir->adress)
3072                         {
3073                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3074                                             *mtop->groups.grpname[nm_ind[egi]],
3075                                             *mtop->groups.grpname[nm_ind[egj]],
3076                                             &fr->nblists[fr->nnblists/2+m]);
3077                         }
3078                         m++;
3079                     }
3080                     else if (fr->nnblists > 1)
3081                     {
3082                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3083                     }
3084                 }
3085             }
3086         }
3087     }
3088     if (bMakeSeparate14Table)
3089     {
3090         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3091         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3092                                 GMX_MAKETABLES_14ONLY);
3093     }
3094
3095     /* Read AdResS Thermo Force table if needed */
3096     if (fr->adress_icor == eAdressICThermoForce)
3097     {
3098         /* old todo replace */
3099
3100         if (ir->adress->n_tf_grps > 0)
3101         {
3102             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3103
3104         }
3105         else
3106         {
3107             /* load the default table */
3108             snew(fr->atf_tabs, 1);
3109             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3110         }
3111     }
3112
3113     /* Wall stuff */
3114     fr->nwall = ir->nwall;
3115     if (ir->nwall && ir->wall_type == ewtTABLE)
3116     {
3117         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3118     }
3119
3120     if (fcd && tabbfn)
3121     {
3122         fcd->bondtab  = make_bonded_tables(fp,
3123                                            F_TABBONDS, F_TABBONDSNC,
3124                                            mtop, tabbfn, "b");
3125         fcd->angletab = make_bonded_tables(fp,
3126                                            F_TABANGLES, -1,
3127                                            mtop, tabbfn, "a");
3128         fcd->dihtab   = make_bonded_tables(fp,
3129                                            F_TABDIHS, -1,
3130                                            mtop, tabbfn, "d");
3131     }
3132     else
3133     {
3134         if (debug)
3135         {
3136             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3137         }
3138     }
3139
3140     /* QM/MM initialization if requested
3141      */
3142     if (ir->bQMMM)
3143     {
3144         fprintf(stderr, "QM/MM calculation requested.\n");
3145     }
3146
3147     fr->bQMMM      = ir->bQMMM;
3148     fr->qr         = mk_QMMMrec();
3149
3150     /* Set all the static charge group info */
3151     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3152                                    &bFEP_NonBonded,
3153                                    &fr->bExcl_IntraCGAll_InterCGNone);
3154     if (DOMAINDECOMP(cr))
3155     {
3156         fr->cginfo = NULL;
3157     }
3158     else
3159     {
3160         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3161     }
3162
3163     if (!DOMAINDECOMP(cr))
3164     {
3165         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3166                             mtop->natoms, mtop->natoms, mtop->natoms);
3167     }
3168
3169     fr->print_force = print_force;
3170
3171
3172     /* coarse load balancing vars */
3173     fr->t_fnbf    = 0.;
3174     fr->t_wait    = 0.;
3175     fr->timesteps = 0;
3176
3177     /* Initialize neighbor search */
3178     init_ns(fp, cr, &fr->ns, fr, mtop);
3179
3180     if (cr->duty & DUTY_PP)
3181     {
3182         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3183         /*
3184            if (ir->bAdress)
3185             {
3186                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3187             }
3188          */
3189     }
3190
3191     /* Initialize the thread working data for bonded interactions */
3192     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3193
3194     snew(fr->excl_load, fr->nthreads+1);
3195
3196     if (fr->cutoff_scheme == ecutsVERLET)
3197     {
3198         if (ir->rcoulomb != ir->rvdw)
3199         {
3200             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3201         }
3202
3203         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3204     }
3205
3206     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3207     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3208
3209     if (ir->eDispCorr != edispcNO)
3210     {
3211         calc_enervirdiff(fp, ir->eDispCorr, fr);
3212     }
3213 }
3214
3215 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3216 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3217 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3218
3219 void pr_forcerec(FILE *fp, t_forcerec *fr)
3220 {
3221     int i;
3222
3223     pr_real(fp, fr->rlist);
3224     pr_real(fp, fr->rcoulomb);
3225     pr_real(fp, fr->fudgeQQ);
3226     pr_bool(fp, fr->bGrid);
3227     pr_bool(fp, fr->bTwinRange);
3228     /*pr_int(fp,fr->cg0);
3229        pr_int(fp,fr->hcg);*/
3230     for (i = 0; i < fr->nnblists; i++)
3231     {
3232         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3233     }
3234     pr_real(fp, fr->rcoulomb_switch);
3235     pr_real(fp, fr->rcoulomb);
3236
3237     fflush(fp);
3238 }
3239
3240 void forcerec_set_excl_load(t_forcerec           *fr,
3241                             const gmx_localtop_t *top)
3242 {
3243     const int *ind, *a;
3244     int        t, i, j, ntot, n, ntarget;
3245
3246     ind = top->excls.index;
3247     a   = top->excls.a;
3248
3249     ntot = 0;
3250     for (i = 0; i < top->excls.nr; i++)
3251     {
3252         for (j = ind[i]; j < ind[i+1]; j++)
3253         {
3254             if (a[j] > i)
3255             {
3256                 ntot++;
3257             }
3258         }
3259     }
3260
3261     fr->excl_load[0] = 0;
3262     n                = 0;
3263     i                = 0;
3264     for (t = 1; t <= fr->nthreads; t++)
3265     {
3266         ntarget = (ntot*t)/fr->nthreads;
3267         while (i < top->excls.nr && n < ntarget)
3268         {
3269             for (j = ind[i]; j < ind[i+1]; j++)
3270             {
3271                 if (a[j] > i)
3272                 {
3273                     n++;
3274                 }
3275             }
3276             i++;
3277         }
3278         fr->excl_load[t] = i;
3279     }
3280 }