src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "typedefs.h"
  45 #include "types/commrec.h"
  46 #include "gromacs/math/vec.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "macros.h"
  49 #include "gromacs/utility/smalloc.h"
  50 #include "macros.h"
  51 #include "gromacs/utility/fatalerror.h"
  52 #include "physics.h"
  53 #include "force.h"
  54 #include "tables.h"
  55 #include "nonbonded.h"
  56 #include "invblock.h"
  57 #include "names.h"
  58 #include "network.h"
  59 #include "pbc.h"
  60 #include "ns.h"
  61 #include "mshift.h"
  62 #include "txtdump.h"
  63 #include "coulomb.h"
  64 #include "md_support.h"
  65 #include "md_logging.h"
  66 #include "domdec.h"
  67 #include "qmmm.h"
  68 #include "copyrite.h"
  69 #include "mtop_util.h"
  70 #include "nbnxn_simd.h"
  71 #include "nbnxn_search.h"
  72 #include "nbnxn_atomdata.h"
  73 #include "nbnxn_consts.h"
  74 #include "gmx_omp_nthreads.h"
  75 #include "gmx_detect_hardware.h"
  76 #include "inputrec.h"
  77
  78 #include "types/nbnxn_cuda_types_ext.h"
  79 #include "gpu_utils.h"
  80 #include "nbnxn_cuda_data_mgmt.h"
  81 #include "pmalloc_cuda.h"
  82
  83 t_forcerec *mk_forcerec(void)
  84 {
  85     t_forcerec *fr;
  86
  87     snew(fr, 1);
  88
  89     return fr;
  90 }
  91
  92 #ifdef DEBUG
  93 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  94 {
  95     int i, j;
  96
  97     for (i = 0; (i < atnr); i++)
  98     {
  99         for (j = 0; (j < atnr); j++)
 100         {
 101             fprintf(fp, "%2d - %2d", i, j);
 102             if (bBHAM)
 103             {
 104                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 105                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 106             }
 107             else
 108             {
 109                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 110                         C12(nbfp, atnr, i, j)/12.0);
 111             }
 112         }
 113     }
 114 }
 115 #endif
 116
 117 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 118 {
 119     real *nbfp;
 120     int   i, j, k, atnr;
 121
 122     atnr = idef->atnr;
 123     if (bBHAM)
 124     {
 125         snew(nbfp, 3*atnr*atnr);
 126         for (i = k = 0; (i < atnr); i++)
 127         {
 128             for (j = 0; (j < atnr); j++, k++)
 129             {
 130                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 131                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 132                 /* nbfp now includes the 6.0 derivative prefactor */
 133                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 134             }
 135         }
 136     }
 137     else
 138     {
 139         snew(nbfp, 2*atnr*atnr);
 140         for (i = k = 0; (i < atnr); i++)
 141         {
 142             for (j = 0; (j < atnr); j++, k++)
 143             {
 144                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 145                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 146                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 147             }
 148         }
 149     }
 150
 151     return nbfp;
 152 }
 153
 154 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
 155 {
 156     int   i, j, k, atnr;
 157     real  c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 158     real *grid;
 159
 160     /* For LJ-PME simulations, we correct the energies with the reciprocal space
 161      * inside of the cut-off. To do this the non-bonded kernels needs to have
 162      * access to the C6-values used on the reciprocal grid in pme.c
 163      */
 164
 165     atnr = idef->atnr;
 166     snew(grid, 2*atnr*atnr);
 167     for (i = k = 0; (i < atnr); i++)
 168     {
 169         for (j = 0; (j < atnr); j++, k++)
 170         {
 171             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 172             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 173             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 174             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 175             c6   = sqrt(c6i * c6j);
 176             if (fr->ljpme_combination_rule == eljpmeLB
 177                 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
 178             {
 179                 sigmai = pow(c12i / c6i, 1.0/6.0);
 180                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 181                 epsi   = c6i * c6i / c12i;
 182                 epsj   = c6j * c6j / c12j;
 183                 c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 184             }
 185             /* Store the elements at the same relative positions as C6 in nbfp in order
 186              * to simplify access in the kernels
 187              */
 188             grid[2*(atnr*i+j)] = c6*6.0;
 189         }
 190     }
 191     return grid;
 192 }
 193
 194 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 195 {
 196     real *nbfp;
 197     int   i, j, k, atnr;
 198     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 199     real  c6, c12;
 200
 201     atnr = idef->atnr;
 202     snew(nbfp, 2*atnr*atnr);
 203     for (i = 0; i < atnr; ++i)
 204     {
 205         for (j = 0; j < atnr; ++j)
 206         {
 207             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 208             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 209             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 210             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 211             c6   = sqrt(c6i  * c6j);
 212             c12  = sqrt(c12i * c12j);
 213             if (comb_rule == eCOMB_ARITHMETIC
 214                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 215             {
 216                 sigmai = pow(c12i / c6i, 1.0/6.0);
 217                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 218                 epsi   = c6i * c6i / c12i;
 219                 epsj   = c6j * c6j / c12j;
 220                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 221                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 222             }
 223             C6(nbfp, atnr, i, j)   = c6*6.0;
 224             C12(nbfp, atnr, i, j)  = c12*12.0;
 225         }
 226     }
 227     return nbfp;
 228 }
 229
 230 /* This routine sets fr->solvent_opt to the most common solvent in the
 231  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 232  * the fr->solvent_type array with the correct type (or esolNO).
 233  *
 234  * Charge groups that fulfill the conditions but are not identical to the
 235  * most common one will be marked as esolNO in the solvent_type array.
 236  *
 237  * TIP3p is identical to SPC for these purposes, so we call it
 238  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 239  *
 240  * NOTE: QM particle should not
 241  * become an optimized solvent. Not even if there is only one charge
 242  * group in the Qm
 243  */
 244
 245 typedef struct
 246 {
 247     int    model;
 248     int    count;
 249     int    vdwtype[4];
 250     real   charge[4];
 251 } solvent_parameters_t;
 252
 253 static void
 254 check_solvent_cg(const gmx_moltype_t    *molt,
 255                  int                     cg0,
 256                  int                     nmol,
 257                  const unsigned char    *qm_grpnr,
 258                  const t_grps           *qm_grps,
 259                  t_forcerec   *          fr,
 260                  int                    *n_solvent_parameters,
 261                  solvent_parameters_t  **solvent_parameters_p,
 262                  int                     cginfo,
 263                  int                    *cg_sp)
 264 {
 265     const t_blocka       *excl;
 266     t_atom               *atom;
 267     int                   j, k;
 268     int                   j0, j1, nj;
 269     gmx_bool              perturbed;
 270     gmx_bool              has_vdw[4];
 271     gmx_bool              match;
 272     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 273     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 274     int                   tjA;
 275     gmx_bool              qm;
 276     solvent_parameters_t *solvent_parameters;
 277
 278     /* We use a list with parameters for each solvent type.
 279      * Every time we discover a new molecule that fulfills the basic
 280      * conditions for a solvent we compare with the previous entries
 281      * in these lists. If the parameters are the same we just increment
 282      * the counter for that type, and otherwise we create a new type
 283      * based on the current molecule.
 284      *
 285      * Once we've finished going through all molecules we check which
 286      * solvent is most common, and mark all those molecules while we
 287      * clear the flag on all others.
 288      */
 289
 290     solvent_parameters = *solvent_parameters_p;
 291
 292     /* Mark the cg first as non optimized */
 293     *cg_sp = -1;
 294
 295     /* Check if this cg has no exclusions with atoms in other charge groups
 296      * and all atoms inside the charge group excluded.
 297      * We only have 3 or 4 atom solvent loops.
 298      */
 299     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 300         !GET_CGINFO_EXCL_INTRA(cginfo))
 301     {
 302         return;
 303     }
 304
 305     /* Get the indices of the first atom in this charge group */
 306     j0     = molt->cgs.index[cg0];
 307     j1     = molt->cgs.index[cg0+1];
 308
 309     /* Number of atoms in our molecule */
 310     nj     = j1 - j0;
 311
 312     if (debug)
 313     {
 314         fprintf(debug,
 315                 "Moltype '%s': there are %d atoms in this charge group\n",
 316                 *molt->name, nj);
 317     }
 318
 319     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 320      * otherwise skip it.
 321      */
 322     if (nj < 3 || nj > 4)
 323     {
 324         return;
 325     }
 326
 327     /* Check if we are doing QM on this group */
 328     qm = FALSE;
 329     if (qm_grpnr != NULL)
 330     {
 331         for (j = j0; j < j1 && !qm; j++)
 332         {
 333             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 334         }
 335     }
 336     /* Cannot use solvent optimization with QM */
 337     if (qm)
 338     {
 339         return;
 340     }
 341
 342     atom = molt->atoms.atom;
 343
 344     /* Still looks like a solvent, time to check parameters */
 345
 346     /* If it is perturbed (free energy) we can't use the solvent loops,
 347      * so then we just skip to the next molecule.
 348      */
 349     perturbed = FALSE;
 350
 351     for (j = j0; j < j1 && !perturbed; j++)
 352     {
 353         perturbed = PERTURBED(atom[j]);
 354     }
 355
 356     if (perturbed)
 357     {
 358         return;
 359     }
 360
 361     /* Now it's only a question if the VdW and charge parameters
 362      * are OK. Before doing the check we compare and see if they are
 363      * identical to a possible previous solvent type.
 364      * First we assign the current types and charges.
 365      */
 366     for (j = 0; j < nj; j++)
 367     {
 368         tmp_vdwtype[j] = atom[j0+j].type;
 369         tmp_charge[j]  = atom[j0+j].q;
 370     }
 371
 372     /* Does it match any previous solvent type? */
 373     for (k = 0; k < *n_solvent_parameters; k++)
 374     {
 375         match = TRUE;
 376
 377
 378         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 379         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 380              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 381         {
 382             match = FALSE;
 383         }
 384
 385         /* Check that types & charges match for all atoms in molecule */
 386         for (j = 0; j < nj && match == TRUE; j++)
 387         {
 388             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 389             {
 390                 match = FALSE;
 391             }
 392             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 393             {
 394                 match = FALSE;
 395             }
 396         }
 397         if (match == TRUE)
 398         {
 399             /* Congratulations! We have a matched solvent.
 400              * Flag it with this type for later processing.
 401              */
 402             *cg_sp = k;
 403             solvent_parameters[k].count += nmol;
 404
 405             /* We are done with this charge group */
 406             return;
 407         }
 408     }
 409
 410     /* If we get here, we have a tentative new solvent type.
 411      * Before we add it we must check that it fulfills the requirements
 412      * of the solvent optimized loops. First determine which atoms have
 413      * VdW interactions.
 414      */
 415     for (j = 0; j < nj; j++)
 416     {
 417         has_vdw[j] = FALSE;
 418         tjA        = tmp_vdwtype[j];
 419
 420         /* Go through all other tpes and see if any have non-zero
 421          * VdW parameters when combined with this one.
 422          */
 423         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 424         {
 425             /* We already checked that the atoms weren't perturbed,
 426              * so we only need to check state A now.
 427              */
 428             if (fr->bBHAM)
 429             {
 430                 has_vdw[j] = (has_vdw[j] ||
 431                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 432                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 433                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 434             }
 435             else
 436             {
 437                 /* Standard LJ */
 438                 has_vdw[j] = (has_vdw[j] ||
 439                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 440                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 441             }
 442         }
 443     }
 444
 445     /* Now we know all we need to make the final check and assignment. */
 446     if (nj == 3)
 447     {
 448         /* So, is it an SPC?
 449          * For this we require thatn all atoms have charge,
 450          * the charges on atom 2 & 3 should be the same, and only
 451          * atom 1 might have VdW.
 452          */
 453         if (has_vdw[1] == FALSE &&
 454             has_vdw[2] == FALSE &&
 455             tmp_charge[0]  != 0 &&
 456             tmp_charge[1]  != 0 &&
 457             tmp_charge[2]  == tmp_charge[1])
 458         {
 459             srenew(solvent_parameters, *n_solvent_parameters+1);
 460             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 461             solvent_parameters[*n_solvent_parameters].count = nmol;
 462             for (k = 0; k < 3; k++)
 463             {
 464                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 465                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 466             }
 467
 468             *cg_sp = *n_solvent_parameters;
 469             (*n_solvent_parameters)++;
 470         }
 471     }
 472     else if (nj == 4)
 473     {
 474         /* Or could it be a TIP4P?
 475          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 476          * Only atom 1 mght have VdW.
 477          */
 478         if (has_vdw[1] == FALSE &&
 479             has_vdw[2] == FALSE &&
 480             has_vdw[3] == FALSE &&
 481             tmp_charge[0]  == 0 &&
 482             tmp_charge[1]  != 0 &&
 483             tmp_charge[2]  == tmp_charge[1] &&
 484             tmp_charge[3]  != 0)
 485         {
 486             srenew(solvent_parameters, *n_solvent_parameters+1);
 487             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 488             solvent_parameters[*n_solvent_parameters].count = nmol;
 489             for (k = 0; k < 4; k++)
 490             {
 491                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 492                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 493             }
 494
 495             *cg_sp = *n_solvent_parameters;
 496             (*n_solvent_parameters)++;
 497         }
 498     }
 499
 500     *solvent_parameters_p = solvent_parameters;
 501 }
 502
 503 static void
 504 check_solvent(FILE  *                fp,
 505               const gmx_mtop_t  *    mtop,
 506               t_forcerec  *          fr,
 507               cginfo_mb_t           *cginfo_mb)
 508 {
 509     const t_block     *   cgs;
 510     const t_block     *   mols;
 511     const gmx_moltype_t  *molt;
 512     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 513     int                   n_solvent_parameters;
 514     solvent_parameters_t *solvent_parameters;
 515     int                 **cg_sp;
 516     int                   bestsp, bestsol;
 517
 518     if (debug)
 519     {
 520         fprintf(debug, "Going to determine what solvent types we have.\n");
 521     }
 522
 523     mols = &mtop->mols;
 524
 525     n_solvent_parameters = 0;
 526     solvent_parameters   = NULL;
 527     /* Allocate temporary array for solvent type */
 528     snew(cg_sp, mtop->nmolblock);
 529
 530     cg_offset = 0;
 531     at_offset = 0;
 532     for (mb = 0; mb < mtop->nmolblock; mb++)
 533     {
 534         molt = &mtop->moltype[mtop->molblock[mb].type];
 535         cgs  = &molt->cgs;
 536         /* Here we have to loop over all individual molecules
 537          * because we need to check for QMMM particles.
 538          */
 539         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 540         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 541         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 542         for (mol = 0; mol < nmol_ch; mol++)
 543         {
 544             cgm = mol*cgs->nr;
 545             am  = mol*cgs->index[cgs->nr];
 546             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 547             {
 548                 check_solvent_cg(molt, cg_mol, nmol,
 549                                  mtop->groups.grpnr[egcQMMM] ?
 550                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 551                                  &mtop->groups.grps[egcQMMM],
 552                                  fr,
 553                                  &n_solvent_parameters, &solvent_parameters,
 554                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 555                                  &cg_sp[mb][cgm+cg_mol]);
 556             }
 557         }
 558         cg_offset += cgs->nr;
 559         at_offset += cgs->index[cgs->nr];
 560     }
 561
 562     /* Puh! We finished going through all charge groups.
 563      * Now find the most common solvent model.
 564      */
 565
 566     /* Most common solvent this far */
 567     bestsp = -2;
 568     for (i = 0; i < n_solvent_parameters; i++)
 569     {
 570         if (bestsp == -2 ||
 571             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 572         {
 573             bestsp = i;
 574         }
 575     }
 576
 577     if (bestsp >= 0)
 578     {
 579         bestsol = solvent_parameters[bestsp].model;
 580     }
 581     else
 582     {
 583         bestsol = esolNO;
 584     }
 585
 586 #ifdef DISABLE_WATER_NLIST
 587     bestsol = esolNO;
 588 #endif
 589
 590     fr->nWatMol = 0;
 591     for (mb = 0; mb < mtop->nmolblock; mb++)
 592     {
 593         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 594         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 595         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 596         {
 597             if (cg_sp[mb][i] == bestsp)
 598             {
 599                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 600                 fr->nWatMol += nmol;
 601             }
 602             else
 603             {
 604                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 605             }
 606         }
 607         sfree(cg_sp[mb]);
 608     }
 609     sfree(cg_sp);
 610
 611     if (bestsol != esolNO && fp != NULL)
 612     {
 613         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 614                 esol_names[bestsol],
 615                 solvent_parameters[bestsp].count);
 616     }
 617
 618     sfree(solvent_parameters);
 619     fr->solvent_opt = bestsol;
 620 }
 621
 622 enum {
 623     acNONE = 0, acCONSTRAINT, acSETTLE
 624 };
 625
 626 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 627                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 628                                    gmx_bool *bFEP_NonBonded,
 629                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 630 {
 631     const t_block        *cgs;
 632     const t_blocka       *excl;
 633     const gmx_moltype_t  *molt;
 634     const gmx_molblock_t *molb;
 635     cginfo_mb_t          *cginfo_mb;
 636     gmx_bool             *type_VDW;
 637     int                  *cginfo;
 638     int                   cg_offset, a_offset, cgm, am;
 639     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 640     int                  *a_con;
 641     int                   ftype;
 642     int                   ia;
 643     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
 644
 645     ncg_tot = ncg_mtop(mtop);
 646     snew(cginfo_mb, mtop->nmolblock);
 647
 648     snew(type_VDW, fr->ntype);
 649     for (ai = 0; ai < fr->ntype; ai++)
 650     {
 651         type_VDW[ai] = FALSE;
 652         for (j = 0; j < fr->ntype; j++)
 653         {
 654             type_VDW[ai] = type_VDW[ai] ||
 655                 fr->bBHAM ||
 656                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 657                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 658         }
 659     }
 660
 661     *bFEP_NonBonded               = FALSE;
 662     *bExcl_IntraCGAll_InterCGNone = TRUE;
 663
 664     excl_nalloc = 10;
 665     snew(bExcl, excl_nalloc);
 666     cg_offset = 0;
 667     a_offset  = 0;
 668     for (mb = 0; mb < mtop->nmolblock; mb++)
 669     {
 670         molb = &mtop->molblock[mb];
 671         molt = &mtop->moltype[molb->type];
 672         cgs  = &molt->cgs;
 673         excl = &molt->excls;
 674
 675         /* Check if the cginfo is identical for all molecules in this block.
 676          * If so, we only need an array of the size of one molecule.
 677          * Otherwise we make an array of #mol times #cgs per molecule.
 678          */
 679         bId = TRUE;
 680         am  = 0;
 681         for (m = 0; m < molb->nmol; m++)
 682         {
 683             am = m*cgs->index[cgs->nr];
 684             for (cg = 0; cg < cgs->nr; cg++)
 685             {
 686                 a0 = cgs->index[cg];
 687                 a1 = cgs->index[cg+1];
 688                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 689                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 690                 {
 691                     bId = FALSE;
 692                 }
 693                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 694                 {
 695                     for (ai = a0; ai < a1; ai++)
 696                     {
 697                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 698                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 699                         {
 700                             bId = FALSE;
 701                         }
 702                     }
 703                 }
 704             }
 705         }
 706
 707         cginfo_mb[mb].cg_start = cg_offset;
 708         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 709         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 710         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 711         cginfo = cginfo_mb[mb].cginfo;
 712
 713         /* Set constraints flags for constrained atoms */
 714         snew(a_con, molt->atoms.nr);
 715         for (ftype = 0; ftype < F_NRE; ftype++)
 716         {
 717             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 718             {
 719                 int nral;
 720
 721                 nral = NRAL(ftype);
 722                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 723                 {
 724                     int a;
 725
 726                     for (a = 0; a < nral; a++)
 727                     {
 728                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 729                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 730                     }
 731                 }
 732             }
 733         }
 734
 735         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 736         {
 737             cgm = m*cgs->nr;
 738             am  = m*cgs->index[cgs->nr];
 739             for (cg = 0; cg < cgs->nr; cg++)
 740             {
 741                 a0 = cgs->index[cg];
 742                 a1 = cgs->index[cg+1];
 743
 744                 /* Store the energy group in cginfo */
 745                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 746                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 747
 748                 /* Check the intra/inter charge group exclusions */
 749                 if (a1-a0 > excl_nalloc)
 750                 {
 751                     excl_nalloc = a1 - a0;
 752                     srenew(bExcl, excl_nalloc);
 753                 }
 754                 /* bExclIntraAll: all intra cg interactions excluded
 755                  * bExclInter:    any inter cg interactions excluded
 756                  */
 757                 bExclIntraAll       = TRUE;
 758                 bExclInter          = FALSE;
 759                 bHaveVDW            = FALSE;
 760                 bHaveQ              = FALSE;
 761                 bHavePerturbedAtoms = FALSE;
 762                 for (ai = a0; ai < a1; ai++)
 763                 {
 764                     /* Check VDW and electrostatic interactions */
 765                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 766                                             type_VDW[molt->atoms.atom[ai].typeB]);
 767                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 768                                             molt->atoms.atom[ai].qB != 0);
 769
 770                     bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
 771
 772                     /* Clear the exclusion list for atom ai */
 773                     for (aj = a0; aj < a1; aj++)
 774                     {
 775                         bExcl[aj-a0] = FALSE;
 776                     }
 777                     /* Loop over all the exclusions of atom ai */
 778                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 779                     {
 780                         aj = excl->a[j];
 781                         if (aj < a0 || aj >= a1)
 782                         {
 783                             bExclInter = TRUE;
 784                         }
 785                         else
 786                         {
 787                             bExcl[aj-a0] = TRUE;
 788                         }
 789                     }
 790                     /* Check if ai excludes a0 to a1 */
 791                     for (aj = a0; aj < a1; aj++)
 792                     {
 793                         if (!bExcl[aj-a0])
 794                         {
 795                             bExclIntraAll = FALSE;
 796                         }
 797                     }
 798
 799                     switch (a_con[ai])
 800                     {
 801                         case acCONSTRAINT:
 802                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 803                             break;
 804                         case acSETTLE:
 805                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 806                             break;
 807                         default:
 808                             break;
 809                     }
 810                 }
 811                 if (bExclIntraAll)
 812                 {
 813                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 814                 }
 815                 if (bExclInter)
 816                 {
 817                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 818                 }
 819                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 820                 {
 821                     /* The size in cginfo is currently only read with DD */
 822                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 823                 }
 824                 if (bHaveVDW)
 825                 {
 826                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 827                 }
 828                 if (bHaveQ)
 829                 {
 830                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 831                 }
 832                 if (bHavePerturbedAtoms && fr->efep != efepNO)
 833                 {
 834                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 835                     *bFEP_NonBonded = TRUE;
 836                 }
 837                 /* Store the charge group size */
 838                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 839
 840                 if (!bExclIntraAll || bExclInter)
 841                 {
 842                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 843                 }
 844             }
 845         }
 846
 847         sfree(a_con);
 848
 849         cg_offset += molb->nmol*cgs->nr;
 850         a_offset  += molb->nmol*cgs->index[cgs->nr];
 851     }
 852     sfree(bExcl);
 853
 854     /* the solvent optimizer is called after the QM is initialized,
 855      * because we don't want to have the QM subsystemto become an
 856      * optimized solvent
 857      */
 858
 859     check_solvent(fplog, mtop, fr, cginfo_mb);
 860
 861     if (getenv("GMX_NO_SOLV_OPT"))
 862     {
 863         if (fplog)
 864         {
 865             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 866                     "Disabling all solvent optimization\n");
 867         }
 868         fr->solvent_opt = esolNO;
 869     }
 870     if (bNoSolvOpt)
 871     {
 872         fr->solvent_opt = esolNO;
 873     }
 874     if (!fr->solvent_opt)
 875     {
 876         for (mb = 0; mb < mtop->nmolblock; mb++)
 877         {
 878             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 879             {
 880                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 881             }
 882         }
 883     }
 884
 885     return cginfo_mb;
 886 }
 887
 888 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 889 {
 890     int  ncg, mb, cg;
 891     int *cginfo;
 892
 893     ncg = cgi_mb[nmb-1].cg_end;
 894     snew(cginfo, ncg);
 895     mb = 0;
 896     for (cg = 0; cg < ncg; cg++)
 897     {
 898         while (cg >= cgi_mb[mb].cg_end)
 899         {
 900             mb++;
 901         }
 902         cginfo[cg] =
 903             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 904     }
 905
 906     return cginfo;
 907 }
 908
 909 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 910 {
 911     /*This now calculates sum for q and c6*/
 912     double         qsum, q2sum, q, c6sum, c6;
 913     int            mb, nmol, i;
 914     const t_atoms *atoms;
 915
 916     qsum   = 0;
 917     q2sum  = 0;
 918     c6sum  = 0;
 919     for (mb = 0; mb < mtop->nmolblock; mb++)
 920     {
 921         nmol  = mtop->molblock[mb].nmol;
 922         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 923         for (i = 0; i < atoms->nr; i++)
 924         {
 925             q       = atoms->atom[i].q;
 926             qsum   += nmol*q;
 927             q2sum  += nmol*q*q;
 928             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 929             c6sum  += nmol*c6;
 930         }
 931     }
 932     fr->qsum[0]   = qsum;
 933     fr->q2sum[0]  = q2sum;
 934     fr->c6sum[0]  = c6sum;
 935
 936     if (fr->efep != efepNO)
 937     {
 938         qsum   = 0;
 939         q2sum  = 0;
 940         c6sum  = 0;
 941         for (mb = 0; mb < mtop->nmolblock; mb++)
 942         {
 943             nmol  = mtop->molblock[mb].nmol;
 944             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 945             for (i = 0; i < atoms->nr; i++)
 946             {
 947                 q       = atoms->atom[i].qB;
 948                 qsum   += nmol*q;
 949                 q2sum  += nmol*q*q;
 950                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 951                 c6sum  += nmol*c6;
 952             }
 953             fr->qsum[1]   = qsum;
 954             fr->q2sum[1]  = q2sum;
 955             fr->c6sum[1]  = c6sum;
 956         }
 957     }
 958     else
 959     {
 960         fr->qsum[1]   = fr->qsum[0];
 961         fr->q2sum[1]  = fr->q2sum[0];
 962         fr->c6sum[1]  = fr->c6sum[0];
 963     }
 964     if (log)
 965     {
 966         if (fr->efep == efepNO)
 967         {
 968             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 969         }
 970         else
 971         {
 972             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 973                     fr->qsum[0], fr->qsum[1]);
 974         }
 975     }
 976 }
 977
 978 void update_forcerec(t_forcerec *fr, matrix box)
 979 {
 980     if (fr->eeltype == eelGRF)
 981     {
 982         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 983                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 984                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 985     }
 986 }
 987
 988 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 989 {
 990     const t_atoms  *atoms, *atoms_tpi;
 991     const t_blocka *excl;
 992     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 993     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 994     double          csix, ctwelve;
 995     int             ntp, *typecount;
 996     gmx_bool        bBHAM;
 997     real           *nbfp;
 998     real           *nbfp_comb = NULL;
 999
1000     ntp   = fr->ntype;
1001     bBHAM = fr->bBHAM;
1002     nbfp  = fr->nbfp;
1003
1004     /* For LJ-PME, we want to correct for the difference between the
1005      * actual C6 values and the C6 values used by the LJ-PME based on
1006      * combination rules. */
1007
1008     if (EVDW_PME(fr->vdwtype))
1009     {
1010         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1011                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1012         for (tpi = 0; tpi < ntp; ++tpi)
1013         {
1014             for (tpj = 0; tpj < ntp; ++tpj)
1015             {
1016                 C6(nbfp_comb, ntp, tpi, tpj) =
1017                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1018                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1019             }
1020         }
1021         nbfp = nbfp_comb;
1022     }
1023     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1024     {
1025         csix    = 0;
1026         ctwelve = 0;
1027         npair   = 0;
1028         nexcl   = 0;
1029         if (!fr->n_tpi)
1030         {
1031             /* Count the types so we avoid natoms^2 operations */
1032             snew(typecount, ntp);
1033             gmx_mtop_count_atomtypes(mtop, q, typecount);
1034
1035             for (tpi = 0; tpi < ntp; tpi++)
1036             {
1037                 for (tpj = tpi; tpj < ntp; tpj++)
1038                 {
1039                     tmpi = typecount[tpi];
1040                     tmpj = typecount[tpj];
1041                     if (tpi != tpj)
1042                     {
1043                         npair_ij = tmpi*tmpj;
1044                     }
1045                     else
1046                     {
1047                         npair_ij = tmpi*(tmpi - 1)/2;
1048                     }
1049                     if (bBHAM)
1050                     {
1051                         /* nbfp now includes the 6.0 derivative prefactor */
1052                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1053                     }
1054                     else
1055                     {
1056                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1057                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1058                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1059                     }
1060                     npair += npair_ij;
1061                 }
1062             }
1063             sfree(typecount);
1064             /* Subtract the excluded pairs.
1065              * The main reason for substracting exclusions is that in some cases
1066              * some combinations might never occur and the parameters could have
1067              * any value. These unused values should not influence the dispersion
1068              * correction.
1069              */
1070             for (mb = 0; mb < mtop->nmolblock; mb++)
1071             {
1072                 nmol  = mtop->molblock[mb].nmol;
1073                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1074                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1075                 for (i = 0; (i < atoms->nr); i++)
1076                 {
1077                     if (q == 0)
1078                     {
1079                         tpi = atoms->atom[i].type;
1080                     }
1081                     else
1082                     {
1083                         tpi = atoms->atom[i].typeB;
1084                     }
1085                     j1  = excl->index[i];
1086                     j2  = excl->index[i+1];
1087                     for (j = j1; j < j2; j++)
1088                     {
1089                         k = excl->a[j];
1090                         if (k > i)
1091                         {
1092                             if (q == 0)
1093                             {
1094                                 tpj = atoms->atom[k].type;
1095                             }
1096                             else
1097                             {
1098                                 tpj = atoms->atom[k].typeB;
1099                             }
1100                             if (bBHAM)
1101                             {
1102                                 /* nbfp now includes the 6.0 derivative prefactor */
1103                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1104                             }
1105                             else
1106                             {
1107                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1108                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1109                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1110                             }
1111                             nexcl += nmol;
1112                         }
1113                     }
1114                 }
1115             }
1116         }
1117         else
1118         {
1119             /* Only correct for the interaction of the test particle
1120              * with the rest of the system.
1121              */
1122             atoms_tpi =
1123                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1124
1125             npair = 0;
1126             for (mb = 0; mb < mtop->nmolblock; mb++)
1127             {
1128                 nmol  = mtop->molblock[mb].nmol;
1129                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1130                 for (j = 0; j < atoms->nr; j++)
1131                 {
1132                     nmolc = nmol;
1133                     /* Remove the interaction of the test charge group
1134                      * with itself.
1135                      */
1136                     if (mb == mtop->nmolblock-1)
1137                     {
1138                         nmolc--;
1139
1140                         if (mb == 0 && nmol == 1)
1141                         {
1142                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1143                         }
1144                     }
1145                     if (q == 0)
1146                     {
1147                         tpj = atoms->atom[j].type;
1148                     }
1149                     else
1150                     {
1151                         tpj = atoms->atom[j].typeB;
1152                     }
1153                     for (i = 0; i < fr->n_tpi; i++)
1154                     {
1155                         if (q == 0)
1156                         {
1157                             tpi = atoms_tpi->atom[i].type;
1158                         }
1159                         else
1160                         {
1161                             tpi = atoms_tpi->atom[i].typeB;
1162                         }
1163                         if (bBHAM)
1164                         {
1165                             /* nbfp now includes the 6.0 derivative prefactor */
1166                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1167                         }
1168                         else
1169                         {
1170                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1171                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1172                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1173                         }
1174                         npair += nmolc;
1175                     }
1176                 }
1177             }
1178         }
1179         if (npair - nexcl <= 0 && fplog)
1180         {
1181             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1182             csix     = 0;
1183             ctwelve  = 0;
1184         }
1185         else
1186         {
1187             csix    /= npair - nexcl;
1188             ctwelve /= npair - nexcl;
1189         }
1190         if (debug)
1191         {
1192             fprintf(debug, "Counted %d exclusions\n", nexcl);
1193             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1194             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1195         }
1196         fr->avcsix[q]    = csix;
1197         fr->avctwelve[q] = ctwelve;
1198     }
1199
1200     if (EVDW_PME(fr->vdwtype))
1201     {
1202         sfree(nbfp_comb);
1203     }
1204
1205     if (fplog != NULL)
1206     {
1207         if (fr->eDispCorr == edispcAllEner ||
1208             fr->eDispCorr == edispcAllEnerPres)
1209         {
1210             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1211                     fr->avcsix[0], fr->avctwelve[0]);
1212         }
1213         else
1214         {
1215             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1216         }
1217     }
1218 }
1219
1220
1221 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1222                            const gmx_mtop_t *mtop)
1223 {
1224     const t_atoms *at1, *at2;
1225     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1226     real           b, bmin;
1227     real          *nbfp;
1228
1229     if (fplog)
1230     {
1231         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1232     }
1233     nbfp   = fr->nbfp;
1234     ntypes = fr->ntype;
1235
1236     bmin           = -1;
1237     fr->bham_b_max = 0;
1238     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1239     {
1240         at1 = &mtop->moltype[mt1].atoms;
1241         for (i = 0; (i < at1->nr); i++)
1242         {
1243             tpi = at1->atom[i].type;
1244             if (tpi >= ntypes)
1245             {
1246                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1247             }
1248
1249             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1250             {
1251                 at2 = &mtop->moltype[mt2].atoms;
1252                 for (j = 0; (j < at2->nr); j++)
1253                 {
1254                     tpj = at2->atom[j].type;
1255                     if (tpj >= ntypes)
1256                     {
1257                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1258                     }
1259                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1260                     if (b > fr->bham_b_max)
1261                     {
1262                         fr->bham_b_max = b;
1263                     }
1264                     if ((b < bmin) || (bmin == -1))
1265                     {
1266                         bmin = b;
1267                     }
1268                 }
1269             }
1270         }
1271     }
1272     if (fplog)
1273     {
1274         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1275                 bmin, fr->bham_b_max);
1276     }
1277 }
1278
1279 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1280                             t_forcerec *fr, real rtab,
1281                             const t_commrec *cr,
1282                             const char *tabfn, char *eg1, char *eg2,
1283                             t_nblists *nbl)
1284 {
1285     char buf[STRLEN];
1286     int  i, j;
1287
1288     if (tabfn == NULL)
1289     {
1290         if (debug)
1291         {
1292             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1293         }
1294         return;
1295     }
1296
1297     sprintf(buf, "%s", tabfn);
1298     if (eg1 && eg2)
1299     {
1300         /* Append the two energy group names */
1301         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1302                 eg1, eg2, ftp2ext(efXVG));
1303     }
1304     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1305     /* Copy the contents of the table to separate coulomb and LJ tables too,
1306      * to improve cache performance.
1307      */
1308     /* For performance reasons we want
1309      * the table data to be aligned to 16-byte. The pointers could be freed
1310      * but currently aren't.
1311      */
1312     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1313     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1314     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1315     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1316     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1317     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1318     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1319     nbl->table_elec.ninteractions = 1;
1320     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1321     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1322
1323     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1324     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1325     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1326     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1327     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1328     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1329     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1330     nbl->table_vdw.ninteractions = 2;
1331     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1332     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1333
1334     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1335     {
1336         for (j = 0; j < 4; j++)
1337         {
1338             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1339         }
1340         for (j = 0; j < 8; j++)
1341         {
1342             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1343         }
1344     }
1345 }
1346
1347 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1348                          int *ncount, int **count)
1349 {
1350     const gmx_moltype_t *molt;
1351     const t_ilist       *il;
1352     int                  mt, ftype, stride, i, j, tabnr;
1353
1354     for (mt = 0; mt < mtop->nmoltype; mt++)
1355     {
1356         molt = &mtop->moltype[mt];
1357         for (ftype = 0; ftype < F_NRE; ftype++)
1358         {
1359             if (ftype == ftype1 || ftype == ftype2)
1360             {
1361                 il     = &molt->ilist[ftype];
1362                 stride = 1 + NRAL(ftype);
1363                 for (i = 0; i < il->nr; i += stride)
1364                 {
1365                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1366                     if (tabnr < 0)
1367                     {
1368                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1369                     }
1370                     if (tabnr >= *ncount)
1371                     {
1372                         srenew(*count, tabnr+1);
1373                         for (j = *ncount; j < tabnr+1; j++)
1374                         {
1375                             (*count)[j] = 0;
1376                         }
1377                         *ncount = tabnr+1;
1378                     }
1379                     (*count)[tabnr]++;
1380                 }
1381             }
1382         }
1383     }
1384 }
1385
1386 static bondedtable_t *make_bonded_tables(FILE *fplog,
1387                                          int ftype1, int ftype2,
1388                                          const gmx_mtop_t *mtop,
1389                                          const char *basefn, const char *tabext)
1390 {
1391     int            i, ncount, *count;
1392     char           tabfn[STRLEN];
1393     bondedtable_t *tab;
1394
1395     tab = NULL;
1396
1397     ncount = 0;
1398     count  = NULL;
1399     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1400
1401     if (ncount > 0)
1402     {
1403         snew(tab, ncount);
1404         for (i = 0; i < ncount; i++)
1405         {
1406             if (count[i] > 0)
1407             {
1408                 sprintf(tabfn, "%s", basefn);
1409                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1410                         tabext, i, ftp2ext(efXVG));
1411                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1412             }
1413         }
1414         sfree(count);
1415     }
1416
1417     return tab;
1418 }
1419
1420 void forcerec_set_ranges(t_forcerec *fr,
1421                          int ncg_home, int ncg_force,
1422                          int natoms_force,
1423                          int natoms_force_constr, int natoms_f_novirsum)
1424 {
1425     fr->cg0 = 0;
1426     fr->hcg = ncg_home;
1427
1428     /* fr->ncg_force is unused in the standard code,
1429      * but it can be useful for modified code dealing with charge groups.
1430      */
1431     fr->ncg_force           = ncg_force;
1432     fr->natoms_force        = natoms_force;
1433     fr->natoms_force_constr = natoms_force_constr;
1434
1435     if (fr->natoms_force_constr > fr->nalloc_force)
1436     {
1437         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1438
1439         if (fr->bTwinRange)
1440         {
1441             srenew(fr->f_twin, fr->nalloc_force);
1442         }
1443     }
1444
1445     if (fr->bF_NoVirSum)
1446     {
1447         fr->f_novirsum_n = natoms_f_novirsum;
1448         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1449         {
1450             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1451             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1452         }
1453     }
1454     else
1455     {
1456         fr->f_novirsum_n = 0;
1457     }
1458 }
1459
1460 static real cutoff_inf(real cutoff)
1461 {
1462     if (cutoff == 0)
1463     {
1464         cutoff = GMX_CUTOFF_INF;
1465     }
1466
1467     return cutoff;
1468 }
1469
1470 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1471                                   t_forcerec *fr, const t_inputrec *ir,
1472                                   const char *tabfn, const gmx_mtop_t *mtop,
1473                                   matrix     box)
1474 {
1475     char buf[STRLEN];
1476     int  i, j;
1477
1478     if (tabfn == NULL)
1479     {
1480         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1481         return;
1482     }
1483
1484     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1485
1486     sprintf(buf, "%s", tabfn);
1487     for (i = 0; i < ir->adress->n_tf_grps; i++)
1488     {
1489         j = ir->adress->tf_table_index[i]; /* get energy group index */
1490         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1491                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1492         if (fp)
1493         {
1494             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1495         }
1496         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1497     }
1498
1499 }
1500
1501 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1502 {
1503     gmx_bool bAllvsAll;
1504
1505     bAllvsAll =
1506         (
1507             ir->rlist == 0            &&
1508             ir->rcoulomb == 0         &&
1509             ir->rvdw == 0             &&
1510             ir->ePBC == epbcNONE      &&
1511             ir->vdwtype == evdwCUT    &&
1512             ir->coulombtype == eelCUT &&
1513             ir->efep == efepNO        &&
1514             (ir->implicit_solvent == eisNO ||
1515              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1516                                                   ir->gb_algorithm == egbHCT   ||
1517                                                   ir->gb_algorithm == egbOBC))) &&
1518             getenv("GMX_NO_ALLVSALL") == NULL
1519         );
1520
1521     if (bAllvsAll && ir->opts.ngener > 1)
1522     {
1523         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1524
1525         if (bPrintNote)
1526         {
1527             if (MASTER(cr))
1528             {
1529                 fprintf(stderr, "\n%s\n", note);
1530             }
1531             if (fp != NULL)
1532             {
1533                 fprintf(fp, "\n%s\n", note);
1534             }
1535         }
1536         bAllvsAll = FALSE;
1537     }
1538
1539     if (bAllvsAll && fp && MASTER(cr))
1540     {
1541         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1542     }
1543
1544     return bAllvsAll;
1545 }
1546
1547
1548 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1549 {
1550     int t, i;
1551
1552     /* These thread local data structures are used for bondeds only */
1553     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1554
1555     if (fr->nthreads > 1)
1556     {
1557         snew(fr->f_t, fr->nthreads);
1558         /* Thread 0 uses the global force and energy arrays */
1559         for (t = 1; t < fr->nthreads; t++)
1560         {
1561             fr->f_t[t].f        = NULL;
1562             fr->f_t[t].f_nalloc = 0;
1563             snew(fr->f_t[t].fshift, SHIFTS);
1564             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1565             for (i = 0; i < egNR; i++)
1566             {
1567                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1568             }
1569         }
1570     }
1571 }
1572
1573
1574 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1575                                       const t_commrec  *cr,
1576                                       const t_inputrec *ir,
1577                                       gmx_bool          bGPU)
1578 {
1579     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1580     {
1581         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1582                       bGPU ? "GPUs" : "SIMD kernels",
1583                       bGPU ? "CPU only" : "plain-C kernels");
1584         return FALSE;
1585     }
1586
1587     return TRUE;
1588 }
1589
1590
1591 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1592                                   int                         *kernel_type,
1593                                   int                         *ewald_excl)
1594 {
1595     *kernel_type = nbnxnk4x4_PlainC;
1596     *ewald_excl  = ewaldexclTable;
1597
1598 #ifdef GMX_NBNXN_SIMD
1599     {
1600 #ifdef GMX_NBNXN_SIMD_4XN
1601         *kernel_type = nbnxnk4xN_SIMD_4xN;
1602 #endif
1603 #ifdef GMX_NBNXN_SIMD_2XNN
1604         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1605 #endif
1606
1607 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1608         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1609          * Currently this is based on the SIMD acceleration choice,
1610          * but it might be better to decide this at runtime based on CPU.
1611          *
1612          * 4xN calculates more (zero) interactions, but has less pair-search
1613          * work and much better kernel instruction scheduling.
1614          *
1615          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1616          * which doesn't have FMA, both the analytical and tabulated Ewald
1617          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1618          * 2x(4+4) because it results in significantly fewer pairs.
1619          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1620          * 10% with HT, 50% without HT. As we currently don't detect the actual
1621          * use of HT, use 4x8 to avoid a potential performance hit.
1622          * On Intel Haswell 4x8 is always faster.
1623          */
1624         *kernel_type = nbnxnk4xN_SIMD_4xN;
1625
1626 #ifndef GMX_SIMD_HAVE_FMA
1627         if (EEL_PME_EWALD(ir->coulombtype) ||
1628             EVDW_PME(ir->vdwtype))
1629         {
1630             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1631              * There are enough instructions to make 2x(4+4) efficient.
1632              */
1633             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1634         }
1635 #endif
1636 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1637
1638
1639         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1640         {
1641 #ifdef GMX_NBNXN_SIMD_4XN
1642             *kernel_type = nbnxnk4xN_SIMD_4xN;
1643 #else
1644             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1645 #endif
1646         }
1647         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1648         {
1649 #ifdef GMX_NBNXN_SIMD_2XNN
1650             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1651 #else
1652             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1653 #endif
1654         }
1655
1656         /* Analytical Ewald exclusion correction is only an option in
1657          * the SIMD kernel.
1658          * Since table lookup's don't parallelize with SIMD, analytical
1659          * will probably always be faster for a SIMD width of 8 or more.
1660          * With FMA analytical is sometimes faster for a width if 4 as well.
1661          * On BlueGene/Q, this is faster regardless of precision.
1662          * In single precision, this is faster on Bulldozer.
1663          */
1664 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1665         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1666         defined GMX_SIMD_IBM_QPX
1667         *ewald_excl = ewaldexclAnalytical;
1668 #endif
1669         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1670         {
1671             *ewald_excl = ewaldexclTable;
1672         }
1673         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1674         {
1675             *ewald_excl = ewaldexclAnalytical;
1676         }
1677
1678     }
1679 #endif /* GMX_NBNXN_SIMD */
1680 }
1681
1682
1683 const char *lookup_nbnxn_kernel_name(int kernel_type)
1684 {
1685     const char *returnvalue = NULL;
1686     switch (kernel_type)
1687     {
1688         case nbnxnkNotSet:
1689             returnvalue = "not set";
1690             break;
1691         case nbnxnk4x4_PlainC:
1692             returnvalue = "plain C";
1693             break;
1694         case nbnxnk4xN_SIMD_4xN:
1695         case nbnxnk4xN_SIMD_2xNN:
1696 #ifdef GMX_NBNXN_SIMD
1697 #if defined GMX_SIMD_X86_SSE2
1698             returnvalue = "SSE2";
1699 #elif defined GMX_SIMD_X86_SSE4_1
1700             returnvalue = "SSE4.1";
1701 #elif defined GMX_SIMD_X86_AVX_128_FMA
1702             returnvalue = "AVX_128_FMA";
1703 #elif defined GMX_SIMD_X86_AVX_256
1704             returnvalue = "AVX_256";
1705 #elif defined GMX_SIMD_X86_AVX2_256
1706             returnvalue = "AVX2_256";
1707 #else
1708             returnvalue = "SIMD";
1709 #endif
1710 #else  /* GMX_NBNXN_SIMD */
1711             returnvalue = "not available";
1712 #endif /* GMX_NBNXN_SIMD */
1713             break;
1714         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1715         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1716
1717         case nbnxnkNR:
1718         default:
1719             gmx_fatal(FARGS, "Illegal kernel type selected");
1720             returnvalue = NULL;
1721             break;
1722     }
1723     return returnvalue;
1724 };
1725
1726 static void pick_nbnxn_kernel(FILE                *fp,
1727                               const t_commrec     *cr,
1728                               gmx_bool             use_simd_kernels,
1729                               gmx_bool             bUseGPU,
1730                               gmx_bool             bEmulateGPU,
1731                               const t_inputrec    *ir,
1732                               int                 *kernel_type,
1733                               int                 *ewald_excl,
1734                               gmx_bool             bDoNonbonded)
1735 {
1736     assert(kernel_type);
1737
1738     *kernel_type = nbnxnkNotSet;
1739     *ewald_excl  = ewaldexclTable;
1740
1741     if (bEmulateGPU)
1742     {
1743         *kernel_type = nbnxnk8x8x8_PlainC;
1744
1745         if (bDoNonbonded)
1746         {
1747             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1748         }
1749     }
1750     else if (bUseGPU)
1751     {
1752         *kernel_type = nbnxnk8x8x8_CUDA;
1753     }
1754
1755     if (*kernel_type == nbnxnkNotSet)
1756     {
1757         /* LJ PME with LB combination rule does 7 mesh operations.
1758          * This so slow that we don't compile SIMD non-bonded kernels for that.
1759          */
1760         if (use_simd_kernels &&
1761             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1762         {
1763             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1764         }
1765         else
1766         {
1767             *kernel_type = nbnxnk4x4_PlainC;
1768         }
1769     }
1770
1771     if (bDoNonbonded && fp != NULL)
1772     {
1773         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1774                 lookup_nbnxn_kernel_name(*kernel_type),
1775                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1776                 nbnxn_kernel_to_cj_size(*kernel_type));
1777     }
1778 }
1779
1780 static void pick_nbnxn_resources(const t_commrec     *cr,
1781                                  const gmx_hw_info_t *hwinfo,
1782                                  gmx_bool             bDoNonbonded,
1783                                  gmx_bool            *bUseGPU,
1784                                  gmx_bool            *bEmulateGPU,
1785                                  const gmx_gpu_opt_t *gpu_opt)
1786 {
1787     gmx_bool bEmulateGPUEnvVarSet;
1788     char     gpu_err_str[STRLEN];
1789
1790     *bUseGPU = FALSE;
1791
1792     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1793
1794     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1795      * GPUs (currently) only handle non-bonded calculations, we will
1796      * automatically switch to emulation if non-bonded calculations are
1797      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1798      * way to turn off GPU initialization, data movement, and cleanup.
1799      *
1800      * GPU emulation can be useful to assess the performance one can expect by
1801      * adding GPU(s) to the machine. The conditional below allows this even
1802      * if mdrun is compiled without GPU acceleration support.
1803      * Note that you should freezing the system as otherwise it will explode.
1804      */
1805     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1806                     (!bDoNonbonded &&
1807                      gpu_opt->ncuda_dev_use > 0));
1808
1809     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1810      */
1811     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1812     {
1813         /* Each PP node will use the intra-node id-th device from the
1814          * list of detected/selected GPUs. */
1815         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1816                       &hwinfo->gpu_info, gpu_opt))
1817         {
1818             /* At this point the init should never fail as we made sure that
1819              * we have all the GPUs we need. If it still does, we'll bail. */
1820             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1821                       cr->nodeid,
1822                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1823                                         cr->rank_pp_intranode),
1824                       gpu_err_str);
1825         }
1826
1827         /* Here we actually turn on hardware GPU acceleration */
1828         *bUseGPU = TRUE;
1829     }
1830 }
1831
1832 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1833                             nonbonded_verlet_t *nbv,
1834                             int                 group)
1835 {
1836     gmx_bool bUsesSimpleTables = TRUE;
1837     int      grp_index;
1838
1839     switch (cutoff_scheme)
1840     {
1841         case ecutsGROUP:
1842             bUsesSimpleTables = TRUE;
1843             break;
1844         case ecutsVERLET:
1845             assert(NULL != nbv && NULL != nbv->grp);
1846             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1847             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1848             break;
1849         default:
1850             gmx_incons("unimplemented");
1851     }
1852     return bUsesSimpleTables;
1853 }
1854
1855 static void init_ewald_f_table(interaction_const_t *ic,
1856                                gmx_bool             bUsesSimpleTables,
1857                                real                 rtab)
1858 {
1859     real maxr;
1860
1861     if (bUsesSimpleTables)
1862     {
1863         /* With a spacing of 0.0005 we are at the force summation accuracy
1864          * for the SSE kernels for "normal" atomistic simulations.
1865          */
1866         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1867                                                    ic->rcoulomb);
1868
1869         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1870         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1871     }
1872     else
1873     {
1874         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1875         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1876         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1877     }
1878
1879     sfree_aligned(ic->tabq_coul_FDV0);
1880     sfree_aligned(ic->tabq_coul_F);
1881     sfree_aligned(ic->tabq_coul_V);
1882
1883     sfree_aligned(ic->tabq_vdw_FDV0);
1884     sfree_aligned(ic->tabq_vdw_F);
1885     sfree_aligned(ic->tabq_vdw_V);
1886
1887     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1888     {
1889         /* Create the original table data in FDV0 */
1890         snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1891         snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1892         snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1893         table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1894                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1895     }
1896
1897     if (EVDW_PME(ic->vdwtype))
1898     {
1899         snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1900         snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1901         snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1902         table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1903                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1904     }
1905 }
1906
1907 void init_interaction_const_tables(FILE                *fp,
1908                                    interaction_const_t *ic,
1909                                    gmx_bool             bUsesSimpleTables,
1910                                    real                 rtab)
1911 {
1912     real spacing;
1913
1914     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1915     {
1916         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1917
1918         if (fp != NULL)
1919         {
1920             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1921                     1/ic->tabq_scale, ic->tabq_size);
1922         }
1923     }
1924 }
1925
1926 static void clear_force_switch_constants(shift_consts_t *sc)
1927 {
1928     sc->c2   = 0;
1929     sc->c3   = 0;
1930     sc->cpot = 0;
1931 }
1932
1933 static void force_switch_constants(real p,
1934                                    real rsw, real rc,
1935                                    shift_consts_t *sc)
1936 {
1937     /* Here we determine the coefficient for shifting the force to zero
1938      * between distance rsw and the cut-off rc.
1939      * For a potential of r^-p, we have force p*r^-(p+1).
1940      * But to save flops we absorb p in the coefficient.
1941      * Thus we get:
1942      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1943      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1944      */
1945     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1946     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1947     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1948 }
1949
1950 static void potential_switch_constants(real rsw, real rc,
1951                                        switch_consts_t *sc)
1952 {
1953     /* The switch function is 1 at rsw and 0 at rc.
1954      * The derivative and second derivate are zero at both ends.
1955      * rsw        = max(r - r_switch, 0)
1956      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1957      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1958      * force      = force*dsw - potential*sw
1959      * potential *= sw
1960      */
1961     sc->c3 = -10*pow(rc - rsw, -3);
1962     sc->c4 =  15*pow(rc - rsw, -4);
1963     sc->c5 =  -6*pow(rc - rsw, -5);
1964 }
1965
1966 static void
1967 init_interaction_const(FILE                       *fp,
1968                        const t_commrec gmx_unused *cr,
1969                        interaction_const_t       **interaction_const,
1970                        const t_forcerec           *fr,
1971                        real                        rtab)
1972 {
1973     interaction_const_t *ic;
1974     gmx_bool             bUsesSimpleTables = TRUE;
1975
1976     snew(ic, 1);
1977
1978     /* Just allocate something so we can free it */
1979     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1980     snew_aligned(ic->tabq_coul_F, 16, 32);
1981     snew_aligned(ic->tabq_coul_V, 16, 32);
1982
1983     ic->rlist           = fr->rlist;
1984     ic->rlistlong       = fr->rlistlong;
1985
1986     /* Lennard-Jones */
1987     ic->vdwtype         = fr->vdwtype;
1988     ic->vdw_modifier    = fr->vdw_modifier;
1989     ic->rvdw            = fr->rvdw;
1990     ic->rvdw_switch     = fr->rvdw_switch;
1991     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
1992     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1993     ic->sh_lj_ewald     = 0;
1994     clear_force_switch_constants(&ic->dispersion_shift);
1995     clear_force_switch_constants(&ic->repulsion_shift);
1996
1997     switch (ic->vdw_modifier)
1998     {
1999         case eintmodPOTSHIFT:
2000             /* Only shift the potential, don't touch the force */
2001             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2002             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
2003             if (EVDW_PME(ic->vdwtype))
2004             {
2005                 real crc2;
2006
2007                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2008                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2009             }
2010             break;
2011         case eintmodFORCESWITCH:
2012             /* Switch the force, switch and shift the potential */
2013             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2014                                    &ic->dispersion_shift);
2015             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2016                                    &ic->repulsion_shift);
2017             break;
2018         case eintmodPOTSWITCH:
2019             /* Switch the potential and force */
2020             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2021                                        &ic->vdw_switch);
2022             break;
2023         case eintmodNONE:
2024         case eintmodEXACTCUTOFF:
2025             /* Nothing to do here */
2026             break;
2027         default:
2028             gmx_incons("unimplemented potential modifier");
2029     }
2030
2031     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2032
2033     /* Electrostatics */
2034     ic->eeltype          = fr->eeltype;
2035     ic->coulomb_modifier = fr->coulomb_modifier;
2036     ic->rcoulomb         = fr->rcoulomb;
2037     ic->epsilon_r        = fr->epsilon_r;
2038     ic->epsfac           = fr->epsfac;
2039     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
2040
2041     if (fr->coulomb_modifier == eintmodPOTSHIFT)
2042     {
2043         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2044     }
2045     else
2046     {
2047         ic->sh_ewald = 0;
2048     }
2049
2050     /* Reaction-field */
2051     if (EEL_RF(ic->eeltype))
2052     {
2053         ic->epsilon_rf = fr->epsilon_rf;
2054         ic->k_rf       = fr->k_rf;
2055         ic->c_rf       = fr->c_rf;
2056     }
2057     else
2058     {
2059         /* For plain cut-off we might use the reaction-field kernels */
2060         ic->epsilon_rf = ic->epsilon_r;
2061         ic->k_rf       = 0;
2062         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2063         {
2064             ic->c_rf   = 1/ic->rcoulomb;
2065         }
2066         else
2067         {
2068             ic->c_rf   = 0;
2069         }
2070     }
2071
2072     if (fp != NULL)
2073     {
2074         real dispersion_shift;
2075
2076         dispersion_shift = ic->dispersion_shift.cpot;
2077         if (EVDW_PME(ic->vdwtype))
2078         {
2079             dispersion_shift -= ic->sh_lj_ewald;
2080         }
2081         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2082                 ic->repulsion_shift.cpot, dispersion_shift);
2083
2084         if (ic->eeltype == eelCUT)
2085         {
2086             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2087         }
2088         else if (EEL_PME(ic->eeltype))
2089         {
2090             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2091         }
2092         fprintf(fp, "\n");
2093     }
2094
2095     *interaction_const = ic;
2096
2097     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2098     {
2099         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2100
2101         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2102          * also sharing texture references. To keep the code simple, we don't
2103          * treat texture references as shared resources, but this means that
2104          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2105          * Hence, to ensure that the non-bonded kernels don't start before all
2106          * texture binding operations are finished, we need to wait for all ranks
2107          * to arrive here before continuing.
2108          *
2109          * Note that we could omit this barrier if GPUs are not shared (or
2110          * texture objects are used), but as this is initialization code, there
2111          * is not point in complicating things.
2112          */
2113 #ifdef GMX_THREAD_MPI
2114         if (PAR(cr))
2115         {
2116             gmx_barrier(cr);
2117         }
2118 #endif  /* GMX_THREAD_MPI */
2119     }
2120
2121     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2122     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2123 }
2124
2125 static void init_nb_verlet(FILE                *fp,
2126                            nonbonded_verlet_t **nb_verlet,
2127                            gmx_bool             bFEP_NonBonded,
2128                            const t_inputrec    *ir,
2129                            const t_forcerec    *fr,
2130                            const t_commrec     *cr,
2131                            const char          *nbpu_opt)
2132 {
2133     nonbonded_verlet_t *nbv;
2134     int                 i;
2135     char               *env;
2136     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2137
2138     nbnxn_alloc_t      *nb_alloc;
2139     nbnxn_free_t       *nb_free;
2140
2141     snew(nbv, 1);
2142
2143     pick_nbnxn_resources(cr, fr->hwinfo,
2144                          fr->bNonbonded,
2145                          &nbv->bUseGPU,
2146                          &bEmulateGPU,
2147                          fr->gpu_opt);
2148
2149     nbv->nbs = NULL;
2150
2151     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2152     for (i = 0; i < nbv->ngrp; i++)
2153     {
2154         nbv->grp[i].nbl_lists.nnbl = 0;
2155         nbv->grp[i].nbat           = NULL;
2156         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2157
2158         if (i == 0) /* local */
2159         {
2160             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2161                               nbv->bUseGPU, bEmulateGPU, ir,
2162                               &nbv->grp[i].kernel_type,
2163                               &nbv->grp[i].ewald_excl,
2164                               fr->bNonbonded);
2165         }
2166         else /* non-local */
2167         {
2168             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2169             {
2170                 /* Use GPU for local, select a CPU kernel for non-local */
2171                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2172                                   FALSE, FALSE, ir,
2173                                   &nbv->grp[i].kernel_type,
2174                                   &nbv->grp[i].ewald_excl,
2175                                   fr->bNonbonded);
2176
2177                 bHybridGPURun = TRUE;
2178             }
2179             else
2180             {
2181                 /* Use the same kernel for local and non-local interactions */
2182                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2183                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2184             }
2185         }
2186     }
2187
2188     if (nbv->bUseGPU)
2189     {
2190         /* init the NxN GPU data; the last argument tells whether we'll have
2191          * both local and non-local NB calculation on GPU */
2192         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2193                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2194                         cr->rank_pp_intranode,
2195                         (nbv->ngrp > 1) && !bHybridGPURun);
2196
2197         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2198         {
2199             char *end;
2200
2201             nbv->min_ci_balanced = strtol(env, &end, 10);
2202             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2203             {
2204                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2205             }
2206
2207             if (debug)
2208             {
2209                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2210                         nbv->min_ci_balanced);
2211             }
2212         }
2213         else
2214         {
2215             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2216             if (debug)
2217             {
2218                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2219                         nbv->min_ci_balanced);
2220             }
2221         }
2222     }
2223     else
2224     {
2225         nbv->min_ci_balanced = 0;
2226     }
2227
2228     *nb_verlet = nbv;
2229
2230     nbnxn_init_search(&nbv->nbs,
2231                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2232                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2233                       bFEP_NonBonded,
2234                       gmx_omp_nthreads_get(emntNonbonded));
2235
2236     for (i = 0; i < nbv->ngrp; i++)
2237     {
2238         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2239         {
2240             nb_alloc = &pmalloc;
2241             nb_free  = &pfree;
2242         }
2243         else
2244         {
2245             nb_alloc = NULL;
2246             nb_free  = NULL;
2247         }
2248
2249         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2250                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2251                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2252                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2253                                 nb_alloc, nb_free);
2254
2255         if (i == 0 ||
2256             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2257         {
2258             gmx_bool bSimpleList;
2259             int      enbnxninitcombrule;
2260
2261             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2262
2263             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2264             {
2265                 /* Plain LJ cut-off: we can optimize with combination rules */
2266                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2267             }
2268             else if (fr->vdwtype == evdwPME)
2269             {
2270                 /* LJ-PME: we need to use a combination rule for the grid */
2271                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2272                 {
2273                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2274                 }
2275                 else
2276                 {
2277                     enbnxninitcombrule = enbnxninitcombruleLB;
2278                 }
2279             }
2280             else
2281             {
2282                 /* We use a full combination matrix: no rule required */
2283                 enbnxninitcombrule = enbnxninitcombruleNONE;
2284             }
2285
2286
2287             snew(nbv->grp[i].nbat, 1);
2288             nbnxn_atomdata_init(fp,
2289                                 nbv->grp[i].nbat,
2290                                 nbv->grp[i].kernel_type,
2291                                 enbnxninitcombrule,
2292                                 fr->ntype, fr->nbfp,
2293                                 ir->opts.ngener,
2294                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2295                                 nb_alloc, nb_free);
2296         }
2297         else
2298         {
2299             nbv->grp[i].nbat = nbv->grp[0].nbat;
2300         }
2301     }
2302 }
2303
2304 void init_forcerec(FILE              *fp,
2305                    const output_env_t oenv,
2306                    t_forcerec        *fr,
2307                    t_fcdata          *fcd,
2308                    const t_inputrec  *ir,
2309                    const gmx_mtop_t  *mtop,
2310                    const t_commrec   *cr,
2311                    matrix             box,
2312                    const char        *tabfn,
2313                    const char        *tabafn,
2314                    const char        *tabpfn,
2315                    const char        *tabbfn,
2316                    const char        *nbpu_opt,
2317                    gmx_bool           bNoSolvOpt,
2318                    real               print_force)
2319 {
2320     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2321     real           rtab;
2322     char          *env;
2323     double         dbl;
2324     const t_block *cgs;
2325     gmx_bool       bGenericKernelOnly;
2326     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2327     gmx_bool       bFEP_NonBonded;
2328     t_nblists     *nbl;
2329     int           *nm_ind, egp_flags;
2330
2331     if (fr->hwinfo == NULL)
2332     {
2333         /* Detect hardware, gather information.
2334          * In mdrun, hwinfo has already been set before calling init_forcerec.
2335          * Here we ignore GPUs, as tools will not use them anyhow.
2336          */
2337         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2338     }
2339
2340     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2341     fr->use_simd_kernels = TRUE;
2342
2343     fr->bDomDec = DOMAINDECOMP(cr);
2344
2345     natoms = mtop->natoms;
2346
2347     if (check_box(ir->ePBC, box))
2348     {
2349         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2350     }
2351
2352     /* Test particle insertion ? */
2353     if (EI_TPI(ir->eI))
2354     {
2355         /* Set to the size of the molecule to be inserted (the last one) */
2356         /* Because of old style topologies, we have to use the last cg
2357          * instead of the last molecule type.
2358          */
2359         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2360         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2361         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2362         {
2363             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2364         }
2365     }
2366     else
2367     {
2368         fr->n_tpi = 0;
2369     }
2370
2371     /* Copy AdResS parameters */
2372     if (ir->bAdress)
2373     {
2374         fr->adress_type           = ir->adress->type;
2375         fr->adress_const_wf       = ir->adress->const_wf;
2376         fr->adress_ex_width       = ir->adress->ex_width;
2377         fr->adress_hy_width       = ir->adress->hy_width;
2378         fr->adress_icor           = ir->adress->icor;
2379         fr->adress_site           = ir->adress->site;
2380         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2381         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2382
2383
2384         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2385         for (i = 0; i < ir->adress->n_energy_grps; i++)
2386         {
2387             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2388         }
2389
2390         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2391         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2392         for (i = 0; i < fr->n_adress_tf_grps; i++)
2393         {
2394             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2395         }
2396         copy_rvec(ir->adress->refs, fr->adress_refs);
2397     }
2398     else
2399     {
2400         fr->adress_type           = eAdressOff;
2401         fr->adress_do_hybridpairs = FALSE;
2402     }
2403
2404     /* Copy the user determined parameters */
2405     fr->userint1  = ir->userint1;
2406     fr->userint2  = ir->userint2;
2407     fr->userint3  = ir->userint3;
2408     fr->userint4  = ir->userint4;
2409     fr->userreal1 = ir->userreal1;
2410     fr->userreal2 = ir->userreal2;
2411     fr->userreal3 = ir->userreal3;
2412     fr->userreal4 = ir->userreal4;
2413
2414     /* Shell stuff */
2415     fr->fc_stepsize = ir->fc_stepsize;
2416
2417     /* Free energy */
2418     fr->efep        = ir->efep;
2419     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2420     if (ir->fepvals->bScCoul)
2421     {
2422         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2423         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2424     }
2425     else
2426     {
2427         fr->sc_alphacoul  = 0;
2428         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2429     }
2430     fr->sc_power      = ir->fepvals->sc_power;
2431     fr->sc_r_power    = ir->fepvals->sc_r_power;
2432     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2433
2434     env = getenv("GMX_SCSIGMA_MIN");
2435     if (env != NULL)
2436     {
2437         dbl = 0;
2438         sscanf(env, "%lf", &dbl);
2439         fr->sc_sigma6_min = pow(dbl, 6);
2440         if (fp)
2441         {
2442             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2443         }
2444     }
2445
2446     fr->bNonbonded = TRUE;
2447     if (getenv("GMX_NO_NONBONDED") != NULL)
2448     {
2449         /* turn off non-bonded calculations */
2450         fr->bNonbonded = FALSE;
2451         md_print_warn(cr, fp,
2452                       "Found environment variable GMX_NO_NONBONDED.\n"
2453                       "Disabling nonbonded calculations.\n");
2454     }
2455
2456     bGenericKernelOnly = FALSE;
2457
2458     /* We now check in the NS code whether a particular combination of interactions
2459      * can be used with water optimization, and disable it if that is not the case.
2460      */
2461
2462     if (getenv("GMX_NB_GENERIC") != NULL)
2463     {
2464         if (fp != NULL)
2465         {
2466             fprintf(fp,
2467                     "Found environment variable GMX_NB_GENERIC.\n"
2468                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2469                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2470         }
2471         bGenericKernelOnly = TRUE;
2472     }
2473
2474     if (bGenericKernelOnly == TRUE)
2475     {
2476         bNoSolvOpt         = TRUE;
2477     }
2478
2479     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2480     {
2481         fr->use_simd_kernels = FALSE;
2482         if (fp != NULL)
2483         {
2484             fprintf(fp,
2485                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2486                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2487         }
2488     }
2489
2490     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2491
2492     /* Check if we can/should do all-vs-all kernels */
2493     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2494     fr->AllvsAll_work   = NULL;
2495     fr->AllvsAll_workgb = NULL;
2496
2497     /* All-vs-all kernels have not been implemented in 4.6, and
2498      * the SIMD group kernels are also buggy in this case. Non-SIMD
2499      * group kernels are OK. See Redmine #1249. */
2500     if (fr->bAllvsAll)
2501     {
2502         fr->bAllvsAll            = FALSE;
2503         fr->use_simd_kernels     = FALSE;
2504         if (fp != NULL)
2505         {
2506             fprintf(fp,
2507                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2508                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2509                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2510                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2511                     "we are proceeding by disabling all CPU architecture-specific\n"
2512                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2513                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2514         }
2515     }
2516
2517     /* Neighbour searching stuff */
2518     fr->cutoff_scheme = ir->cutoff_scheme;
2519     fr->bGrid         = (ir->ns_type == ensGRID);
2520     fr->ePBC          = ir->ePBC;
2521
2522     if (fr->cutoff_scheme == ecutsGROUP)
2523     {
2524         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2525             "removed in a future release when 'verlet' supports all interaction forms.\n";
2526
2527         if (MASTER(cr))
2528         {
2529             fprintf(stderr, "\n%s\n", note);
2530         }
2531         if (fp != NULL)
2532         {
2533             fprintf(fp, "\n%s\n", note);
2534         }
2535     }
2536
2537     /* Determine if we will do PBC for distances in bonded interactions */
2538     if (fr->ePBC == epbcNONE)
2539     {
2540         fr->bMolPBC = FALSE;
2541     }
2542     else
2543     {
2544         if (!DOMAINDECOMP(cr))
2545         {
2546             /* The group cut-off scheme and SHAKE assume charge groups
2547              * are whole, but not using molpbc is faster in most cases.
2548              */
2549             if (fr->cutoff_scheme == ecutsGROUP ||
2550                 (ir->eConstrAlg == econtSHAKE &&
2551                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2552                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2553             {
2554                 fr->bMolPBC = ir->bPeriodicMols;
2555             }
2556             else
2557             {
2558                 fr->bMolPBC = TRUE;
2559                 if (getenv("GMX_USE_GRAPH") != NULL)
2560                 {
2561                     fr->bMolPBC = FALSE;
2562                     if (fp)
2563                     {
2564                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2565                     }
2566                 }
2567             }
2568         }
2569         else
2570         {
2571             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2572         }
2573     }
2574     fr->bGB = (ir->implicit_solvent == eisGBSA);
2575
2576     fr->rc_scaling = ir->refcoord_scaling;
2577     copy_rvec(ir->posres_com, fr->posres_com);
2578     copy_rvec(ir->posres_comB, fr->posres_comB);
2579     fr->rlist                    = cutoff_inf(ir->rlist);
2580     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2581     fr->eeltype                  = ir->coulombtype;
2582     fr->vdwtype                  = ir->vdwtype;
2583     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2584
2585     fr->coulomb_modifier = ir->coulomb_modifier;
2586     fr->vdw_modifier     = ir->vdw_modifier;
2587
2588     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2589     switch (fr->eeltype)
2590     {
2591         case eelCUT:
2592             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2593             break;
2594
2595         case eelRF:
2596         case eelGRF:
2597         case eelRF_NEC:
2598             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2599             break;
2600
2601         case eelRF_ZERO:
2602             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2603             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2604             break;
2605
2606         case eelSWITCH:
2607         case eelSHIFT:
2608         case eelUSER:
2609         case eelENCADSHIFT:
2610         case eelPMESWITCH:
2611         case eelPMEUSER:
2612         case eelPMEUSERSWITCH:
2613             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2614             break;
2615
2616         case eelPME:
2617         case eelEWALD:
2618             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2619             break;
2620
2621         default:
2622             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2623             break;
2624     }
2625
2626     /* Vdw: Translate from mdp settings to kernel format */
2627     switch (fr->vdwtype)
2628     {
2629         case evdwCUT:
2630             if (fr->bBHAM)
2631             {
2632                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2633             }
2634             else
2635             {
2636                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2637             }
2638             break;
2639         case evdwPME:
2640             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2641             break;
2642
2643         case evdwSWITCH:
2644         case evdwSHIFT:
2645         case evdwUSER:
2646         case evdwENCADSHIFT:
2647             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2648             break;
2649
2650         default:
2651             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2652             break;
2653     }
2654
2655     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2656     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2657     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2658
2659     fr->bTwinRange = fr->rlistlong > fr->rlist;
2660     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2661
2662     fr->reppow     = mtop->ffparams.reppow;
2663
2664     if (ir->cutoff_scheme == ecutsGROUP)
2665     {
2666         fr->bvdwtab    = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2667                           && !EVDW_PME(fr->vdwtype));
2668         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2669         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2670                            fr->eeltype == eelEWALD ||
2671                            fr->eeltype == eelPME ||
2672                            fr->eeltype == eelRF ||
2673                            fr->eeltype == eelRF_ZERO);
2674
2675         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2676          * going to be faster to tabulate the interaction than calling the generic kernel.
2677          */
2678         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2679         {
2680             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2681             {
2682                 fr->bcoultab = TRUE;
2683             }
2684         }
2685         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2686                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2687                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2688                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2689         {
2690             if (fr->rcoulomb != fr->rvdw)
2691             {
2692                 fr->bcoultab = TRUE;
2693             }
2694         }
2695
2696         if (getenv("GMX_REQUIRE_TABLES"))
2697         {
2698             fr->bvdwtab  = TRUE;
2699             fr->bcoultab = TRUE;
2700         }
2701
2702         if (fp)
2703         {
2704             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2705             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2706         }
2707
2708         if (fr->bvdwtab == TRUE)
2709         {
2710             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2711             fr->nbkernel_vdw_modifier    = eintmodNONE;
2712         }
2713         if (fr->bcoultab == TRUE)
2714         {
2715             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2716             fr->nbkernel_elec_modifier    = eintmodNONE;
2717         }
2718     }
2719
2720     if (ir->cutoff_scheme == ecutsVERLET)
2721     {
2722         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2723         {
2724             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2725         }
2726         fr->bvdwtab  = FALSE;
2727         fr->bcoultab = FALSE;
2728     }
2729
2730     /* Tables are used for direct ewald sum */
2731     if (fr->bEwald)
2732     {
2733         if (EEL_PME(ir->coulombtype))
2734         {
2735             if (fp)
2736             {
2737                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2738             }
2739             if (ir->coulombtype == eelP3M_AD)
2740             {
2741                 please_cite(fp, "Hockney1988");
2742                 please_cite(fp, "Ballenegger2012");
2743             }
2744             else
2745             {
2746                 please_cite(fp, "Essmann95a");
2747             }
2748
2749             if (ir->ewald_geometry == eewg3DC)
2750             {
2751                 if (fp)
2752                 {
2753                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2754                 }
2755                 please_cite(fp, "In-Chul99a");
2756             }
2757         }
2758         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2759         init_ewald_tab(&(fr->ewald_table), ir, fp);
2760         if (fp)
2761         {
2762             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2763                     1/fr->ewaldcoeff_q);
2764         }
2765     }
2766
2767     if (EVDW_PME(ir->vdwtype))
2768     {
2769         if (fp)
2770         {
2771             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2772         }
2773         please_cite(fp, "Essmann95a");
2774         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2775         if (fp)
2776         {
2777             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2778                     1/fr->ewaldcoeff_lj);
2779         }
2780     }
2781
2782     /* Electrostatics */
2783     fr->epsilon_r       = ir->epsilon_r;
2784     fr->epsilon_rf      = ir->epsilon_rf;
2785     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2786     fr->rcoulomb_switch = ir->rcoulomb_switch;
2787     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2788
2789     /* Parameters for generalized RF */
2790     fr->zsquare = 0.0;
2791     fr->temp    = 0.0;
2792
2793     if (fr->eeltype == eelGRF)
2794     {
2795         init_generalized_rf(fp, mtop, ir, fr);
2796     }
2797
2798     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2799                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2800                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2801                        IR_ELEC_FIELD(*ir) ||
2802                        (fr->adress_icor != eAdressICOff)
2803                        );
2804
2805     if (fr->cutoff_scheme == ecutsGROUP &&
2806         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2807     {
2808         /* Count the total number of charge groups */
2809         fr->cg_nalloc = ncg_mtop(mtop);
2810         srenew(fr->cg_cm, fr->cg_nalloc);
2811     }
2812     if (fr->shift_vec == NULL)
2813     {
2814         snew(fr->shift_vec, SHIFTS);
2815     }
2816
2817     if (fr->fshift == NULL)
2818     {
2819         snew(fr->fshift, SHIFTS);
2820     }
2821
2822     if (fr->nbfp == NULL)
2823     {
2824         fr->ntype = mtop->ffparams.atnr;
2825         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2826         if (EVDW_PME(fr->vdwtype))
2827         {
2828             fr->ljpme_c6grid  = make_ljpme_c6grid(&mtop->ffparams, fr);
2829         }
2830     }
2831
2832     /* Copy the energy group exclusions */
2833     fr->egp_flags = ir->opts.egp_flags;
2834
2835     /* Van der Waals stuff */
2836     fr->rvdw        = cutoff_inf(ir->rvdw);
2837     fr->rvdw_switch = ir->rvdw_switch;
2838     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2839     {
2840         if (fr->rvdw_switch >= fr->rvdw)
2841         {
2842             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2843                       fr->rvdw_switch, fr->rvdw);
2844         }
2845         if (fp)
2846         {
2847             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2848                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2849                     fr->rvdw_switch, fr->rvdw);
2850         }
2851     }
2852
2853     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2854     {
2855         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2856     }
2857
2858     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2859     {
2860         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2861     }
2862
2863     if (fp)
2864     {
2865         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2866                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2867     }
2868
2869     fr->eDispCorr = ir->eDispCorr;
2870     if (ir->eDispCorr != edispcNO)
2871     {
2872         set_avcsixtwelve(fp, fr, mtop);
2873     }
2874
2875     if (fr->bBHAM)
2876     {
2877         set_bham_b_max(fp, fr, mtop);
2878     }
2879
2880     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2881
2882     /* Copy the GBSA data (radius, volume and surftens for each
2883      * atomtype) from the topology atomtype section to forcerec.
2884      */
2885     snew(fr->atype_radius, fr->ntype);
2886     snew(fr->atype_vol, fr->ntype);
2887     snew(fr->atype_surftens, fr->ntype);
2888     snew(fr->atype_gb_radius, fr->ntype);
2889     snew(fr->atype_S_hct, fr->ntype);
2890
2891     if (mtop->atomtypes.nr > 0)
2892     {
2893         for (i = 0; i < fr->ntype; i++)
2894         {
2895             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2896         }
2897         for (i = 0; i < fr->ntype; i++)
2898         {
2899             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2900         }
2901         for (i = 0; i < fr->ntype; i++)
2902         {
2903             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2904         }
2905         for (i = 0; i < fr->ntype; i++)
2906         {
2907             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2908         }
2909         for (i = 0; i < fr->ntype; i++)
2910         {
2911             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2912         }
2913     }
2914
2915     /* Generate the GB table if needed */
2916     if (fr->bGB)
2917     {
2918 #ifdef GMX_DOUBLE
2919         fr->gbtabscale = 2000;
2920 #else
2921         fr->gbtabscale = 500;
2922 #endif
2923
2924         fr->gbtabr = 100;
2925         fr->gbtab  = make_gb_table(oenv, fr);
2926
2927         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2928
2929         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2930         if (!DOMAINDECOMP(cr))
2931         {
2932             make_local_gb(cr, fr->born, ir->gb_algorithm);
2933         }
2934     }
2935
2936     /* Set the charge scaling */
2937     if (fr->epsilon_r != 0)
2938     {
2939         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2940     }
2941     else
2942     {
2943         /* eps = 0 is infinite dieletric: no coulomb interactions */
2944         fr->epsfac = 0;
2945     }
2946
2947     /* Reaction field constants */
2948     if (EEL_RF(fr->eeltype))
2949     {
2950         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2951                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2952                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2953     }
2954
2955     /*This now calculates sum for q and c6*/
2956     set_chargesum(fp, fr, mtop);
2957
2958     /* if we are using LR electrostatics, and they are tabulated,
2959      * the tables will contain modified coulomb interactions.
2960      * Since we want to use the non-shifted ones for 1-4
2961      * coulombic interactions, we must have an extra set of tables.
2962      */
2963
2964     /* Construct tables.
2965      * A little unnecessary to make both vdw and coul tables sometimes,
2966      * but what the heck... */
2967
2968     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2969         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2970
2971     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2972                              fr->bBHAM || fr->bEwald) &&
2973                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2974                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2975                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2976
2977     negp_pp   = ir->opts.ngener - ir->nwall;
2978     negptable = 0;
2979     if (!bMakeTables)
2980     {
2981         bSomeNormalNbListsAreInUse = TRUE;
2982         fr->nnblists               = 1;
2983     }
2984     else
2985     {
2986         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2987         for (egi = 0; egi < negp_pp; egi++)
2988         {
2989             for (egj = egi; egj < negp_pp; egj++)
2990             {
2991                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2992                 if (!(egp_flags & EGP_EXCL))
2993                 {
2994                     if (egp_flags & EGP_TABLE)
2995                     {
2996                         negptable++;
2997                     }
2998                     else
2999                     {
3000                         bSomeNormalNbListsAreInUse = TRUE;
3001                     }
3002                 }
3003             }
3004         }
3005         if (bSomeNormalNbListsAreInUse)
3006         {
3007             fr->nnblists = negptable + 1;
3008         }
3009         else
3010         {
3011             fr->nnblists = negptable;
3012         }
3013         if (fr->nnblists > 1)
3014         {
3015             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3016         }
3017     }
3018
3019     if (ir->adress)
3020     {
3021         fr->nnblists *= 2;
3022     }
3023
3024     snew(fr->nblists, fr->nnblists);
3025
3026     /* This code automatically gives table length tabext without cut-off's,
3027      * in that case grompp should already have checked that we do not need
3028      * normal tables and we only generate tables for 1-4 interactions.
3029      */
3030     rtab = ir->rlistlong + ir->tabext;
3031
3032     if (bMakeTables)
3033     {
3034         /* make tables for ordinary interactions */
3035         if (bSomeNormalNbListsAreInUse)
3036         {
3037             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3038             if (ir->adress)
3039             {
3040                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3041             }
3042             if (!bMakeSeparate14Table)
3043             {
3044                 fr->tab14 = fr->nblists[0].table_elec_vdw;
3045             }
3046             m = 1;
3047         }
3048         else
3049         {
3050             m = 0;
3051         }
3052         if (negptable > 0)
3053         {
3054             /* Read the special tables for certain energy group pairs */
3055             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3056             for (egi = 0; egi < negp_pp; egi++)
3057             {
3058                 for (egj = egi; egj < negp_pp; egj++)
3059                 {
3060                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3061                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3062                     {
3063                         nbl = &(fr->nblists[m]);
3064                         if (fr->nnblists > 1)
3065                         {
3066                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3067                         }
3068                         /* Read the table file with the two energy groups names appended */
3069                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3070                                         *mtop->groups.grpname[nm_ind[egi]],
3071                                         *mtop->groups.grpname[nm_ind[egj]],
3072                                         &fr->nblists[m]);
3073                         if (ir->adress)
3074                         {
3075                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3076                                             *mtop->groups.grpname[nm_ind[egi]],
3077                                             *mtop->groups.grpname[nm_ind[egj]],
3078                                             &fr->nblists[fr->nnblists/2+m]);
3079                         }
3080                         m++;
3081                     }
3082                     else if (fr->nnblists > 1)
3083                     {
3084                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3085                     }
3086                 }
3087             }
3088         }
3089     }
3090     if (bMakeSeparate14Table)
3091     {
3092         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3093         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3094                                 GMX_MAKETABLES_14ONLY);
3095     }
3096
3097     /* Read AdResS Thermo Force table if needed */
3098     if (fr->adress_icor == eAdressICThermoForce)
3099     {
3100         /* old todo replace */
3101
3102         if (ir->adress->n_tf_grps > 0)
3103         {
3104             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3105
3106         }
3107         else
3108         {
3109             /* load the default table */
3110             snew(fr->atf_tabs, 1);
3111             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3112         }
3113     }
3114
3115     /* Wall stuff */
3116     fr->nwall = ir->nwall;
3117     if (ir->nwall && ir->wall_type == ewtTABLE)
3118     {
3119         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3120     }
3121
3122     if (fcd && tabbfn)
3123     {
3124         fcd->bondtab  = make_bonded_tables(fp,
3125                                            F_TABBONDS, F_TABBONDSNC,
3126                                            mtop, tabbfn, "b");
3127         fcd->angletab = make_bonded_tables(fp,
3128                                            F_TABANGLES, -1,
3129                                            mtop, tabbfn, "a");
3130         fcd->dihtab   = make_bonded_tables(fp,
3131                                            F_TABDIHS, -1,
3132                                            mtop, tabbfn, "d");
3133     }
3134     else
3135     {
3136         if (debug)
3137         {
3138             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3139         }
3140     }
3141
3142     /* QM/MM initialization if requested
3143      */
3144     if (ir->bQMMM)
3145     {
3146         fprintf(stderr, "QM/MM calculation requested.\n");
3147     }
3148
3149     fr->bQMMM      = ir->bQMMM;
3150     fr->qr         = mk_QMMMrec();
3151
3152     /* Set all the static charge group info */
3153     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3154                                    &bFEP_NonBonded,
3155                                    &fr->bExcl_IntraCGAll_InterCGNone);
3156     if (DOMAINDECOMP(cr))
3157     {
3158         fr->cginfo = NULL;
3159     }
3160     else
3161     {
3162         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3163     }
3164
3165     if (!DOMAINDECOMP(cr))
3166     {
3167         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3168                             mtop->natoms, mtop->natoms, mtop->natoms);
3169     }
3170
3171     fr->print_force = print_force;
3172
3173
3174     /* coarse load balancing vars */
3175     fr->t_fnbf    = 0.;
3176     fr->t_wait    = 0.;
3177     fr->timesteps = 0;
3178
3179     /* Initialize neighbor search */
3180     init_ns(fp, cr, &fr->ns, fr, mtop);
3181
3182     if (cr->duty & DUTY_PP)
3183     {
3184         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3185         /*
3186            if (ir->bAdress)
3187             {
3188                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3189             }
3190          */
3191     }
3192
3193     /* Initialize the thread working data for bonded interactions */
3194     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3195
3196     snew(fr->excl_load, fr->nthreads+1);
3197
3198     if (fr->cutoff_scheme == ecutsVERLET)
3199     {
3200         if (ir->rcoulomb != ir->rvdw)
3201         {
3202             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3203         }
3204
3205         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3206     }
3207
3208     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3209     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3210
3211     if (ir->eDispCorr != edispcNO)
3212     {
3213         calc_enervirdiff(fp, ir->eDispCorr, fr);
3214     }
3215 }
3216
3217 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3218 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3219 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3220
3221 void pr_forcerec(FILE *fp, t_forcerec *fr)
3222 {
3223     int i;
3224
3225     pr_real(fp, fr->rlist);
3226     pr_real(fp, fr->rcoulomb);
3227     pr_real(fp, fr->fudgeQQ);
3228     pr_bool(fp, fr->bGrid);
3229     pr_bool(fp, fr->bTwinRange);
3230     /*pr_int(fp,fr->cg0);
3231        pr_int(fp,fr->hcg);*/
3232     for (i = 0; i < fr->nnblists; i++)
3233     {
3234         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3235     }
3236     pr_real(fp, fr->rcoulomb_switch);
3237     pr_real(fp, fr->rcoulomb);
3238
3239     fflush(fp);
3240 }
3241
3242 void forcerec_set_excl_load(t_forcerec           *fr,
3243                             const gmx_localtop_t *top)
3244 {
3245     const int *ind, *a;
3246     int        t, i, j, ntot, n, ntarget;
3247
3248     ind = top->excls.index;
3249     a   = top->excls.a;
3250
3251     ntot = 0;
3252     for (i = 0; i < top->excls.nr; i++)
3253     {
3254         for (j = ind[i]; j < ind[i+1]; j++)
3255         {
3256             if (a[j] > i)
3257             {
3258                 ntot++;
3259             }
3260         }
3261     }
3262
3263     fr->excl_load[0] = 0;
3264     n                = 0;
3265     i                = 0;
3266     for (t = 1; t <= fr->nthreads; t++)
3267     {
3268         ntarget = (ntot*t)/fr->nthreads;
3269         while (i < top->excls.nr && n < ntarget)
3270         {
3271             for (j = ind[i]; j < ind[i+1]; j++)
3272             {
3273                 if (a[j] > i)
3274                 {
3275                     n++;
3276                 }
3277             }
3278             i++;
3279         }
3280         fr->excl_load[t] = i;
3281     }
3282 }