src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "vec.h"
  47 #include "maths.h"
  48 #include "macros.h"
  49 #include "smalloc.h"
  50 #include "macros.h"
  51 #include "gmx_fatal.h"
  52 #include "gmx_fatal_collective.h"
  53 #include "physics.h"
  54 #include "force.h"
  55 #include "tables.h"
  56 #include "nonbonded.h"
  57 #include "invblock.h"
  58 #include "names.h"
  59 #include "network.h"
  60 #include "pbc.h"
  61 #include "ns.h"
  62 #include "mshift.h"
  63 #include "txtdump.h"
  64 #include "coulomb.h"
  65 #include "md_support.h"
  66 #include "md_logging.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "qmmm.h"
  70 #include "copyrite.h"
  71 #include "mtop_util.h"
  72 #include "nbnxn_search.h"
  73 #include "nbnxn_atomdata.h"
  74 #include "nbnxn_consts.h"
  75 #include "statutil.h"
  76 #include "gmx_omp_nthreads.h"
  77 #include "gmx_detect_hardware.h"
  78
  79 #ifdef _MSC_VER
  80 /* MSVC definition for __cpuid() */
  81 #include <intrin.h>
  82 #endif
  83
  84 #include "types/nbnxn_cuda_types_ext.h"
  85 #include "gpu_utils.h"
  86 #include "nbnxn_cuda_data_mgmt.h"
  87 #include "pmalloc_cuda.h"
  88
  89 t_forcerec *mk_forcerec(void)
  90 {
  91     t_forcerec *fr;
  92
  93     snew(fr, 1);
  94
  95     return fr;
  96 }
  97
  98 #ifdef DEBUG
  99 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 100 {
 101     int i, j;
 102
 103     for (i = 0; (i < atnr); i++)
 104     {
 105         for (j = 0; (j < atnr); j++)
 106         {
 107             fprintf(fp, "%2d - %2d", i, j);
 108             if (bBHAM)
 109             {
 110                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 111                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 112             }
 113             else
 114             {
 115                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 116                         C12(nbfp, atnr, i, j)/12.0);
 117             }
 118         }
 119     }
 120 }
 121 #endif
 122
 123 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 124 {
 125     real *nbfp;
 126     int   i, j, k, atnr;
 127
 128     atnr = idef->atnr;
 129     if (bBHAM)
 130     {
 131         snew(nbfp, 3*atnr*atnr);
 132         for (i = k = 0; (i < atnr); i++)
 133         {
 134             for (j = 0; (j < atnr); j++, k++)
 135             {
 136                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 137                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 138                 /* nbfp now includes the 6.0 derivative prefactor */
 139                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 140             }
 141         }
 142     }
 143     else
 144     {
 145         snew(nbfp, 2*atnr*atnr);
 146         for (i = k = 0; (i < atnr); i++)
 147         {
 148             for (j = 0; (j < atnr); j++, k++)
 149             {
 150                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 151                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 152                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 153             }
 154         }
 155     }
 156
 157     return nbfp;
 158 }
 159
 160 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 161 {
 162     real *nbfp;
 163     int   i, j, k, atnr;
 164     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 165     real  c6, c12;
 166
 167     atnr = idef->atnr;
 168     snew(nbfp, 2*atnr*atnr);
 169     for (i = 0; i < atnr; ++i)
 170     {
 171         for (j = 0; j < atnr; ++j)
 172         {
 173             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 174             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 175             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 176             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 177             c6   = sqrt(c6i  * c6j);
 178             c12  = sqrt(c12i * c12j);
 179             if (comb_rule == eCOMB_ARITHMETIC
 180                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 181             {
 182                 sigmai = pow(c12i / c6i, 1.0/6.0);
 183                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 184                 epsi   = c6i * c6i / c12i;
 185                 epsj   = c6j * c6j / c12j;
 186                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 187                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 188             }
 189             C6(nbfp, atnr, i, j)   = c6*6.0;
 190             C12(nbfp, atnr, i, j)  = c12*12.0;
 191         }
 192     }
 193     return nbfp;
 194 }
 195
 196 /* This routine sets fr->solvent_opt to the most common solvent in the
 197  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 198  * the fr->solvent_type array with the correct type (or esolNO).
 199  *
 200  * Charge groups that fulfill the conditions but are not identical to the
 201  * most common one will be marked as esolNO in the solvent_type array.
 202  *
 203  * TIP3p is identical to SPC for these purposes, so we call it
 204  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 205  *
 206  * NOTE: QM particle should not
 207  * become an optimized solvent. Not even if there is only one charge
 208  * group in the Qm
 209  */
 210
 211 typedef struct
 212 {
 213     int    model;
 214     int    count;
 215     int    vdwtype[4];
 216     real   charge[4];
 217 } solvent_parameters_t;
 218
 219 static void
 220 check_solvent_cg(const gmx_moltype_t    *molt,
 221                  int                     cg0,
 222                  int                     nmol,
 223                  const unsigned char    *qm_grpnr,
 224                  const t_grps           *qm_grps,
 225                  t_forcerec   *          fr,
 226                  int                    *n_solvent_parameters,
 227                  solvent_parameters_t  **solvent_parameters_p,
 228                  int                     cginfo,
 229                  int                    *cg_sp)
 230 {
 231     const t_blocka     *  excl;
 232     t_atom               *atom;
 233     int                   j, k;
 234     int                   j0, j1, nj;
 235     gmx_bool              perturbed;
 236     gmx_bool              has_vdw[4];
 237     gmx_bool              match;
 238     real                  tmp_charge[4];
 239     int                   tmp_vdwtype[4];
 240     int                   tjA;
 241     gmx_bool              qm;
 242     solvent_parameters_t *solvent_parameters;
 243
 244     /* We use a list with parameters for each solvent type.
 245      * Every time we discover a new molecule that fulfills the basic
 246      * conditions for a solvent we compare with the previous entries
 247      * in these lists. If the parameters are the same we just increment
 248      * the counter for that type, and otherwise we create a new type
 249      * based on the current molecule.
 250      *
 251      * Once we've finished going through all molecules we check which
 252      * solvent is most common, and mark all those molecules while we
 253      * clear the flag on all others.
 254      */
 255
 256     solvent_parameters = *solvent_parameters_p;
 257
 258     /* Mark the cg first as non optimized */
 259     *cg_sp = -1;
 260
 261     /* Check if this cg has no exclusions with atoms in other charge groups
 262      * and all atoms inside the charge group excluded.
 263      * We only have 3 or 4 atom solvent loops.
 264      */
 265     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 266         !GET_CGINFO_EXCL_INTRA(cginfo))
 267     {
 268         return;
 269     }
 270
 271     /* Get the indices of the first atom in this charge group */
 272     j0     = molt->cgs.index[cg0];
 273     j1     = molt->cgs.index[cg0+1];
 274
 275     /* Number of atoms in our molecule */
 276     nj     = j1 - j0;
 277
 278     if (debug)
 279     {
 280         fprintf(debug,
 281                 "Moltype '%s': there are %d atoms in this charge group\n",
 282                 *molt->name, nj);
 283     }
 284
 285     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 286      * otherwise skip it.
 287      */
 288     if (nj < 3 || nj > 4)
 289     {
 290         return;
 291     }
 292
 293     /* Check if we are doing QM on this group */
 294     qm = FALSE;
 295     if (qm_grpnr != NULL)
 296     {
 297         for (j = j0; j < j1 && !qm; j++)
 298         {
 299             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 300         }
 301     }
 302     /* Cannot use solvent optimization with QM */
 303     if (qm)
 304     {
 305         return;
 306     }
 307
 308     atom = molt->atoms.atom;
 309
 310     /* Still looks like a solvent, time to check parameters */
 311
 312     /* If it is perturbed (free energy) we can't use the solvent loops,
 313      * so then we just skip to the next molecule.
 314      */
 315     perturbed = FALSE;
 316
 317     for (j = j0; j < j1 && !perturbed; j++)
 318     {
 319         perturbed = PERTURBED(atom[j]);
 320     }
 321
 322     if (perturbed)
 323     {
 324         return;
 325     }
 326
 327     /* Now it's only a question if the VdW and charge parameters
 328      * are OK. Before doing the check we compare and see if they are
 329      * identical to a possible previous solvent type.
 330      * First we assign the current types and charges.
 331      */
 332     for (j = 0; j < nj; j++)
 333     {
 334         tmp_vdwtype[j] = atom[j0+j].type;
 335         tmp_charge[j]  = atom[j0+j].q;
 336     }
 337
 338     /* Does it match any previous solvent type? */
 339     for (k = 0; k < *n_solvent_parameters; k++)
 340     {
 341         match = TRUE;
 342
 343
 344         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 345         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 346              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 347         {
 348             match = FALSE;
 349         }
 350
 351         /* Check that types & charges match for all atoms in molecule */
 352         for (j = 0; j < nj && match == TRUE; j++)
 353         {
 354             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 355             {
 356                 match = FALSE;
 357             }
 358             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 359             {
 360                 match = FALSE;
 361             }
 362         }
 363         if (match == TRUE)
 364         {
 365             /* Congratulations! We have a matched solvent.
 366              * Flag it with this type for later processing.
 367              */
 368             *cg_sp = k;
 369             solvent_parameters[k].count += nmol;
 370
 371             /* We are done with this charge group */
 372             return;
 373         }
 374     }
 375
 376     /* If we get here, we have a tentative new solvent type.
 377      * Before we add it we must check that it fulfills the requirements
 378      * of the solvent optimized loops. First determine which atoms have
 379      * VdW interactions.
 380      */
 381     for (j = 0; j < nj; j++)
 382     {
 383         has_vdw[j] = FALSE;
 384         tjA        = tmp_vdwtype[j];
 385
 386         /* Go through all other tpes and see if any have non-zero
 387          * VdW parameters when combined with this one.
 388          */
 389         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 390         {
 391             /* We already checked that the atoms weren't perturbed,
 392              * so we only need to check state A now.
 393              */
 394             if (fr->bBHAM)
 395             {
 396                 has_vdw[j] = (has_vdw[j] ||
 397                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 398                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 399                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 400             }
 401             else
 402             {
 403                 /* Standard LJ */
 404                 has_vdw[j] = (has_vdw[j] ||
 405                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 406                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 407             }
 408         }
 409     }
 410
 411     /* Now we know all we need to make the final check and assignment. */
 412     if (nj == 3)
 413     {
 414         /* So, is it an SPC?
 415          * For this we require thatn all atoms have charge,
 416          * the charges on atom 2 & 3 should be the same, and only
 417          * atom 1 might have VdW.
 418          */
 419         if (has_vdw[1] == FALSE &&
 420             has_vdw[2] == FALSE &&
 421             tmp_charge[0]  != 0 &&
 422             tmp_charge[1]  != 0 &&
 423             tmp_charge[2]  == tmp_charge[1])
 424         {
 425             srenew(solvent_parameters, *n_solvent_parameters+1);
 426             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 427             solvent_parameters[*n_solvent_parameters].count = nmol;
 428             for (k = 0; k < 3; k++)
 429             {
 430                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 431                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 432             }
 433
 434             *cg_sp = *n_solvent_parameters;
 435             (*n_solvent_parameters)++;
 436         }
 437     }
 438     else if (nj == 4)
 439     {
 440         /* Or could it be a TIP4P?
 441          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 442          * Only atom 1 mght have VdW.
 443          */
 444         if (has_vdw[1] == FALSE &&
 445             has_vdw[2] == FALSE &&
 446             has_vdw[3] == FALSE &&
 447             tmp_charge[0]  == 0 &&
 448             tmp_charge[1]  != 0 &&
 449             tmp_charge[2]  == tmp_charge[1] &&
 450             tmp_charge[3]  != 0)
 451         {
 452             srenew(solvent_parameters, *n_solvent_parameters+1);
 453             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 454             solvent_parameters[*n_solvent_parameters].count = nmol;
 455             for (k = 0; k < 4; k++)
 456             {
 457                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 458                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 459             }
 460
 461             *cg_sp = *n_solvent_parameters;
 462             (*n_solvent_parameters)++;
 463         }
 464     }
 465
 466     *solvent_parameters_p = solvent_parameters;
 467 }
 468
 469 static void
 470 check_solvent(FILE  *                fp,
 471               const gmx_mtop_t  *    mtop,
 472               t_forcerec  *          fr,
 473               cginfo_mb_t           *cginfo_mb)
 474 {
 475     const t_block     *   cgs;
 476     const t_block     *   mols;
 477     const gmx_moltype_t  *molt;
 478     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 479     int                   n_solvent_parameters;
 480     solvent_parameters_t *solvent_parameters;
 481     int                 **cg_sp;
 482     int                   bestsp, bestsol;
 483
 484     if (debug)
 485     {
 486         fprintf(debug, "Going to determine what solvent types we have.\n");
 487     }
 488
 489     mols = &mtop->mols;
 490
 491     n_solvent_parameters = 0;
 492     solvent_parameters   = NULL;
 493     /* Allocate temporary array for solvent type */
 494     snew(cg_sp, mtop->nmolblock);
 495
 496     cg_offset = 0;
 497     at_offset = 0;
 498     for (mb = 0; mb < mtop->nmolblock; mb++)
 499     {
 500         molt = &mtop->moltype[mtop->molblock[mb].type];
 501         cgs  = &molt->cgs;
 502         /* Here we have to loop over all individual molecules
 503          * because we need to check for QMMM particles.
 504          */
 505         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 506         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 507         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 508         for (mol = 0; mol < nmol_ch; mol++)
 509         {
 510             cgm = mol*cgs->nr;
 511             am  = mol*cgs->index[cgs->nr];
 512             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 513             {
 514                 check_solvent_cg(molt, cg_mol, nmol,
 515                                  mtop->groups.grpnr[egcQMMM] ?
 516                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 517                                  &mtop->groups.grps[egcQMMM],
 518                                  fr,
 519                                  &n_solvent_parameters, &solvent_parameters,
 520                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 521                                  &cg_sp[mb][cgm+cg_mol]);
 522             }
 523         }
 524         cg_offset += cgs->nr;
 525         at_offset += cgs->index[cgs->nr];
 526     }
 527
 528     /* Puh! We finished going through all charge groups.
 529      * Now find the most common solvent model.
 530      */
 531
 532     /* Most common solvent this far */
 533     bestsp = -2;
 534     for (i = 0; i < n_solvent_parameters; i++)
 535     {
 536         if (bestsp == -2 ||
 537             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 538         {
 539             bestsp = i;
 540         }
 541     }
 542
 543     if (bestsp >= 0)
 544     {
 545         bestsol = solvent_parameters[bestsp].model;
 546     }
 547     else
 548     {
 549         bestsol = esolNO;
 550     }
 551
 552 #ifdef DISABLE_WATER_NLIST
 553     bestsol = esolNO;
 554 #endif
 555
 556     fr->nWatMol = 0;
 557     for (mb = 0; mb < mtop->nmolblock; mb++)
 558     {
 559         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 560         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 561         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 562         {
 563             if (cg_sp[mb][i] == bestsp)
 564             {
 565                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 566                 fr->nWatMol += nmol;
 567             }
 568             else
 569             {
 570                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 571             }
 572         }
 573         sfree(cg_sp[mb]);
 574     }
 575     sfree(cg_sp);
 576
 577     if (bestsol != esolNO && fp != NULL)
 578     {
 579         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 580                 esol_names[bestsol],
 581                 solvent_parameters[bestsp].count);
 582     }
 583
 584     sfree(solvent_parameters);
 585     fr->solvent_opt = bestsol;
 586 }
 587
 588 enum {
 589     acNONE = 0, acCONSTRAINT, acSETTLE
 590 };
 591
 592 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 593                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 594                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 595 {
 596     const t_block        *cgs;
 597     const t_blocka       *excl;
 598     const gmx_moltype_t  *molt;
 599     const gmx_molblock_t *molb;
 600     cginfo_mb_t          *cginfo_mb;
 601     gmx_bool             *type_VDW;
 602     int                  *cginfo;
 603     int                   cg_offset, a_offset, cgm, am;
 604     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 605     int                  *a_con;
 606     int                   ftype;
 607     int                   ia;
 608     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 609
 610     ncg_tot = ncg_mtop(mtop);
 611     snew(cginfo_mb, mtop->nmolblock);
 612
 613     snew(type_VDW, fr->ntype);
 614     for (ai = 0; ai < fr->ntype; ai++)
 615     {
 616         type_VDW[ai] = FALSE;
 617         for (j = 0; j < fr->ntype; j++)
 618         {
 619             type_VDW[ai] = type_VDW[ai] ||
 620                 fr->bBHAM ||
 621                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 622                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 623         }
 624     }
 625
 626     *bExcl_IntraCGAll_InterCGNone = TRUE;
 627
 628     excl_nalloc = 10;
 629     snew(bExcl, excl_nalloc);
 630     cg_offset = 0;
 631     a_offset  = 0;
 632     for (mb = 0; mb < mtop->nmolblock; mb++)
 633     {
 634         molb = &mtop->molblock[mb];
 635         molt = &mtop->moltype[molb->type];
 636         cgs  = &molt->cgs;
 637         excl = &molt->excls;
 638
 639         /* Check if the cginfo is identical for all molecules in this block.
 640          * If so, we only need an array of the size of one molecule.
 641          * Otherwise we make an array of #mol times #cgs per molecule.
 642          */
 643         bId = TRUE;
 644         am  = 0;
 645         for (m = 0; m < molb->nmol; m++)
 646         {
 647             am = m*cgs->index[cgs->nr];
 648             for (cg = 0; cg < cgs->nr; cg++)
 649             {
 650                 a0 = cgs->index[cg];
 651                 a1 = cgs->index[cg+1];
 652                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 653                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 654                 {
 655                     bId = FALSE;
 656                 }
 657                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 658                 {
 659                     for (ai = a0; ai < a1; ai++)
 660                     {
 661                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 662                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 663                         {
 664                             bId = FALSE;
 665                         }
 666                     }
 667                 }
 668             }
 669         }
 670
 671         cginfo_mb[mb].cg_start = cg_offset;
 672         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 673         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 674         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 675         cginfo = cginfo_mb[mb].cginfo;
 676
 677         /* Set constraints flags for constrained atoms */
 678         snew(a_con, molt->atoms.nr);
 679         for (ftype = 0; ftype < F_NRE; ftype++)
 680         {
 681             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 682             {
 683                 int nral;
 684
 685                 nral = NRAL(ftype);
 686                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 687                 {
 688                     int a;
 689
 690                     for (a = 0; a < nral; a++)
 691                     {
 692                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 693                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 694                     }
 695                 }
 696             }
 697         }
 698
 699         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 700         {
 701             cgm = m*cgs->nr;
 702             am  = m*cgs->index[cgs->nr];
 703             for (cg = 0; cg < cgs->nr; cg++)
 704             {
 705                 a0 = cgs->index[cg];
 706                 a1 = cgs->index[cg+1];
 707
 708                 /* Store the energy group in cginfo */
 709                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 710                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 711
 712                 /* Check the intra/inter charge group exclusions */
 713                 if (a1-a0 > excl_nalloc)
 714                 {
 715                     excl_nalloc = a1 - a0;
 716                     srenew(bExcl, excl_nalloc);
 717                 }
 718                 /* bExclIntraAll: all intra cg interactions excluded
 719                  * bExclInter:    any inter cg interactions excluded
 720                  */
 721                 bExclIntraAll = TRUE;
 722                 bExclInter    = FALSE;
 723                 bHaveVDW      = FALSE;
 724                 bHaveQ        = FALSE;
 725                 for (ai = a0; ai < a1; ai++)
 726                 {
 727                     /* Check VDW and electrostatic interactions */
 728                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 729                                             type_VDW[molt->atoms.atom[ai].typeB]);
 730                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 731                                             molt->atoms.atom[ai].qB != 0);
 732
 733                     /* Clear the exclusion list for atom ai */
 734                     for (aj = a0; aj < a1; aj++)
 735                     {
 736                         bExcl[aj-a0] = FALSE;
 737                     }
 738                     /* Loop over all the exclusions of atom ai */
 739                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 740                     {
 741                         aj = excl->a[j];
 742                         if (aj < a0 || aj >= a1)
 743                         {
 744                             bExclInter = TRUE;
 745                         }
 746                         else
 747                         {
 748                             bExcl[aj-a0] = TRUE;
 749                         }
 750                     }
 751                     /* Check if ai excludes a0 to a1 */
 752                     for (aj = a0; aj < a1; aj++)
 753                     {
 754                         if (!bExcl[aj-a0])
 755                         {
 756                             bExclIntraAll = FALSE;
 757                         }
 758                     }
 759
 760                     switch (a_con[ai])
 761                     {
 762                         case acCONSTRAINT:
 763                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 764                             break;
 765                         case acSETTLE:
 766                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 767                             break;
 768                         default:
 769                             break;
 770                     }
 771                 }
 772                 if (bExclIntraAll)
 773                 {
 774                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 775                 }
 776                 if (bExclInter)
 777                 {
 778                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 779                 }
 780                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 781                 {
 782                     /* The size in cginfo is currently only read with DD */
 783                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 784                 }
 785                 if (bHaveVDW)
 786                 {
 787                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 788                 }
 789                 if (bHaveQ)
 790                 {
 791                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 792                 }
 793                 /* Store the charge group size */
 794                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 795
 796                 if (!bExclIntraAll || bExclInter)
 797                 {
 798                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 799                 }
 800             }
 801         }
 802
 803         sfree(a_con);
 804
 805         cg_offset += molb->nmol*cgs->nr;
 806         a_offset  += molb->nmol*cgs->index[cgs->nr];
 807     }
 808     sfree(bExcl);
 809
 810     /* the solvent optimizer is called after the QM is initialized,
 811      * because we don't want to have the QM subsystemto become an
 812      * optimized solvent
 813      */
 814
 815     check_solvent(fplog, mtop, fr, cginfo_mb);
 816
 817     if (getenv("GMX_NO_SOLV_OPT"))
 818     {
 819         if (fplog)
 820         {
 821             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 822                     "Disabling all solvent optimization\n");
 823         }
 824         fr->solvent_opt = esolNO;
 825     }
 826     if (bNoSolvOpt)
 827     {
 828         fr->solvent_opt = esolNO;
 829     }
 830     if (!fr->solvent_opt)
 831     {
 832         for (mb = 0; mb < mtop->nmolblock; mb++)
 833         {
 834             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 835             {
 836                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 837             }
 838         }
 839     }
 840
 841     return cginfo_mb;
 842 }
 843
 844 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 845 {
 846     int  ncg, mb, cg;
 847     int *cginfo;
 848
 849     ncg = cgi_mb[nmb-1].cg_end;
 850     snew(cginfo, ncg);
 851     mb = 0;
 852     for (cg = 0; cg < ncg; cg++)
 853     {
 854         while (cg >= cgi_mb[mb].cg_end)
 855         {
 856             mb++;
 857         }
 858         cginfo[cg] =
 859             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 860     }
 861
 862     return cginfo;
 863 }
 864
 865 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 866 {
 867     /*This now calculates sum for q and c6*/
 868     double         qsum, q2sum, q, c6sum, c6, sqrc6;
 869     int            mb, nmol, i;
 870     const t_atoms *atoms;
 871
 872     qsum   = 0;
 873     q2sum  = 0;
 874     c6sum  = 0;
 875     for (mb = 0; mb < mtop->nmolblock; mb++)
 876     {
 877         nmol  = mtop->molblock[mb].nmol;
 878         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 879         for (i = 0; i < atoms->nr; i++)
 880         {
 881             q       = atoms->atom[i].q;
 882             qsum   += nmol*q;
 883             q2sum  += nmol*q*q;
 884             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 885             sqrc6   = sqrt(c6);
 886             c6sum  += nmol*sqrc6*sqrc6;
 887         }
 888     }
 889     fr->qsum[0]   = qsum;
 890     fr->q2sum[0]  = q2sum;
 891     fr->c6sum[0]  = c6sum/12;
 892
 893     if (fr->efep != efepNO)
 894     {
 895         qsum   = 0;
 896         q2sum  = 0;
 897         c6sum  = 0;
 898         for (mb = 0; mb < mtop->nmolblock; mb++)
 899         {
 900             nmol  = mtop->molblock[mb].nmol;
 901             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 902             for (i = 0; i < atoms->nr; i++)
 903             {
 904                 q       = atoms->atom[i].qB;
 905                 qsum   += nmol*q;
 906                 q2sum  += nmol*q*q;
 907                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 908                 sqrc6   = sqrt(c6);
 909                 c6sum  += nmol*sqrc6*sqrc6;
 910             }
 911             fr->qsum[1]   = qsum;
 912             fr->q2sum[1]  = q2sum;
 913             fr->c6sum[1]  = c6sum/12;
 914         }
 915     }
 916     else
 917     {
 918         fr->qsum[1]   = fr->qsum[0];
 919         fr->q2sum[1]  = fr->q2sum[0];
 920         fr->c6sum[1]  = fr->c6sum[0];
 921     }
 922     if (log)
 923     {
 924         if (fr->efep == efepNO)
 925         {
 926             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 927         }
 928         else
 929         {
 930             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 931                     fr->qsum[0], fr->qsum[1]);
 932         }
 933     }
 934 }
 935
 936 void update_forcerec(t_forcerec *fr, matrix box)
 937 {
 938     if (fr->eeltype == eelGRF)
 939     {
 940         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 941                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 942                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 943     }
 944 }
 945
 946 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 947 {
 948     const t_atoms  *atoms, *atoms_tpi;
 949     const t_blocka *excl;
 950     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 951 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 952     long long int   npair, npair_ij, tmpi, tmpj;
 953 #else
 954     double          npair, npair_ij, tmpi, tmpj;
 955 #endif
 956     double          csix, ctwelve;
 957     int             ntp, *typecount;
 958     gmx_bool        bBHAM;
 959     real           *nbfp;
 960     real           *nbfp_comb = NULL;
 961
 962     ntp   = fr->ntype;
 963     bBHAM = fr->bBHAM;
 964     nbfp  = fr->nbfp;
 965
 966     /* For LJ-PME, we want to correct for the difference between the
 967      * actual C6 values and the C6 values used by the LJ-PME based on
 968      * combination rules. */
 969
 970     if (EVDW_PME(fr->vdwtype))
 971     {
 972         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
 973                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
 974         for (tpi = 0; tpi < ntp; ++tpi)
 975         {
 976             for (tpj = 0; tpj < ntp; ++tpj)
 977             {
 978                 C6(nbfp_comb, ntp, tpi, tpj) =
 979                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
 980                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
 981             }
 982         }
 983         nbfp = nbfp_comb;
 984     }
 985     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 986     {
 987         csix    = 0;
 988         ctwelve = 0;
 989         npair   = 0;
 990         nexcl   = 0;
 991         if (!fr->n_tpi)
 992         {
 993             /* Count the types so we avoid natoms^2 operations */
 994             snew(typecount, ntp);
 995             gmx_mtop_count_atomtypes(mtop, q, typecount);
 996
 997             for (tpi = 0; tpi < ntp; tpi++)
 998             {
 999                 for (tpj = tpi; tpj < ntp; tpj++)
1000                 {
1001                     tmpi = typecount[tpi];
1002                     tmpj = typecount[tpj];
1003                     if (tpi != tpj)
1004                     {
1005                         npair_ij = tmpi*tmpj;
1006                     }
1007                     else
1008                     {
1009                         npair_ij = tmpi*(tmpi - 1)/2;
1010                     }
1011                     if (bBHAM)
1012                     {
1013                         /* nbfp now includes the 6.0 derivative prefactor */
1014                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1015                     }
1016                     else
1017                     {
1018                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1019                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1020                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1021                     }
1022                     npair += npair_ij;
1023                 }
1024             }
1025             sfree(typecount);
1026             /* Subtract the excluded pairs.
1027              * The main reason for substracting exclusions is that in some cases
1028              * some combinations might never occur and the parameters could have
1029              * any value. These unused values should not influence the dispersion
1030              * correction.
1031              */
1032             for (mb = 0; mb < mtop->nmolblock; mb++)
1033             {
1034                 nmol  = mtop->molblock[mb].nmol;
1035                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1036                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1037                 for (i = 0; (i < atoms->nr); i++)
1038                 {
1039                     if (q == 0)
1040                     {
1041                         tpi = atoms->atom[i].type;
1042                     }
1043                     else
1044                     {
1045                         tpi = atoms->atom[i].typeB;
1046                     }
1047                     j1  = excl->index[i];
1048                     j2  = excl->index[i+1];
1049                     for (j = j1; j < j2; j++)
1050                     {
1051                         k = excl->a[j];
1052                         if (k > i)
1053                         {
1054                             if (q == 0)
1055                             {
1056                                 tpj = atoms->atom[k].type;
1057                             }
1058                             else
1059                             {
1060                                 tpj = atoms->atom[k].typeB;
1061                             }
1062                             if (bBHAM)
1063                             {
1064                                 /* nbfp now includes the 6.0 derivative prefactor */
1065                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1066                             }
1067                             else
1068                             {
1069                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1070                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1071                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1072                             }
1073                             nexcl += nmol;
1074                         }
1075                     }
1076                 }
1077             }
1078         }
1079         else
1080         {
1081             /* Only correct for the interaction of the test particle
1082              * with the rest of the system.
1083              */
1084             atoms_tpi =
1085                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1086
1087             npair = 0;
1088             for (mb = 0; mb < mtop->nmolblock; mb++)
1089             {
1090                 nmol  = mtop->molblock[mb].nmol;
1091                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1092                 for (j = 0; j < atoms->nr; j++)
1093                 {
1094                     nmolc = nmol;
1095                     /* Remove the interaction of the test charge group
1096                      * with itself.
1097                      */
1098                     if (mb == mtop->nmolblock-1)
1099                     {
1100                         nmolc--;
1101
1102                         if (mb == 0 && nmol == 1)
1103                         {
1104                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1105                         }
1106                     }
1107                     if (q == 0)
1108                     {
1109                         tpj = atoms->atom[j].type;
1110                     }
1111                     else
1112                     {
1113                         tpj = atoms->atom[j].typeB;
1114                     }
1115                     for (i = 0; i < fr->n_tpi; i++)
1116                     {
1117                         if (q == 0)
1118                         {
1119                             tpi = atoms_tpi->atom[i].type;
1120                         }
1121                         else
1122                         {
1123                             tpi = atoms_tpi->atom[i].typeB;
1124                         }
1125                         if (bBHAM)
1126                         {
1127                             /* nbfp now includes the 6.0 derivative prefactor */
1128                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1129                         }
1130                         else
1131                         {
1132                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1133                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1134                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1135                         }
1136                         npair += nmolc;
1137                     }
1138                 }
1139             }
1140         }
1141         if (npair - nexcl <= 0 && fplog)
1142         {
1143             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1144             csix     = 0;
1145             ctwelve  = 0;
1146         }
1147         else
1148         {
1149             csix    /= npair - nexcl;
1150             ctwelve /= npair - nexcl;
1151         }
1152         if (debug)
1153         {
1154             fprintf(debug, "Counted %d exclusions\n", nexcl);
1155             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1156             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1157         }
1158         fr->avcsix[q]    = csix;
1159         fr->avctwelve[q] = ctwelve;
1160     }
1161
1162     if (EVDW_PME(fr->vdwtype))
1163     {
1164         sfree(nbfp_comb);
1165     }
1166
1167     if (fplog != NULL)
1168     {
1169         if (fr->eDispCorr == edispcAllEner ||
1170             fr->eDispCorr == edispcAllEnerPres)
1171         {
1172             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1173                     fr->avcsix[0], fr->avctwelve[0]);
1174         }
1175         else
1176         {
1177             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1178         }
1179     }
1180 }
1181
1182
1183 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1184                            const gmx_mtop_t *mtop)
1185 {
1186     const t_atoms *at1, *at2;
1187     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1188     real           b, bmin;
1189     real          *nbfp;
1190
1191     if (fplog)
1192     {
1193         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1194     }
1195     nbfp   = fr->nbfp;
1196     ntypes = fr->ntype;
1197
1198     bmin           = -1;
1199     fr->bham_b_max = 0;
1200     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1201     {
1202         at1 = &mtop->moltype[mt1].atoms;
1203         for (i = 0; (i < at1->nr); i++)
1204         {
1205             tpi = at1->atom[i].type;
1206             if (tpi >= ntypes)
1207             {
1208                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1209             }
1210
1211             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1212             {
1213                 at2 = &mtop->moltype[mt2].atoms;
1214                 for (j = 0; (j < at2->nr); j++)
1215                 {
1216                     tpj = at2->atom[j].type;
1217                     if (tpj >= ntypes)
1218                     {
1219                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1220                     }
1221                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1222                     if (b > fr->bham_b_max)
1223                     {
1224                         fr->bham_b_max = b;
1225                     }
1226                     if ((b < bmin) || (bmin == -1))
1227                     {
1228                         bmin = b;
1229                     }
1230                 }
1231             }
1232         }
1233     }
1234     if (fplog)
1235     {
1236         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1237                 bmin, fr->bham_b_max);
1238     }
1239 }
1240
1241 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1242                             t_forcerec *fr, real rtab,
1243                             const t_commrec *cr,
1244                             const char *tabfn, char *eg1, char *eg2,
1245                             t_nblists *nbl)
1246 {
1247     char buf[STRLEN];
1248     int  i, j;
1249
1250     if (tabfn == NULL)
1251     {
1252         if (debug)
1253         {
1254             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1255         }
1256         return;
1257     }
1258
1259     sprintf(buf, "%s", tabfn);
1260     if (eg1 && eg2)
1261     {
1262         /* Append the two energy group names */
1263         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1264                 eg1, eg2, ftp2ext(efXVG));
1265     }
1266     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1267     /* Copy the contents of the table to separate coulomb and LJ tables too,
1268      * to improve cache performance.
1269      */
1270     /* For performance reasons we want
1271      * the table data to be aligned to 16-byte. The pointers could be freed
1272      * but currently aren't.
1273      */
1274     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1275     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1276     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1277     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1278     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1279     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1280     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1281     nbl->table_elec.ninteractions = 1;
1282     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1283     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1284
1285     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1286     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1287     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1288     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1289     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1290     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1291     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1292     nbl->table_vdw.ninteractions = 2;
1293     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1294     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1295
1296     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1297     {
1298         for (j = 0; j < 4; j++)
1299         {
1300             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1301         }
1302         for (j = 0; j < 8; j++)
1303         {
1304             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1305         }
1306     }
1307 }
1308
1309 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1310                          int *ncount, int **count)
1311 {
1312     const gmx_moltype_t *molt;
1313     const t_ilist       *il;
1314     int                  mt, ftype, stride, i, j, tabnr;
1315
1316     for (mt = 0; mt < mtop->nmoltype; mt++)
1317     {
1318         molt = &mtop->moltype[mt];
1319         for (ftype = 0; ftype < F_NRE; ftype++)
1320         {
1321             if (ftype == ftype1 || ftype == ftype2)
1322             {
1323                 il     = &molt->ilist[ftype];
1324                 stride = 1 + NRAL(ftype);
1325                 for (i = 0; i < il->nr; i += stride)
1326                 {
1327                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1328                     if (tabnr < 0)
1329                     {
1330                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1331                     }
1332                     if (tabnr >= *ncount)
1333                     {
1334                         srenew(*count, tabnr+1);
1335                         for (j = *ncount; j < tabnr+1; j++)
1336                         {
1337                             (*count)[j] = 0;
1338                         }
1339                         *ncount = tabnr+1;
1340                     }
1341                     (*count)[tabnr]++;
1342                 }
1343             }
1344         }
1345     }
1346 }
1347
1348 static bondedtable_t *make_bonded_tables(FILE *fplog,
1349                                          int ftype1, int ftype2,
1350                                          const gmx_mtop_t *mtop,
1351                                          const char *basefn, const char *tabext)
1352 {
1353     int            i, ncount, *count;
1354     char           tabfn[STRLEN];
1355     bondedtable_t *tab;
1356
1357     tab = NULL;
1358
1359     ncount = 0;
1360     count  = NULL;
1361     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1362
1363     if (ncount > 0)
1364     {
1365         snew(tab, ncount);
1366         for (i = 0; i < ncount; i++)
1367         {
1368             if (count[i] > 0)
1369             {
1370                 sprintf(tabfn, "%s", basefn);
1371                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1372                         tabext, i, ftp2ext(efXVG));
1373                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1374             }
1375         }
1376         sfree(count);
1377     }
1378
1379     return tab;
1380 }
1381
1382 void forcerec_set_ranges(t_forcerec *fr,
1383                          int ncg_home, int ncg_force,
1384                          int natoms_force,
1385                          int natoms_force_constr, int natoms_f_novirsum)
1386 {
1387     fr->cg0 = 0;
1388     fr->hcg = ncg_home;
1389
1390     /* fr->ncg_force is unused in the standard code,
1391      * but it can be useful for modified code dealing with charge groups.
1392      */
1393     fr->ncg_force           = ncg_force;
1394     fr->natoms_force        = natoms_force;
1395     fr->natoms_force_constr = natoms_force_constr;
1396
1397     if (fr->natoms_force_constr > fr->nalloc_force)
1398     {
1399         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1400
1401         if (fr->bTwinRange)
1402         {
1403             srenew(fr->f_twin, fr->nalloc_force);
1404         }
1405     }
1406
1407     if (fr->bF_NoVirSum)
1408     {
1409         fr->f_novirsum_n = natoms_f_novirsum;
1410         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1411         {
1412             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1413             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1414         }
1415     }
1416     else
1417     {
1418         fr->f_novirsum_n = 0;
1419     }
1420 }
1421
1422 static real cutoff_inf(real cutoff)
1423 {
1424     if (cutoff == 0)
1425     {
1426         cutoff = GMX_CUTOFF_INF;
1427     }
1428
1429     return cutoff;
1430 }
1431
1432 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1433                                   t_forcerec *fr, const t_inputrec *ir,
1434                                   const char *tabfn, const gmx_mtop_t *mtop,
1435                                   matrix     box)
1436 {
1437     char buf[STRLEN];
1438     int  i, j;
1439
1440     if (tabfn == NULL)
1441     {
1442         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1443         return;
1444     }
1445
1446     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1447
1448     sprintf(buf, "%s", tabfn);
1449     for (i = 0; i < ir->adress->n_tf_grps; i++)
1450     {
1451         j = ir->adress->tf_table_index[i]; /* get energy group index */
1452         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1453                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1454         if (fp)
1455         {
1456             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1457         }
1458         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1459     }
1460
1461 }
1462
1463 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1464 {
1465     gmx_bool bAllvsAll;
1466
1467     bAllvsAll =
1468         (
1469             ir->rlist == 0            &&
1470             ir->rcoulomb == 0         &&
1471             ir->rvdw == 0             &&
1472             ir->ePBC == epbcNONE      &&
1473             ir->vdwtype == evdwCUT    &&
1474             ir->coulombtype == eelCUT &&
1475             ir->efep == efepNO        &&
1476             (ir->implicit_solvent == eisNO ||
1477              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1478                                                   ir->gb_algorithm == egbHCT   ||
1479                                                   ir->gb_algorithm == egbOBC))) &&
1480             getenv("GMX_NO_ALLVSALL") == NULL
1481         );
1482
1483     if (bAllvsAll && ir->opts.ngener > 1)
1484     {
1485         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1486
1487         if (bPrintNote)
1488         {
1489             if (MASTER(cr))
1490             {
1491                 fprintf(stderr, "\n%s\n", note);
1492             }
1493             if (fp != NULL)
1494             {
1495                 fprintf(fp, "\n%s\n", note);
1496             }
1497         }
1498         bAllvsAll = FALSE;
1499     }
1500
1501     if (bAllvsAll && fp && MASTER(cr))
1502     {
1503         fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1504     }
1505
1506     return bAllvsAll;
1507 }
1508
1509
1510 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1511 {
1512     int t, i;
1513
1514     /* These thread local data structures are used for bondeds only */
1515     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1516
1517     if (fr->nthreads > 1)
1518     {
1519         snew(fr->f_t, fr->nthreads);
1520         /* Thread 0 uses the global force and energy arrays */
1521         for (t = 1; t < fr->nthreads; t++)
1522         {
1523             fr->f_t[t].f        = NULL;
1524             fr->f_t[t].f_nalloc = 0;
1525             snew(fr->f_t[t].fshift, SHIFTS);
1526             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1527             for (i = 0; i < egNR; i++)
1528             {
1529                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1530             }
1531         }
1532     }
1533 }
1534
1535
1536 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1537                                   int                         *kernel_type,
1538                                   int                         *ewald_excl)
1539 {
1540     *kernel_type = nbnxnk4x4_PlainC;
1541     *ewald_excl  = ewaldexclTable;
1542
1543 #ifdef GMX_NBNXN_SIMD
1544     {
1545 #ifdef GMX_NBNXN_SIMD_4XN
1546         *kernel_type = nbnxnk4xN_SIMD_4xN;
1547 #endif
1548 #ifdef GMX_NBNXN_SIMD_2XNN
1549         /* We expect the 2xNN kernels to be faster in most cases */
1550         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1551 #endif
1552
1553 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1554         if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1555         {
1556             /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1557              * 10% with HT, 50% without HT, but extra zeros interactions
1558              * can compensate. As we currently don't detect the actual use
1559              * of HT, switch to 4x8 to avoid a potential performance hit.
1560              */
1561             *kernel_type = nbnxnk4xN_SIMD_4xN;
1562         }
1563 #endif
1564         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1565         {
1566 #ifdef GMX_NBNXN_SIMD_4XN
1567             *kernel_type = nbnxnk4xN_SIMD_4xN;
1568 #else
1569             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1570 #endif
1571         }
1572         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1573         {
1574 #ifdef GMX_NBNXN_SIMD_2XNN
1575             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1576 #else
1577             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1578 #endif
1579         }
1580
1581         /* Analytical Ewald exclusion correction is only an option in
1582          * the SIMD kernel. On BlueGene/Q, this is faster regardless
1583          * of precision. In single precision, this is faster on
1584          * Bulldozer, and slightly faster on Sandy Bridge.
1585          */
1586 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1587         *ewald_excl = ewaldexclAnalytical;
1588 #endif
1589         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1590         {
1591             *ewald_excl = ewaldexclTable;
1592         }
1593         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1594         {
1595             *ewald_excl = ewaldexclAnalytical;
1596         }
1597
1598     }
1599 #endif /* GMX_NBNXN_SIMD */
1600 }
1601
1602
1603 const char *lookup_nbnxn_kernel_name(int kernel_type)
1604 {
1605     const char *returnvalue = NULL;
1606     switch (kernel_type)
1607     {
1608         case nbnxnkNotSet:
1609             returnvalue = "not set";
1610             break;
1611         case nbnxnk4x4_PlainC:
1612             returnvalue = "plain C";
1613             break;
1614         case nbnxnk4xN_SIMD_4xN:
1615         case nbnxnk4xN_SIMD_2xNN:
1616 #ifdef GMX_NBNXN_SIMD
1617 #ifdef GMX_X86_SSE2
1618             /* We have x86 SSE2 compatible SIMD */
1619 #ifdef GMX_X86_AVX_128_FMA
1620             returnvalue = "AVX-128-FMA";
1621 #else
1622 #if defined GMX_X86_AVX_256 || defined __AVX__
1623             /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1624              * on compiler flags. As we use nearly identical intrinsics,
1625              * compiling for AVX without an AVX macros effectively results
1626              * in AVX kernels.
1627              * For gcc we check for __AVX__
1628              * At least a check for icc should be added (if there is a macro)
1629              */
1630 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1631             returnvalue = "AVX-256";
1632 #else
1633             returnvalue = "AVX-128";
1634 #endif
1635 #else
1636 #ifdef GMX_X86_SSE4_1
1637             returnvalue  = "SSE4.1";
1638 #else
1639             returnvalue  = "SSE2";
1640 #endif
1641 #endif
1642 #endif
1643 #else   /* GMX_X86_SSE2 */
1644             /* not GMX_X86_SSE2, but other SIMD */
1645             returnvalue  = "SIMD";
1646 #endif /* GMX_X86_SSE2 */
1647 #else  /* GMX_NBNXN_SIMD */
1648             returnvalue = "not available";
1649 #endif /* GMX_NBNXN_SIMD */
1650             break;
1651         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1652         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1653
1654         case nbnxnkNR:
1655         default:
1656             gmx_fatal(FARGS, "Illegal kernel type selected");
1657             returnvalue = NULL;
1658             break;
1659     }
1660     return returnvalue;
1661 };
1662
1663 static void pick_nbnxn_kernel(FILE                *fp,
1664                               const t_commrec     *cr,
1665                               gmx_bool             use_cpu_acceleration,
1666                               gmx_bool             bUseGPU,
1667                               gmx_bool             bEmulateGPU,
1668                               const t_inputrec    *ir,
1669                               int                 *kernel_type,
1670                               int                 *ewald_excl,
1671                               gmx_bool             bDoNonbonded)
1672 {
1673     assert(kernel_type);
1674
1675     *kernel_type = nbnxnkNotSet;
1676     *ewald_excl  = ewaldexclTable;
1677
1678     if (bEmulateGPU)
1679     {
1680         *kernel_type = nbnxnk8x8x8_PlainC;
1681
1682         if (bDoNonbonded)
1683         {
1684             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1685         }
1686     }
1687     else if (bUseGPU)
1688     {
1689         *kernel_type = nbnxnk8x8x8_CUDA;
1690     }
1691
1692     if (*kernel_type == nbnxnkNotSet)
1693     {
1694         if (use_cpu_acceleration)
1695         {
1696             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1697         }
1698         else
1699         {
1700             *kernel_type = nbnxnk4x4_PlainC;
1701         }
1702     }
1703
1704     if (bDoNonbonded && fp != NULL)
1705     {
1706         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1707                 lookup_nbnxn_kernel_name(*kernel_type),
1708                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1709                 nbnxn_kernel_to_cj_size(*kernel_type));
1710     }
1711 }
1712
1713 static void pick_nbnxn_resources(const t_commrec     *cr,
1714                                  const gmx_hw_info_t *hwinfo,
1715                                  gmx_bool             bDoNonbonded,
1716                                  gmx_bool            *bUseGPU,
1717                                  gmx_bool            *bEmulateGPU,
1718                                  const gmx_gpu_opt_t *gpu_opt)
1719 {
1720     gmx_bool bEmulateGPUEnvVarSet;
1721     char     gpu_err_str[STRLEN];
1722
1723     *bUseGPU = FALSE;
1724
1725     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1726
1727     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1728      * GPUs (currently) only handle non-bonded calculations, we will
1729      * automatically switch to emulation if non-bonded calculations are
1730      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1731      * way to turn off GPU initialization, data movement, and cleanup.
1732      *
1733      * GPU emulation can be useful to assess the performance one can expect by
1734      * adding GPU(s) to the machine. The conditional below allows this even
1735      * if mdrun is compiled without GPU acceleration support.
1736      * Note that you should freezing the system as otherwise it will explode.
1737      */
1738     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1739                     (!bDoNonbonded &&
1740                      gpu_opt->ncuda_dev_use > 0));
1741
1742     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1743      */
1744     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1745     {
1746         /* Each PP node will use the intra-node id-th device from the
1747          * list of detected/selected GPUs. */
1748         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1749                       &hwinfo->gpu_info, gpu_opt))
1750         {
1751             /* At this point the init should never fail as we made sure that
1752              * we have all the GPUs we need. If it still does, we'll bail. */
1753             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1754                       cr->nodeid,
1755                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1756                                         cr->rank_pp_intranode),
1757                       gpu_err_str);
1758         }
1759
1760         /* Here we actually turn on hardware GPU acceleration */
1761         *bUseGPU = TRUE;
1762     }
1763 }
1764
1765 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1766                             nonbonded_verlet_t *nbv,
1767                             int                 group)
1768 {
1769     gmx_bool bUsesSimpleTables = TRUE;
1770     int      grp_index;
1771
1772     switch (cutoff_scheme)
1773     {
1774         case ecutsGROUP:
1775             bUsesSimpleTables = TRUE;
1776             break;
1777         case ecutsVERLET:
1778             assert(NULL != nbv && NULL != nbv->grp);
1779             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1780             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1781             break;
1782         default:
1783             gmx_incons("unimplemented");
1784     }
1785     return bUsesSimpleTables;
1786 }
1787
1788 static void init_ewald_f_table(interaction_const_t *ic,
1789                                gmx_bool             bUsesSimpleTables,
1790                                real                 rtab)
1791 {
1792     real maxr;
1793
1794     if (bUsesSimpleTables)
1795     {
1796         /* With a spacing of 0.0005 we are at the force summation accuracy
1797          * for the SSE kernels for "normal" atomistic simulations.
1798          */
1799         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1800                                                    ic->rcoulomb);
1801
1802         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1803         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1804     }
1805     else
1806     {
1807         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1808         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1809         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1810     }
1811
1812     sfree_aligned(ic->tabq_coul_FDV0);
1813     sfree_aligned(ic->tabq_coul_F);
1814     sfree_aligned(ic->tabq_coul_V);
1815
1816     /* Create the original table data in FDV0 */
1817     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1818     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1819     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1820     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1821                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1822 }
1823
1824 void init_interaction_const_tables(FILE                *fp,
1825                                    interaction_const_t *ic,
1826                                    gmx_bool             bUsesSimpleTables,
1827                                    real                 rtab)
1828 {
1829     real spacing;
1830
1831     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1832     {
1833         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1834
1835         if (fp != NULL)
1836         {
1837             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1838                     1/ic->tabq_scale, ic->tabq_size);
1839         }
1840     }
1841 }
1842
1843 static void init_interaction_const(FILE                       *fp,
1844                                    const t_commrec gmx_unused *cr,
1845                                    interaction_const_t       **interaction_const,
1846                                    const t_forcerec           *fr,
1847                                    real                        rtab)
1848 {
1849     interaction_const_t *ic;
1850     gmx_bool             bUsesSimpleTables = TRUE;
1851
1852     snew(ic, 1);
1853
1854     /* Just allocate something so we can free it */
1855     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1856     snew_aligned(ic->tabq_coul_F, 16, 32);
1857     snew_aligned(ic->tabq_coul_V, 16, 32);
1858
1859     ic->rlist       = fr->rlist;
1860     ic->rlistlong   = fr->rlistlong;
1861
1862     /* Lennard-Jones */
1863     ic->rvdw        = fr->rvdw;
1864     if (fr->vdw_modifier == eintmodPOTSHIFT)
1865     {
1866         ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1867     }
1868     else
1869     {
1870         ic->sh_invrc6 = 0;
1871     }
1872
1873     /* Electrostatics */
1874     ic->eeltype     = fr->eeltype;
1875     ic->rcoulomb    = fr->rcoulomb;
1876     ic->epsilon_r   = fr->epsilon_r;
1877     ic->epsfac      = fr->epsfac;
1878
1879     /* Ewald */
1880     ic->ewaldcoeff_q   = fr->ewaldcoeff_q;
1881     ic->ewaldcoeff_lj  = fr->ewaldcoeff_lj;
1882
1883     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1884     {
1885         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1886     }
1887     else
1888     {
1889         ic->sh_ewald = 0;
1890     }
1891
1892     /* Reaction-field */
1893     if (EEL_RF(ic->eeltype))
1894     {
1895         ic->epsilon_rf = fr->epsilon_rf;
1896         ic->k_rf       = fr->k_rf;
1897         ic->c_rf       = fr->c_rf;
1898     }
1899     else
1900     {
1901         /* For plain cut-off we might use the reaction-field kernels */
1902         ic->epsilon_rf = ic->epsilon_r;
1903         ic->k_rf       = 0;
1904         if (fr->coulomb_modifier == eintmodPOTSHIFT)
1905         {
1906             ic->c_rf   = 1/ic->rcoulomb;
1907         }
1908         else
1909         {
1910             ic->c_rf   = 0;
1911         }
1912     }
1913
1914     if (fp != NULL)
1915     {
1916         fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1917                 sqr(ic->sh_invrc6), ic->sh_invrc6);
1918         if (ic->eeltype == eelCUT)
1919         {
1920             fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1921         }
1922         else if (EEL_PME(ic->eeltype))
1923         {
1924             fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1925         }
1926         fprintf(fp, "\n");
1927     }
1928
1929     *interaction_const = ic;
1930
1931     if (fr->nbv != NULL && fr->nbv->bUseGPU)
1932     {
1933         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1934
1935         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1936          * also sharing texture references. To keep the code simple, we don't
1937          * treat texture references as shared resources, but this means that
1938          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
1939          * Hence, to ensure that the non-bonded kernels don't start before all
1940          * texture binding operations are finished, we need to wait for all ranks
1941          * to arrive here before continuing.
1942          *
1943          * Note that we could omit this barrier if GPUs are not shared (or
1944          * texture objects are used), but as this is initialization code, there
1945          * is not point in complicating things.
1946          */
1947 #ifdef GMX_THREAD_MPI
1948         if (PAR(cr))
1949         {
1950             gmx_barrier(cr);
1951         }
1952 #endif  /* GMX_THREAD_MPI */
1953     }
1954
1955     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1956     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1957 }
1958
1959 static void init_nb_verlet(FILE                *fp,
1960                            nonbonded_verlet_t **nb_verlet,
1961                            const t_inputrec    *ir,
1962                            const t_forcerec    *fr,
1963                            const t_commrec     *cr,
1964                            const char          *nbpu_opt)
1965 {
1966     nonbonded_verlet_t *nbv;
1967     int                 i;
1968     char               *env;
1969     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
1970
1971     nbnxn_alloc_t      *nb_alloc;
1972     nbnxn_free_t       *nb_free;
1973
1974     snew(nbv, 1);
1975
1976     pick_nbnxn_resources(cr, fr->hwinfo,
1977                          fr->bNonbonded,
1978                          &nbv->bUseGPU,
1979                          &bEmulateGPU,
1980                          fr->gpu_opt);
1981
1982     nbv->nbs = NULL;
1983
1984     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1985     for (i = 0; i < nbv->ngrp; i++)
1986     {
1987         nbv->grp[i].nbl_lists.nnbl = 0;
1988         nbv->grp[i].nbat           = NULL;
1989         nbv->grp[i].kernel_type    = nbnxnkNotSet;
1990
1991         if (i == 0) /* local */
1992         {
1993             pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
1994                               nbv->bUseGPU, bEmulateGPU, ir,
1995                               &nbv->grp[i].kernel_type,
1996                               &nbv->grp[i].ewald_excl,
1997                               fr->bNonbonded);
1998         }
1999         else /* non-local */
2000         {
2001             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2002             {
2003                 /* Use GPU for local, select a CPU kernel for non-local */
2004                 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
2005                                   FALSE, FALSE, ir,
2006                                   &nbv->grp[i].kernel_type,
2007                                   &nbv->grp[i].ewald_excl,
2008                                   fr->bNonbonded);
2009
2010                 bHybridGPURun = TRUE;
2011             }
2012             else
2013             {
2014                 /* Use the same kernel for local and non-local interactions */
2015                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2016                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2017             }
2018         }
2019     }
2020
2021     if (nbv->bUseGPU)
2022     {
2023         /* init the NxN GPU data; the last argument tells whether we'll have
2024          * both local and non-local NB calculation on GPU */
2025         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2026                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2027                         cr->rank_pp_intranode,
2028                         (nbv->ngrp > 1) && !bHybridGPURun);
2029
2030         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2031         {
2032             char *end;
2033
2034             nbv->min_ci_balanced = strtol(env, &end, 10);
2035             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2036             {
2037                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2038             }
2039
2040             if (debug)
2041             {
2042                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2043                         nbv->min_ci_balanced);
2044             }
2045         }
2046         else
2047         {
2048             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2049             if (debug)
2050             {
2051                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2052                         nbv->min_ci_balanced);
2053             }
2054         }
2055     }
2056     else
2057     {
2058         nbv->min_ci_balanced = 0;
2059     }
2060
2061     *nb_verlet = nbv;
2062
2063     nbnxn_init_search(&nbv->nbs,
2064                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2065                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2066                       gmx_omp_nthreads_get(emntNonbonded));
2067
2068     for (i = 0; i < nbv->ngrp; i++)
2069     {
2070         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2071         {
2072             nb_alloc = &pmalloc;
2073             nb_free  = &pfree;
2074         }
2075         else
2076         {
2077             nb_alloc = NULL;
2078             nb_free  = NULL;
2079         }
2080
2081         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2082                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2083                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2084                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2085                                 nb_alloc, nb_free);
2086
2087         if (i == 0 ||
2088             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2089         {
2090             snew(nbv->grp[i].nbat, 1);
2091             nbnxn_atomdata_init(fp,
2092                                 nbv->grp[i].nbat,
2093                                 nbv->grp[i].kernel_type,
2094                                 fr->ntype, fr->nbfp,
2095                                 ir->opts.ngener,
2096                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2097                                 nb_alloc, nb_free);
2098         }
2099         else
2100         {
2101             nbv->grp[i].nbat = nbv->grp[0].nbat;
2102         }
2103     }
2104 }
2105
2106 void init_forcerec(FILE              *fp,
2107                    const output_env_t oenv,
2108                    t_forcerec        *fr,
2109                    t_fcdata          *fcd,
2110                    const t_inputrec  *ir,
2111                    const gmx_mtop_t  *mtop,
2112                    const t_commrec   *cr,
2113                    matrix             box,
2114                    const char        *tabfn,
2115                    const char        *tabafn,
2116                    const char        *tabpfn,
2117                    const char        *tabbfn,
2118                    const char        *nbpu_opt,
2119                    gmx_bool           bNoSolvOpt,
2120                    real               print_force)
2121 {
2122     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2123     real           rtab;
2124     char          *env;
2125     double         dbl;
2126     const t_block *cgs;
2127     gmx_bool       bGenericKernelOnly;
2128     gmx_bool       bTab, bSep14tab, bNormalnblists;
2129     t_nblists     *nbl;
2130     int           *nm_ind, egp_flags;
2131
2132     if (fr->hwinfo == NULL)
2133     {
2134         /* Detect hardware, gather information.
2135          * In mdrun, hwinfo has already been set before calling init_forcerec.
2136          * Here we ignore GPUs, as tools will not use them anyhow.
2137          */
2138         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2139     }
2140
2141     /* By default we turn acceleration on, but it might be turned off further down... */
2142     fr->use_cpu_acceleration = TRUE;
2143
2144     fr->bDomDec = DOMAINDECOMP(cr);
2145
2146     natoms = mtop->natoms;
2147
2148     if (check_box(ir->ePBC, box))
2149     {
2150         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2151     }
2152
2153     /* Test particle insertion ? */
2154     if (EI_TPI(ir->eI))
2155     {
2156         /* Set to the size of the molecule to be inserted (the last one) */
2157         /* Because of old style topologies, we have to use the last cg
2158          * instead of the last molecule type.
2159          */
2160         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2161         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2162         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2163         {
2164             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2165         }
2166     }
2167     else
2168     {
2169         fr->n_tpi = 0;
2170     }
2171
2172     /* Copy AdResS parameters */
2173     if (ir->bAdress)
2174     {
2175         fr->adress_type           = ir->adress->type;
2176         fr->adress_const_wf       = ir->adress->const_wf;
2177         fr->adress_ex_width       = ir->adress->ex_width;
2178         fr->adress_hy_width       = ir->adress->hy_width;
2179         fr->adress_icor           = ir->adress->icor;
2180         fr->adress_site           = ir->adress->site;
2181         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2182         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2183
2184
2185         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2186         for (i = 0; i < ir->adress->n_energy_grps; i++)
2187         {
2188             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2189         }
2190
2191         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2192         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2193         for (i = 0; i < fr->n_adress_tf_grps; i++)
2194         {
2195             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2196         }
2197         copy_rvec(ir->adress->refs, fr->adress_refs);
2198     }
2199     else
2200     {
2201         fr->adress_type           = eAdressOff;
2202         fr->adress_do_hybridpairs = FALSE;
2203     }
2204
2205     /* Copy the user determined parameters */
2206     fr->userint1  = ir->userint1;
2207     fr->userint2  = ir->userint2;
2208     fr->userint3  = ir->userint3;
2209     fr->userint4  = ir->userint4;
2210     fr->userreal1 = ir->userreal1;
2211     fr->userreal2 = ir->userreal2;
2212     fr->userreal3 = ir->userreal3;
2213     fr->userreal4 = ir->userreal4;
2214
2215     /* Shell stuff */
2216     fr->fc_stepsize = ir->fc_stepsize;
2217
2218     /* Free energy */
2219     fr->efep        = ir->efep;
2220     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2221     if (ir->fepvals->bScCoul)
2222     {
2223         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2224         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2225     }
2226     else
2227     {
2228         fr->sc_alphacoul  = 0;
2229         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2230     }
2231     fr->sc_power      = ir->fepvals->sc_power;
2232     fr->sc_r_power    = ir->fepvals->sc_r_power;
2233     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2234
2235     env = getenv("GMX_SCSIGMA_MIN");
2236     if (env != NULL)
2237     {
2238         dbl = 0;
2239         sscanf(env, "%lf", &dbl);
2240         fr->sc_sigma6_min = pow(dbl, 6);
2241         if (fp)
2242         {
2243             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2244         }
2245     }
2246
2247     fr->bNonbonded = TRUE;
2248     if (getenv("GMX_NO_NONBONDED") != NULL)
2249     {
2250         /* turn off non-bonded calculations */
2251         fr->bNonbonded = FALSE;
2252         md_print_warn(cr, fp,
2253                       "Found environment variable GMX_NO_NONBONDED.\n"
2254                       "Disabling nonbonded calculations.\n");
2255     }
2256
2257     bGenericKernelOnly = FALSE;
2258
2259     /* We now check in the NS code whether a particular combination of interactions
2260      * can be used with water optimization, and disable it if that is not the case.
2261      */
2262
2263     if (getenv("GMX_NB_GENERIC") != NULL)
2264     {
2265         if (fp != NULL)
2266         {
2267             fprintf(fp,
2268                     "Found environment variable GMX_NB_GENERIC.\n"
2269                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2270                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2271         }
2272         bGenericKernelOnly = TRUE;
2273     }
2274
2275     if (bGenericKernelOnly == TRUE)
2276     {
2277         bNoSolvOpt         = TRUE;
2278     }
2279
2280     if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2281     {
2282         fr->use_cpu_acceleration = FALSE;
2283         if (fp != NULL)
2284         {
2285             fprintf(fp,
2286                     "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2287                     "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2288         }
2289     }
2290
2291     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2292
2293     /* Check if we can/should do all-vs-all kernels */
2294     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2295     fr->AllvsAll_work   = NULL;
2296     fr->AllvsAll_workgb = NULL;
2297
2298     /* All-vs-all kernels have not been implemented in 4.6, and
2299      * the SIMD group kernels are also buggy in this case. Non-accelerated
2300      * group kernels are OK. See Redmine #1249. */
2301     if (fr->bAllvsAll)
2302     {
2303         fr->bAllvsAll            = FALSE;
2304         fr->use_cpu_acceleration = FALSE;
2305         if (fp != NULL)
2306         {
2307             fprintf(fp,
2308                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2309                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2310                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2311                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2312                     "we are proceeding by disabling all CPU architecture-specific\n"
2313                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2314                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2315         }
2316     }
2317
2318     /* Neighbour searching stuff */
2319     fr->cutoff_scheme = ir->cutoff_scheme;
2320     fr->bGrid         = (ir->ns_type == ensGRID);
2321     fr->ePBC          = ir->ePBC;
2322
2323     /* Determine if we will do PBC for distances in bonded interactions */
2324     if (fr->ePBC == epbcNONE)
2325     {
2326         fr->bMolPBC = FALSE;
2327     }
2328     else
2329     {
2330         if (!DOMAINDECOMP(cr))
2331         {
2332             /* The group cut-off scheme and SHAKE assume charge groups
2333              * are whole, but not using molpbc is faster in most cases.
2334              */
2335             if (fr->cutoff_scheme == ecutsGROUP ||
2336                 (ir->eConstrAlg == econtSHAKE &&
2337                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2338                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2339             {
2340                 fr->bMolPBC = ir->bPeriodicMols;
2341             }
2342             else
2343             {
2344                 fr->bMolPBC = TRUE;
2345                 if (getenv("GMX_USE_GRAPH") != NULL)
2346                 {
2347                     fr->bMolPBC = FALSE;
2348                     if (fp)
2349                     {
2350                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2351                     }
2352                 }
2353             }
2354         }
2355         else
2356         {
2357             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2358         }
2359     }
2360     fr->bGB = (ir->implicit_solvent == eisGBSA);
2361
2362     fr->rc_scaling = ir->refcoord_scaling;
2363     copy_rvec(ir->posres_com, fr->posres_com);
2364     copy_rvec(ir->posres_comB, fr->posres_comB);
2365     fr->rlist                    = cutoff_inf(ir->rlist);
2366     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2367     fr->eeltype                  = ir->coulombtype;
2368     fr->vdwtype                  = ir->vdwtype;
2369     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2370
2371     fr->coulomb_modifier = ir->coulomb_modifier;
2372     fr->vdw_modifier     = ir->vdw_modifier;
2373
2374     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2375     switch (fr->eeltype)
2376     {
2377         case eelCUT:
2378             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2379             break;
2380
2381         case eelRF:
2382         case eelGRF:
2383         case eelRF_NEC:
2384             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2385             break;
2386
2387         case eelRF_ZERO:
2388             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2389             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2390             break;
2391
2392         case eelSWITCH:
2393         case eelSHIFT:
2394         case eelUSER:
2395         case eelENCADSHIFT:
2396         case eelPMESWITCH:
2397         case eelPMEUSER:
2398         case eelPMEUSERSWITCH:
2399             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2400             break;
2401
2402         case eelPME:
2403         case eelEWALD:
2404             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2405             break;
2406
2407         default:
2408             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2409             break;
2410     }
2411
2412     /* Vdw: Translate from mdp settings to kernel format */
2413     switch (fr->vdwtype)
2414     {
2415         case evdwCUT:
2416         case evdwPME:
2417             if (fr->bBHAM)
2418             {
2419                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2420             }
2421             else
2422             {
2423                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2424             }
2425             break;
2426
2427         case evdwSWITCH:
2428         case evdwSHIFT:
2429         case evdwUSER:
2430         case evdwENCADSHIFT:
2431             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2432             break;
2433
2434         default:
2435             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2436             break;
2437     }
2438
2439     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2440     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2441     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2442
2443     fr->bTwinRange = fr->rlistlong > fr->rlist;
2444     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2445
2446     fr->reppow     = mtop->ffparams.reppow;
2447
2448     if (ir->cutoff_scheme == ecutsGROUP)
2449     {
2450         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2451                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2452         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2453         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2454                            fr->eeltype == eelEWALD ||
2455                            fr->eeltype == eelPME ||
2456                            fr->eeltype == eelRF ||
2457                            fr->eeltype == eelRF_ZERO);
2458
2459         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2460          * going to be faster to tabulate the interaction than calling the generic kernel.
2461          */
2462         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2463         {
2464             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2465             {
2466                 fr->bcoultab = TRUE;
2467             }
2468         }
2469         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2470                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2471                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2472                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2473         {
2474             if (fr->rcoulomb != fr->rvdw)
2475             {
2476                 fr->bcoultab = TRUE;
2477             }
2478         }
2479
2480         if (getenv("GMX_REQUIRE_TABLES"))
2481         {
2482             fr->bvdwtab  = TRUE;
2483             fr->bcoultab = TRUE;
2484         }
2485
2486         if (fp)
2487         {
2488             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2489             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2490         }
2491
2492         if (fr->bvdwtab == TRUE)
2493         {
2494             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2495             fr->nbkernel_vdw_modifier    = eintmodNONE;
2496         }
2497         if (fr->bcoultab == TRUE)
2498         {
2499             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2500             fr->nbkernel_elec_modifier    = eintmodNONE;
2501         }
2502     }
2503
2504     if (ir->cutoff_scheme == ecutsVERLET)
2505     {
2506         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2507         {
2508             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2509         }
2510         fr->bvdwtab  = FALSE;
2511         fr->bcoultab = FALSE;
2512     }
2513
2514     /* Tables are used for direct ewald sum */
2515     if (fr->bEwald)
2516     {
2517         if (EEL_PME(ir->coulombtype))
2518         {
2519             if (fp)
2520             {
2521                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2522             }
2523             if (ir->coulombtype == eelP3M_AD)
2524             {
2525                 please_cite(fp, "Hockney1988");
2526                 please_cite(fp, "Ballenegger2012");
2527             }
2528             else
2529             {
2530                 please_cite(fp, "Essmann95a");
2531             }
2532
2533             if (ir->ewald_geometry == eewg3DC)
2534             {
2535                 if (fp)
2536                 {
2537                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2538                 }
2539                 please_cite(fp, "In-Chul99a");
2540             }
2541         }
2542         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2543         init_ewald_tab(&(fr->ewald_table), ir, fp);
2544         if (fp)
2545         {
2546             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2547                     1/fr->ewaldcoeff_q);
2548         }
2549     }
2550
2551     if (EVDW_PME(ir->vdwtype))
2552     {
2553         if (fp)
2554         {
2555             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2556         }
2557         please_cite(fp, "Essman95a");
2558         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2559         if (fp)
2560         {
2561             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2562                     1/fr->ewaldcoeff_lj);
2563         }
2564     }
2565
2566     /* Electrostatics */
2567     fr->epsilon_r       = ir->epsilon_r;
2568     fr->epsilon_rf      = ir->epsilon_rf;
2569     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2570     fr->rcoulomb_switch = ir->rcoulomb_switch;
2571     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2572
2573     /* Parameters for generalized RF */
2574     fr->zsquare = 0.0;
2575     fr->temp    = 0.0;
2576
2577     if (fr->eeltype == eelGRF)
2578     {
2579         init_generalized_rf(fp, mtop, ir, fr);
2580     }
2581
2582     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2583                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2584                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2585                        IR_ELEC_FIELD(*ir) ||
2586                        (fr->adress_icor != eAdressICOff)
2587                        );
2588
2589     if (fr->cutoff_scheme == ecutsGROUP &&
2590         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2591     {
2592         /* Count the total number of charge groups */
2593         fr->cg_nalloc = ncg_mtop(mtop);
2594         srenew(fr->cg_cm, fr->cg_nalloc);
2595     }
2596     if (fr->shift_vec == NULL)
2597     {
2598         snew(fr->shift_vec, SHIFTS);
2599     }
2600
2601     if (fr->fshift == NULL)
2602     {
2603         snew(fr->fshift, SHIFTS);
2604     }
2605
2606     if (fr->nbfp == NULL)
2607     {
2608         fr->ntype = mtop->ffparams.atnr;
2609         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2610     }
2611
2612     /* Copy the energy group exclusions */
2613     fr->egp_flags = ir->opts.egp_flags;
2614
2615     /* Van der Waals stuff */
2616     fr->rvdw        = cutoff_inf(ir->rvdw);
2617     fr->rvdw_switch = ir->rvdw_switch;
2618     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2619     {
2620         if (fr->rvdw_switch >= fr->rvdw)
2621         {
2622             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2623                       fr->rvdw_switch, fr->rvdw);
2624         }
2625         if (fp)
2626         {
2627             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2628                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2629                     fr->rvdw_switch, fr->rvdw);
2630         }
2631     }
2632
2633     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2634     {
2635         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2636     }
2637
2638     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2639     {
2640         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2641     }
2642
2643     if (fp)
2644     {
2645         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2646                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2647     }
2648
2649     fr->eDispCorr = ir->eDispCorr;
2650     if (ir->eDispCorr != edispcNO)
2651     {
2652         set_avcsixtwelve(fp, fr, mtop);
2653     }
2654
2655     if (fr->bBHAM)
2656     {
2657         set_bham_b_max(fp, fr, mtop);
2658     }
2659
2660     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2661
2662     /* Copy the GBSA data (radius, volume and surftens for each
2663      * atomtype) from the topology atomtype section to forcerec.
2664      */
2665     snew(fr->atype_radius, fr->ntype);
2666     snew(fr->atype_vol, fr->ntype);
2667     snew(fr->atype_surftens, fr->ntype);
2668     snew(fr->atype_gb_radius, fr->ntype);
2669     snew(fr->atype_S_hct, fr->ntype);
2670
2671     if (mtop->atomtypes.nr > 0)
2672     {
2673         for (i = 0; i < fr->ntype; i++)
2674         {
2675             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2676         }
2677         for (i = 0; i < fr->ntype; i++)
2678         {
2679             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2680         }
2681         for (i = 0; i < fr->ntype; i++)
2682         {
2683             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2684         }
2685         for (i = 0; i < fr->ntype; i++)
2686         {
2687             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2688         }
2689         for (i = 0; i < fr->ntype; i++)
2690         {
2691             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2692         }
2693     }
2694
2695     /* Generate the GB table if needed */
2696     if (fr->bGB)
2697     {
2698 #ifdef GMX_DOUBLE
2699         fr->gbtabscale = 2000;
2700 #else
2701         fr->gbtabscale = 500;
2702 #endif
2703
2704         fr->gbtabr = 100;
2705         fr->gbtab  = make_gb_table(oenv, fr);
2706
2707         init_gb(&fr->born, cr, fr, ir, mtop, ir->gb_algorithm);
2708
2709         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2710         if (!DOMAINDECOMP(cr))
2711         {
2712             make_local_gb(cr, fr->born, ir->gb_algorithm);
2713         }
2714     }
2715
2716     /* Set the charge scaling */
2717     if (fr->epsilon_r != 0)
2718     {
2719         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2720     }
2721     else
2722     {
2723         /* eps = 0 is infinite dieletric: no coulomb interactions */
2724         fr->epsfac = 0;
2725     }
2726
2727     /* Reaction field constants */
2728     if (EEL_RF(fr->eeltype))
2729     {
2730         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2731                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2732                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2733     }
2734
2735     /*This now calculates sum for q and c6*/
2736     set_chargesum(fp, fr, mtop);
2737
2738     /* if we are using LR electrostatics, and they are tabulated,
2739      * the tables will contain modified coulomb interactions.
2740      * Since we want to use the non-shifted ones for 1-4
2741      * coulombic interactions, we must have an extra set of tables.
2742      */
2743
2744     /* Construct tables.
2745      * A little unnecessary to make both vdw and coul tables sometimes,
2746      * but what the heck... */
2747
2748     bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2749
2750     bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2751                   fr->bBHAM || fr->bEwald) &&
2752                  (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2753                   gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2754                   gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2755
2756     negp_pp   = ir->opts.ngener - ir->nwall;
2757     negptable = 0;
2758     if (!bTab)
2759     {
2760         bNormalnblists = TRUE;
2761         fr->nnblists   = 1;
2762     }
2763     else
2764     {
2765         bNormalnblists = (ir->eDispCorr != edispcNO);
2766         for (egi = 0; egi < negp_pp; egi++)
2767         {
2768             for (egj = egi; egj < negp_pp; egj++)
2769             {
2770                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2771                 if (!(egp_flags & EGP_EXCL))
2772                 {
2773                     if (egp_flags & EGP_TABLE)
2774                     {
2775                         negptable++;
2776                     }
2777                     else
2778                     {
2779                         bNormalnblists = TRUE;
2780                     }
2781                 }
2782             }
2783         }
2784         if (bNormalnblists)
2785         {
2786             fr->nnblists = negptable + 1;
2787         }
2788         else
2789         {
2790             fr->nnblists = negptable;
2791         }
2792         if (fr->nnblists > 1)
2793         {
2794             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2795         }
2796     }
2797
2798     if (ir->adress)
2799     {
2800         fr->nnblists *= 2;
2801     }
2802
2803     snew(fr->nblists, fr->nnblists);
2804
2805     /* This code automatically gives table length tabext without cut-off's,
2806      * in that case grompp should already have checked that we do not need
2807      * normal tables and we only generate tables for 1-4 interactions.
2808      */
2809     rtab = ir->rlistlong + ir->tabext;
2810
2811     if (bTab)
2812     {
2813         /* make tables for ordinary interactions */
2814         if (bNormalnblists)
2815         {
2816             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2817             if (ir->adress)
2818             {
2819                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2820             }
2821             if (!bSep14tab)
2822             {
2823                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2824             }
2825             m = 1;
2826         }
2827         else
2828         {
2829             m = 0;
2830         }
2831         if (negptable > 0)
2832         {
2833             /* Read the special tables for certain energy group pairs */
2834             nm_ind = mtop->groups.grps[egcENER].nm_ind;
2835             for (egi = 0; egi < negp_pp; egi++)
2836             {
2837                 for (egj = egi; egj < negp_pp; egj++)
2838                 {
2839                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2840                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2841                     {
2842                         nbl = &(fr->nblists[m]);
2843                         if (fr->nnblists > 1)
2844                         {
2845                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2846                         }
2847                         /* Read the table file with the two energy groups names appended */
2848                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2849                                         *mtop->groups.grpname[nm_ind[egi]],
2850                                         *mtop->groups.grpname[nm_ind[egj]],
2851                                         &fr->nblists[m]);
2852                         if (ir->adress)
2853                         {
2854                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2855                                             *mtop->groups.grpname[nm_ind[egi]],
2856                                             *mtop->groups.grpname[nm_ind[egj]],
2857                                             &fr->nblists[fr->nnblists/2+m]);
2858                         }
2859                         m++;
2860                     }
2861                     else if (fr->nnblists > 1)
2862                     {
2863                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2864                     }
2865                 }
2866             }
2867         }
2868     }
2869     if (bSep14tab)
2870     {
2871         /* generate extra tables with plain Coulomb for 1-4 interactions only */
2872         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2873                                 GMX_MAKETABLES_14ONLY);
2874     }
2875
2876     /* Read AdResS Thermo Force table if needed */
2877     if (fr->adress_icor == eAdressICThermoForce)
2878     {
2879         /* old todo replace */
2880
2881         if (ir->adress->n_tf_grps > 0)
2882         {
2883             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2884
2885         }
2886         else
2887         {
2888             /* load the default table */
2889             snew(fr->atf_tabs, 1);
2890             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2891         }
2892     }
2893
2894     /* Wall stuff */
2895     fr->nwall = ir->nwall;
2896     if (ir->nwall && ir->wall_type == ewtTABLE)
2897     {
2898         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2899     }
2900
2901     if (fcd && tabbfn)
2902     {
2903         fcd->bondtab  = make_bonded_tables(fp,
2904                                            F_TABBONDS, F_TABBONDSNC,
2905                                            mtop, tabbfn, "b");
2906         fcd->angletab = make_bonded_tables(fp,
2907                                            F_TABANGLES, -1,
2908                                            mtop, tabbfn, "a");
2909         fcd->dihtab   = make_bonded_tables(fp,
2910                                            F_TABDIHS, -1,
2911                                            mtop, tabbfn, "d");
2912     }
2913     else
2914     {
2915         if (debug)
2916         {
2917             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2918         }
2919     }
2920
2921     /* QM/MM initialization if requested
2922      */
2923     if (ir->bQMMM)
2924     {
2925         fprintf(stderr, "QM/MM calculation requested.\n");
2926     }
2927
2928     fr->bQMMM      = ir->bQMMM;
2929     fr->qr         = mk_QMMMrec();
2930
2931     /* Set all the static charge group info */
2932     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2933                                    &fr->bExcl_IntraCGAll_InterCGNone);
2934     if (DOMAINDECOMP(cr))
2935     {
2936         fr->cginfo = NULL;
2937     }
2938     else
2939     {
2940         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2941     }
2942
2943     if (!DOMAINDECOMP(cr))
2944     {
2945         /* When using particle decomposition, the effect of the second argument,
2946          * which sets fr->hcg, is corrected later in do_md and init_em.
2947          */
2948         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2949                             mtop->natoms, mtop->natoms, mtop->natoms);
2950     }
2951
2952     fr->print_force = print_force;
2953
2954
2955     /* coarse load balancing vars */
2956     fr->t_fnbf    = 0.;
2957     fr->t_wait    = 0.;
2958     fr->timesteps = 0;
2959
2960     /* Initialize neighbor search */
2961     init_ns(fp, cr, &fr->ns, fr, mtop);
2962
2963     if (cr->duty & DUTY_PP)
2964     {
2965         gmx_nonbonded_setup(fr, bGenericKernelOnly);
2966         /*
2967            if (ir->bAdress)
2968             {
2969                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2970             }
2971          */
2972     }
2973
2974     /* Initialize the thread working data for bonded interactions */
2975     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2976
2977     snew(fr->excl_load, fr->nthreads+1);
2978
2979     if (fr->cutoff_scheme == ecutsVERLET)
2980     {
2981         if (ir->rcoulomb != ir->rvdw)
2982         {
2983             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2984         }
2985
2986         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2987     }
2988
2989     /* fr->ic is used both by verlet and group kernels (to some extent) now */
2990     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
2991
2992     if (ir->eDispCorr != edispcNO)
2993     {
2994         calc_enervirdiff(fp, ir->eDispCorr, fr);
2995     }
2996 }
2997
2998 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2999 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3000 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3001
3002 void pr_forcerec(FILE *fp, t_forcerec *fr)
3003 {
3004     int i;
3005
3006     pr_real(fp, fr->rlist);
3007     pr_real(fp, fr->rcoulomb);
3008     pr_real(fp, fr->fudgeQQ);
3009     pr_bool(fp, fr->bGrid);
3010     pr_bool(fp, fr->bTwinRange);
3011     /*pr_int(fp,fr->cg0);
3012        pr_int(fp,fr->hcg);*/
3013     for (i = 0; i < fr->nnblists; i++)
3014     {
3015         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3016     }
3017     pr_real(fp, fr->rcoulomb_switch);
3018     pr_real(fp, fr->rcoulomb);
3019
3020     fflush(fp);
3021 }
3022
3023 void forcerec_set_excl_load(t_forcerec *fr,
3024                             const gmx_localtop_t *top, const t_commrec *cr)
3025 {
3026     const int *ind, *a;
3027     int        t, i, j, ntot, n, ntarget;
3028
3029     if (cr != NULL && PARTDECOMP(cr))
3030     {
3031         /* No OpenMP with particle decomposition */
3032         pd_at_range(cr,
3033                     &fr->excl_load[0],
3034                     &fr->excl_load[1]);
3035
3036         return;
3037     }
3038
3039     ind = top->excls.index;
3040     a   = top->excls.a;
3041
3042     ntot = 0;
3043     for (i = 0; i < top->excls.nr; i++)
3044     {
3045         for (j = ind[i]; j < ind[i+1]; j++)
3046         {
3047             if (a[j] > i)
3048             {
3049                 ntot++;
3050             }
3051         }
3052     }
3053
3054     fr->excl_load[0] = 0;
3055     n                = 0;
3056     i                = 0;
3057     for (t = 1; t <= fr->nthreads; t++)
3058     {
3059         ntarget = (ntot*t)/fr->nthreads;
3060         while (i < top->excls.nr && n < ntarget)
3061         {
3062             for (j = ind[i]; j < ind[i+1]; j++)
3063             {
3064                 if (a[j] > i)
3065                 {
3066                     n++;
3067                 }
3068             }
3069             i++;
3070         }
3071         fr->excl_load[t] = i;
3072     }
3073 }