src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "types/commrec.h"
  47 #include "vec.h"
  48 #include "gromacs/math/utilities.h"
  49 #include "macros.h"
  50 #include "smalloc.h"
  51 #include "macros.h"
  52 #include "gmx_fatal.h"
  53 #include "gmx_fatal_collective.h"
  54 #include "physics.h"
  55 #include "force.h"
  56 #include "tables.h"
  57 #include "nonbonded.h"
  58 #include "invblock.h"
  59 #include "names.h"
  60 #include "network.h"
  61 #include "pbc.h"
  62 #include "ns.h"
  63 #include "mshift.h"
  64 #include "txtdump.h"
  65 #include "coulomb.h"
  66 #include "md_support.h"
  67 #include "md_logging.h"
  68 #include "domdec.h"
  69 #include "qmmm.h"
  70 #include "copyrite.h"
  71 #include "mtop_util.h"
  72 #include "nbnxn_simd.h"
  73 #include "nbnxn_search.h"
  74 #include "nbnxn_atomdata.h"
  75 #include "nbnxn_consts.h"
  76 #include "gmx_omp_nthreads.h"
  77 #include "gmx_detect_hardware.h"
  78 #include "inputrec.h"
  79
  80 #ifdef _MSC_VER
  81 /* MSVC definition for __cpuid() */
  82 #include <intrin.h>
  83 #endif
  84
  85 #include "types/nbnxn_cuda_types_ext.h"
  86 #include "gpu_utils.h"
  87 #include "nbnxn_cuda_data_mgmt.h"
  88 #include "pmalloc_cuda.h"
  89
  90 t_forcerec *mk_forcerec(void)
  91 {
  92     t_forcerec *fr;
  93
  94     snew(fr, 1);
  95
  96     return fr;
  97 }
  98
  99 #ifdef DEBUG
 100 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 101 {
 102     int i, j;
 103
 104     for (i = 0; (i < atnr); i++)
 105     {
 106         for (j = 0; (j < atnr); j++)
 107         {
 108             fprintf(fp, "%2d - %2d", i, j);
 109             if (bBHAM)
 110             {
 111                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 112                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 113             }
 114             else
 115             {
 116                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 117                         C12(nbfp, atnr, i, j)/12.0);
 118             }
 119         }
 120     }
 121 }
 122 #endif
 123
 124 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 125 {
 126     real *nbfp;
 127     int   i, j, k, atnr;
 128
 129     atnr = idef->atnr;
 130     if (bBHAM)
 131     {
 132         snew(nbfp, 3*atnr*atnr);
 133         for (i = k = 0; (i < atnr); i++)
 134         {
 135             for (j = 0; (j < atnr); j++, k++)
 136             {
 137                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 138                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 139                 /* nbfp now includes the 6.0 derivative prefactor */
 140                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 141             }
 142         }
 143     }
 144     else
 145     {
 146         snew(nbfp, 2*atnr*atnr);
 147         for (i = k = 0; (i < atnr); i++)
 148         {
 149             for (j = 0; (j < atnr); j++, k++)
 150             {
 151                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 152                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 153                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 154             }
 155         }
 156     }
 157
 158     return nbfp;
 159 }
 160
 161 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 162 {
 163     real *nbfp;
 164     int   i, j, k, atnr;
 165     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 166     real  c6, c12;
 167
 168     atnr = idef->atnr;
 169     snew(nbfp, 2*atnr*atnr);
 170     for (i = 0; i < atnr; ++i)
 171     {
 172         for (j = 0; j < atnr; ++j)
 173         {
 174             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 175             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 176             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 177             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 178             c6   = sqrt(c6i  * c6j);
 179             c12  = sqrt(c12i * c12j);
 180             if (comb_rule == eCOMB_ARITHMETIC
 181                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 182             {
 183                 sigmai = pow(c12i / c6i, 1.0/6.0);
 184                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 185                 epsi   = c6i * c6i / c12i;
 186                 epsj   = c6j * c6j / c12j;
 187                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 188                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 189             }
 190             C6(nbfp, atnr, i, j)   = c6*6.0;
 191             C12(nbfp, atnr, i, j)  = c12*12.0;
 192         }
 193     }
 194     return nbfp;
 195 }
 196
 197 /* This routine sets fr->solvent_opt to the most common solvent in the
 198  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 199  * the fr->solvent_type array with the correct type (or esolNO).
 200  *
 201  * Charge groups that fulfill the conditions but are not identical to the
 202  * most common one will be marked as esolNO in the solvent_type array.
 203  *
 204  * TIP3p is identical to SPC for these purposes, so we call it
 205  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 206  *
 207  * NOTE: QM particle should not
 208  * become an optimized solvent. Not even if there is only one charge
 209  * group in the Qm
 210  */
 211
 212 typedef struct
 213 {
 214     int    model;
 215     int    count;
 216     int    vdwtype[4];
 217     real   charge[4];
 218 } solvent_parameters_t;
 219
 220 static void
 221 check_solvent_cg(const gmx_moltype_t    *molt,
 222                  int                     cg0,
 223                  int                     nmol,
 224                  const unsigned char    *qm_grpnr,
 225                  const t_grps           *qm_grps,
 226                  t_forcerec   *          fr,
 227                  int                    *n_solvent_parameters,
 228                  solvent_parameters_t  **solvent_parameters_p,
 229                  int                     cginfo,
 230                  int                    *cg_sp)
 231 {
 232     const t_blocka       *excl;
 233     t_atom               *atom;
 234     int                   j, k;
 235     int                   j0, j1, nj;
 236     gmx_bool              perturbed;
 237     gmx_bool              has_vdw[4];
 238     gmx_bool              match;
 239     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 240     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 241     int                   tjA;
 242     gmx_bool              qm;
 243     solvent_parameters_t *solvent_parameters;
 244
 245     /* We use a list with parameters for each solvent type.
 246      * Every time we discover a new molecule that fulfills the basic
 247      * conditions for a solvent we compare with the previous entries
 248      * in these lists. If the parameters are the same we just increment
 249      * the counter for that type, and otherwise we create a new type
 250      * based on the current molecule.
 251      *
 252      * Once we've finished going through all molecules we check which
 253      * solvent is most common, and mark all those molecules while we
 254      * clear the flag on all others.
 255      */
 256
 257     solvent_parameters = *solvent_parameters_p;
 258
 259     /* Mark the cg first as non optimized */
 260     *cg_sp = -1;
 261
 262     /* Check if this cg has no exclusions with atoms in other charge groups
 263      * and all atoms inside the charge group excluded.
 264      * We only have 3 or 4 atom solvent loops.
 265      */
 266     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 267         !GET_CGINFO_EXCL_INTRA(cginfo))
 268     {
 269         return;
 270     }
 271
 272     /* Get the indices of the first atom in this charge group */
 273     j0     = molt->cgs.index[cg0];
 274     j1     = molt->cgs.index[cg0+1];
 275
 276     /* Number of atoms in our molecule */
 277     nj     = j1 - j0;
 278
 279     if (debug)
 280     {
 281         fprintf(debug,
 282                 "Moltype '%s': there are %d atoms in this charge group\n",
 283                 *molt->name, nj);
 284     }
 285
 286     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 287      * otherwise skip it.
 288      */
 289     if (nj < 3 || nj > 4)
 290     {
 291         return;
 292     }
 293
 294     /* Check if we are doing QM on this group */
 295     qm = FALSE;
 296     if (qm_grpnr != NULL)
 297     {
 298         for (j = j0; j < j1 && !qm; j++)
 299         {
 300             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 301         }
 302     }
 303     /* Cannot use solvent optimization with QM */
 304     if (qm)
 305     {
 306         return;
 307     }
 308
 309     atom = molt->atoms.atom;
 310
 311     /* Still looks like a solvent, time to check parameters */
 312
 313     /* If it is perturbed (free energy) we can't use the solvent loops,
 314      * so then we just skip to the next molecule.
 315      */
 316     perturbed = FALSE;
 317
 318     for (j = j0; j < j1 && !perturbed; j++)
 319     {
 320         perturbed = PERTURBED(atom[j]);
 321     }
 322
 323     if (perturbed)
 324     {
 325         return;
 326     }
 327
 328     /* Now it's only a question if the VdW and charge parameters
 329      * are OK. Before doing the check we compare and see if they are
 330      * identical to a possible previous solvent type.
 331      * First we assign the current types and charges.
 332      */
 333     for (j = 0; j < nj; j++)
 334     {
 335         tmp_vdwtype[j] = atom[j0+j].type;
 336         tmp_charge[j]  = atom[j0+j].q;
 337     }
 338
 339     /* Does it match any previous solvent type? */
 340     for (k = 0; k < *n_solvent_parameters; k++)
 341     {
 342         match = TRUE;
 343
 344
 345         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 346         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 347              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 348         {
 349             match = FALSE;
 350         }
 351
 352         /* Check that types & charges match for all atoms in molecule */
 353         for (j = 0; j < nj && match == TRUE; j++)
 354         {
 355             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 356             {
 357                 match = FALSE;
 358             }
 359             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 360             {
 361                 match = FALSE;
 362             }
 363         }
 364         if (match == TRUE)
 365         {
 366             /* Congratulations! We have a matched solvent.
 367              * Flag it with this type for later processing.
 368              */
 369             *cg_sp = k;
 370             solvent_parameters[k].count += nmol;
 371
 372             /* We are done with this charge group */
 373             return;
 374         }
 375     }
 376
 377     /* If we get here, we have a tentative new solvent type.
 378      * Before we add it we must check that it fulfills the requirements
 379      * of the solvent optimized loops. First determine which atoms have
 380      * VdW interactions.
 381      */
 382     for (j = 0; j < nj; j++)
 383     {
 384         has_vdw[j] = FALSE;
 385         tjA        = tmp_vdwtype[j];
 386
 387         /* Go through all other tpes and see if any have non-zero
 388          * VdW parameters when combined with this one.
 389          */
 390         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 391         {
 392             /* We already checked that the atoms weren't perturbed,
 393              * so we only need to check state A now.
 394              */
 395             if (fr->bBHAM)
 396             {
 397                 has_vdw[j] = (has_vdw[j] ||
 398                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 399                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 400                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 401             }
 402             else
 403             {
 404                 /* Standard LJ */
 405                 has_vdw[j] = (has_vdw[j] ||
 406                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 407                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 408             }
 409         }
 410     }
 411
 412     /* Now we know all we need to make the final check and assignment. */
 413     if (nj == 3)
 414     {
 415         /* So, is it an SPC?
 416          * For this we require thatn all atoms have charge,
 417          * the charges on atom 2 & 3 should be the same, and only
 418          * atom 1 might have VdW.
 419          */
 420         if (has_vdw[1] == FALSE &&
 421             has_vdw[2] == FALSE &&
 422             tmp_charge[0]  != 0 &&
 423             tmp_charge[1]  != 0 &&
 424             tmp_charge[2]  == tmp_charge[1])
 425         {
 426             srenew(solvent_parameters, *n_solvent_parameters+1);
 427             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 428             solvent_parameters[*n_solvent_parameters].count = nmol;
 429             for (k = 0; k < 3; k++)
 430             {
 431                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 432                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 433             }
 434
 435             *cg_sp = *n_solvent_parameters;
 436             (*n_solvent_parameters)++;
 437         }
 438     }
 439     else if (nj == 4)
 440     {
 441         /* Or could it be a TIP4P?
 442          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 443          * Only atom 1 mght have VdW.
 444          */
 445         if (has_vdw[1] == FALSE &&
 446             has_vdw[2] == FALSE &&
 447             has_vdw[3] == FALSE &&
 448             tmp_charge[0]  == 0 &&
 449             tmp_charge[1]  != 0 &&
 450             tmp_charge[2]  == tmp_charge[1] &&
 451             tmp_charge[3]  != 0)
 452         {
 453             srenew(solvent_parameters, *n_solvent_parameters+1);
 454             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 455             solvent_parameters[*n_solvent_parameters].count = nmol;
 456             for (k = 0; k < 4; k++)
 457             {
 458                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 459                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 460             }
 461
 462             *cg_sp = *n_solvent_parameters;
 463             (*n_solvent_parameters)++;
 464         }
 465     }
 466
 467     *solvent_parameters_p = solvent_parameters;
 468 }
 469
 470 static void
 471 check_solvent(FILE  *                fp,
 472               const gmx_mtop_t  *    mtop,
 473               t_forcerec  *          fr,
 474               cginfo_mb_t           *cginfo_mb)
 475 {
 476     const t_block     *   cgs;
 477     const t_block     *   mols;
 478     const gmx_moltype_t  *molt;
 479     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 480     int                   n_solvent_parameters;
 481     solvent_parameters_t *solvent_parameters;
 482     int                 **cg_sp;
 483     int                   bestsp, bestsol;
 484
 485     if (debug)
 486     {
 487         fprintf(debug, "Going to determine what solvent types we have.\n");
 488     }
 489
 490     mols = &mtop->mols;
 491
 492     n_solvent_parameters = 0;
 493     solvent_parameters   = NULL;
 494     /* Allocate temporary array for solvent type */
 495     snew(cg_sp, mtop->nmolblock);
 496
 497     cg_offset = 0;
 498     at_offset = 0;
 499     for (mb = 0; mb < mtop->nmolblock; mb++)
 500     {
 501         molt = &mtop->moltype[mtop->molblock[mb].type];
 502         cgs  = &molt->cgs;
 503         /* Here we have to loop over all individual molecules
 504          * because we need to check for QMMM particles.
 505          */
 506         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 507         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 508         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 509         for (mol = 0; mol < nmol_ch; mol++)
 510         {
 511             cgm = mol*cgs->nr;
 512             am  = mol*cgs->index[cgs->nr];
 513             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 514             {
 515                 check_solvent_cg(molt, cg_mol, nmol,
 516                                  mtop->groups.grpnr[egcQMMM] ?
 517                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 518                                  &mtop->groups.grps[egcQMMM],
 519                                  fr,
 520                                  &n_solvent_parameters, &solvent_parameters,
 521                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 522                                  &cg_sp[mb][cgm+cg_mol]);
 523             }
 524         }
 525         cg_offset += cgs->nr;
 526         at_offset += cgs->index[cgs->nr];
 527     }
 528
 529     /* Puh! We finished going through all charge groups.
 530      * Now find the most common solvent model.
 531      */
 532
 533     /* Most common solvent this far */
 534     bestsp = -2;
 535     for (i = 0; i < n_solvent_parameters; i++)
 536     {
 537         if (bestsp == -2 ||
 538             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 539         {
 540             bestsp = i;
 541         }
 542     }
 543
 544     if (bestsp >= 0)
 545     {
 546         bestsol = solvent_parameters[bestsp].model;
 547     }
 548     else
 549     {
 550         bestsol = esolNO;
 551     }
 552
 553 #ifdef DISABLE_WATER_NLIST
 554     bestsol = esolNO;
 555 #endif
 556
 557     fr->nWatMol = 0;
 558     for (mb = 0; mb < mtop->nmolblock; mb++)
 559     {
 560         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 561         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 562         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 563         {
 564             if (cg_sp[mb][i] == bestsp)
 565             {
 566                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 567                 fr->nWatMol += nmol;
 568             }
 569             else
 570             {
 571                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 572             }
 573         }
 574         sfree(cg_sp[mb]);
 575     }
 576     sfree(cg_sp);
 577
 578     if (bestsol != esolNO && fp != NULL)
 579     {
 580         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 581                 esol_names[bestsol],
 582                 solvent_parameters[bestsp].count);
 583     }
 584
 585     sfree(solvent_parameters);
 586     fr->solvent_opt = bestsol;
 587 }
 588
 589 enum {
 590     acNONE = 0, acCONSTRAINT, acSETTLE
 591 };
 592
 593 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 594                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 595                                    gmx_bool *bFEP_NonBonded,
 596                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 597 {
 598     const t_block        *cgs;
 599     const t_blocka       *excl;
 600     const gmx_moltype_t  *molt;
 601     const gmx_molblock_t *molb;
 602     cginfo_mb_t          *cginfo_mb;
 603     gmx_bool             *type_VDW;
 604     int                  *cginfo;
 605     int                   cg_offset, a_offset, cgm, am;
 606     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 607     int                  *a_con;
 608     int                   ftype;
 609     int                   ia;
 610     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
 611
 612     ncg_tot = ncg_mtop(mtop);
 613     snew(cginfo_mb, mtop->nmolblock);
 614
 615     snew(type_VDW, fr->ntype);
 616     for (ai = 0; ai < fr->ntype; ai++)
 617     {
 618         type_VDW[ai] = FALSE;
 619         for (j = 0; j < fr->ntype; j++)
 620         {
 621             type_VDW[ai] = type_VDW[ai] ||
 622                 fr->bBHAM ||
 623                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 624                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 625         }
 626     }
 627
 628     *bFEP_NonBonded               = FALSE;
 629     *bExcl_IntraCGAll_InterCGNone = TRUE;
 630
 631     excl_nalloc = 10;
 632     snew(bExcl, excl_nalloc);
 633     cg_offset = 0;
 634     a_offset  = 0;
 635     for (mb = 0; mb < mtop->nmolblock; mb++)
 636     {
 637         molb = &mtop->molblock[mb];
 638         molt = &mtop->moltype[molb->type];
 639         cgs  = &molt->cgs;
 640         excl = &molt->excls;
 641
 642         /* Check if the cginfo is identical for all molecules in this block.
 643          * If so, we only need an array of the size of one molecule.
 644          * Otherwise we make an array of #mol times #cgs per molecule.
 645          */
 646         bId = TRUE;
 647         am  = 0;
 648         for (m = 0; m < molb->nmol; m++)
 649         {
 650             am = m*cgs->index[cgs->nr];
 651             for (cg = 0; cg < cgs->nr; cg++)
 652             {
 653                 a0 = cgs->index[cg];
 654                 a1 = cgs->index[cg+1];
 655                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 656                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 657                 {
 658                     bId = FALSE;
 659                 }
 660                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 661                 {
 662                     for (ai = a0; ai < a1; ai++)
 663                     {
 664                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 665                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 666                         {
 667                             bId = FALSE;
 668                         }
 669                     }
 670                 }
 671             }
 672         }
 673
 674         cginfo_mb[mb].cg_start = cg_offset;
 675         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 676         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 677         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 678         cginfo = cginfo_mb[mb].cginfo;
 679
 680         /* Set constraints flags for constrained atoms */
 681         snew(a_con, molt->atoms.nr);
 682         for (ftype = 0; ftype < F_NRE; ftype++)
 683         {
 684             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 685             {
 686                 int nral;
 687
 688                 nral = NRAL(ftype);
 689                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 690                 {
 691                     int a;
 692
 693                     for (a = 0; a < nral; a++)
 694                     {
 695                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 696                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 697                     }
 698                 }
 699             }
 700         }
 701
 702         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 703         {
 704             cgm = m*cgs->nr;
 705             am  = m*cgs->index[cgs->nr];
 706             for (cg = 0; cg < cgs->nr; cg++)
 707             {
 708                 a0 = cgs->index[cg];
 709                 a1 = cgs->index[cg+1];
 710
 711                 /* Store the energy group in cginfo */
 712                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 713                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 714
 715                 /* Check the intra/inter charge group exclusions */
 716                 if (a1-a0 > excl_nalloc)
 717                 {
 718                     excl_nalloc = a1 - a0;
 719                     srenew(bExcl, excl_nalloc);
 720                 }
 721                 /* bExclIntraAll: all intra cg interactions excluded
 722                  * bExclInter:    any inter cg interactions excluded
 723                  */
 724                 bExclIntraAll       = TRUE;
 725                 bExclInter          = FALSE;
 726                 bHaveVDW            = FALSE;
 727                 bHaveQ              = FALSE;
 728                 bHavePerturbedAtoms = FALSE;
 729                 for (ai = a0; ai < a1; ai++)
 730                 {
 731                     /* Check VDW and electrostatic interactions */
 732                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 733                                             type_VDW[molt->atoms.atom[ai].typeB]);
 734                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 735                                             molt->atoms.atom[ai].qB != 0);
 736
 737                     bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
 738
 739                     /* Clear the exclusion list for atom ai */
 740                     for (aj = a0; aj < a1; aj++)
 741                     {
 742                         bExcl[aj-a0] = FALSE;
 743                     }
 744                     /* Loop over all the exclusions of atom ai */
 745                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 746                     {
 747                         aj = excl->a[j];
 748                         if (aj < a0 || aj >= a1)
 749                         {
 750                             bExclInter = TRUE;
 751                         }
 752                         else
 753                         {
 754                             bExcl[aj-a0] = TRUE;
 755                         }
 756                     }
 757                     /* Check if ai excludes a0 to a1 */
 758                     for (aj = a0; aj < a1; aj++)
 759                     {
 760                         if (!bExcl[aj-a0])
 761                         {
 762                             bExclIntraAll = FALSE;
 763                         }
 764                     }
 765
 766                     switch (a_con[ai])
 767                     {
 768                         case acCONSTRAINT:
 769                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 770                             break;
 771                         case acSETTLE:
 772                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 773                             break;
 774                         default:
 775                             break;
 776                     }
 777                 }
 778                 if (bExclIntraAll)
 779                 {
 780                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 781                 }
 782                 if (bExclInter)
 783                 {
 784                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 785                 }
 786                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 787                 {
 788                     /* The size in cginfo is currently only read with DD */
 789                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 790                 }
 791                 if (bHaveVDW)
 792                 {
 793                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 794                 }
 795                 if (bHaveQ)
 796                 {
 797                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 798                 }
 799                 if (bHavePerturbedAtoms && fr->efep != efepNO)
 800                 {
 801                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 802                     *bFEP_NonBonded = TRUE;
 803                 }
 804                 /* Store the charge group size */
 805                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 806
 807                 if (!bExclIntraAll || bExclInter)
 808                 {
 809                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 810                 }
 811             }
 812         }
 813
 814         sfree(a_con);
 815
 816         cg_offset += molb->nmol*cgs->nr;
 817         a_offset  += molb->nmol*cgs->index[cgs->nr];
 818     }
 819     sfree(bExcl);
 820
 821     /* the solvent optimizer is called after the QM is initialized,
 822      * because we don't want to have the QM subsystemto become an
 823      * optimized solvent
 824      */
 825
 826     check_solvent(fplog, mtop, fr, cginfo_mb);
 827
 828     if (getenv("GMX_NO_SOLV_OPT"))
 829     {
 830         if (fplog)
 831         {
 832             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 833                     "Disabling all solvent optimization\n");
 834         }
 835         fr->solvent_opt = esolNO;
 836     }
 837     if (bNoSolvOpt)
 838     {
 839         fr->solvent_opt = esolNO;
 840     }
 841     if (!fr->solvent_opt)
 842     {
 843         for (mb = 0; mb < mtop->nmolblock; mb++)
 844         {
 845             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 846             {
 847                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 848             }
 849         }
 850     }
 851
 852     return cginfo_mb;
 853 }
 854
 855 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 856 {
 857     int  ncg, mb, cg;
 858     int *cginfo;
 859
 860     ncg = cgi_mb[nmb-1].cg_end;
 861     snew(cginfo, ncg);
 862     mb = 0;
 863     for (cg = 0; cg < ncg; cg++)
 864     {
 865         while (cg >= cgi_mb[mb].cg_end)
 866         {
 867             mb++;
 868         }
 869         cginfo[cg] =
 870             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 871     }
 872
 873     return cginfo;
 874 }
 875
 876 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 877 {
 878     /*This now calculates sum for q and c6*/
 879     double         qsum, q2sum, q, c6sum, c6;
 880     int            mb, nmol, i;
 881     const t_atoms *atoms;
 882
 883     qsum   = 0;
 884     q2sum  = 0;
 885     c6sum  = 0;
 886     for (mb = 0; mb < mtop->nmolblock; mb++)
 887     {
 888         nmol  = mtop->molblock[mb].nmol;
 889         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 890         for (i = 0; i < atoms->nr; i++)
 891         {
 892             q       = atoms->atom[i].q;
 893             qsum   += nmol*q;
 894             q2sum  += nmol*q*q;
 895             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 896             c6sum  += nmol*c6;
 897         }
 898     }
 899     fr->qsum[0]   = qsum;
 900     fr->q2sum[0]  = q2sum;
 901     fr->c6sum[0]  = c6sum;
 902
 903     if (fr->efep != efepNO)
 904     {
 905         qsum   = 0;
 906         q2sum  = 0;
 907         c6sum  = 0;
 908         for (mb = 0; mb < mtop->nmolblock; mb++)
 909         {
 910             nmol  = mtop->molblock[mb].nmol;
 911             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 912             for (i = 0; i < atoms->nr; i++)
 913             {
 914                 q       = atoms->atom[i].qB;
 915                 qsum   += nmol*q;
 916                 q2sum  += nmol*q*q;
 917                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 918                 c6sum  += nmol*c6;
 919             }
 920             fr->qsum[1]   = qsum;
 921             fr->q2sum[1]  = q2sum;
 922             fr->c6sum[1]  = c6sum;
 923         }
 924     }
 925     else
 926     {
 927         fr->qsum[1]   = fr->qsum[0];
 928         fr->q2sum[1]  = fr->q2sum[0];
 929         fr->c6sum[1]  = fr->c6sum[0];
 930     }
 931     if (log)
 932     {
 933         if (fr->efep == efepNO)
 934         {
 935             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 936         }
 937         else
 938         {
 939             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 940                     fr->qsum[0], fr->qsum[1]);
 941         }
 942     }
 943 }
 944
 945 void update_forcerec(t_forcerec *fr, matrix box)
 946 {
 947     if (fr->eeltype == eelGRF)
 948     {
 949         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 950                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 951                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 952     }
 953 }
 954
 955 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 956 {
 957     const t_atoms  *atoms, *atoms_tpi;
 958     const t_blocka *excl;
 959     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 960     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 961     double          csix, ctwelve;
 962     int             ntp, *typecount;
 963     gmx_bool        bBHAM;
 964     real           *nbfp;
 965     real           *nbfp_comb = NULL;
 966
 967     ntp   = fr->ntype;
 968     bBHAM = fr->bBHAM;
 969     nbfp  = fr->nbfp;
 970
 971     /* For LJ-PME, we want to correct for the difference between the
 972      * actual C6 values and the C6 values used by the LJ-PME based on
 973      * combination rules. */
 974
 975     if (EVDW_PME(fr->vdwtype))
 976     {
 977         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
 978                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
 979         for (tpi = 0; tpi < ntp; ++tpi)
 980         {
 981             for (tpj = 0; tpj < ntp; ++tpj)
 982             {
 983                 C6(nbfp_comb, ntp, tpi, tpj) =
 984                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
 985                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
 986             }
 987         }
 988         nbfp = nbfp_comb;
 989     }
 990     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 991     {
 992         csix    = 0;
 993         ctwelve = 0;
 994         npair   = 0;
 995         nexcl   = 0;
 996         if (!fr->n_tpi)
 997         {
 998             /* Count the types so we avoid natoms^2 operations */
 999             snew(typecount, ntp);
1000             gmx_mtop_count_atomtypes(mtop, q, typecount);
1001
1002             for (tpi = 0; tpi < ntp; tpi++)
1003             {
1004                 for (tpj = tpi; tpj < ntp; tpj++)
1005                 {
1006                     tmpi = typecount[tpi];
1007                     tmpj = typecount[tpj];
1008                     if (tpi != tpj)
1009                     {
1010                         npair_ij = tmpi*tmpj;
1011                     }
1012                     else
1013                     {
1014                         npair_ij = tmpi*(tmpi - 1)/2;
1015                     }
1016                     if (bBHAM)
1017                     {
1018                         /* nbfp now includes the 6.0 derivative prefactor */
1019                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1020                     }
1021                     else
1022                     {
1023                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1024                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1025                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1026                     }
1027                     npair += npair_ij;
1028                 }
1029             }
1030             sfree(typecount);
1031             /* Subtract the excluded pairs.
1032              * The main reason for substracting exclusions is that in some cases
1033              * some combinations might never occur and the parameters could have
1034              * any value. These unused values should not influence the dispersion
1035              * correction.
1036              */
1037             for (mb = 0; mb < mtop->nmolblock; mb++)
1038             {
1039                 nmol  = mtop->molblock[mb].nmol;
1040                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1041                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1042                 for (i = 0; (i < atoms->nr); i++)
1043                 {
1044                     if (q == 0)
1045                     {
1046                         tpi = atoms->atom[i].type;
1047                     }
1048                     else
1049                     {
1050                         tpi = atoms->atom[i].typeB;
1051                     }
1052                     j1  = excl->index[i];
1053                     j2  = excl->index[i+1];
1054                     for (j = j1; j < j2; j++)
1055                     {
1056                         k = excl->a[j];
1057                         if (k > i)
1058                         {
1059                             if (q == 0)
1060                             {
1061                                 tpj = atoms->atom[k].type;
1062                             }
1063                             else
1064                             {
1065                                 tpj = atoms->atom[k].typeB;
1066                             }
1067                             if (bBHAM)
1068                             {
1069                                 /* nbfp now includes the 6.0 derivative prefactor */
1070                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1071                             }
1072                             else
1073                             {
1074                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1075                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1076                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1077                             }
1078                             nexcl += nmol;
1079                         }
1080                     }
1081                 }
1082             }
1083         }
1084         else
1085         {
1086             /* Only correct for the interaction of the test particle
1087              * with the rest of the system.
1088              */
1089             atoms_tpi =
1090                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1091
1092             npair = 0;
1093             for (mb = 0; mb < mtop->nmolblock; mb++)
1094             {
1095                 nmol  = mtop->molblock[mb].nmol;
1096                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1097                 for (j = 0; j < atoms->nr; j++)
1098                 {
1099                     nmolc = nmol;
1100                     /* Remove the interaction of the test charge group
1101                      * with itself.
1102                      */
1103                     if (mb == mtop->nmolblock-1)
1104                     {
1105                         nmolc--;
1106
1107                         if (mb == 0 && nmol == 1)
1108                         {
1109                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1110                         }
1111                     }
1112                     if (q == 0)
1113                     {
1114                         tpj = atoms->atom[j].type;
1115                     }
1116                     else
1117                     {
1118                         tpj = atoms->atom[j].typeB;
1119                     }
1120                     for (i = 0; i < fr->n_tpi; i++)
1121                     {
1122                         if (q == 0)
1123                         {
1124                             tpi = atoms_tpi->atom[i].type;
1125                         }
1126                         else
1127                         {
1128                             tpi = atoms_tpi->atom[i].typeB;
1129                         }
1130                         if (bBHAM)
1131                         {
1132                             /* nbfp now includes the 6.0 derivative prefactor */
1133                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1134                         }
1135                         else
1136                         {
1137                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1138                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1139                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1140                         }
1141                         npair += nmolc;
1142                     }
1143                 }
1144             }
1145         }
1146         if (npair - nexcl <= 0 && fplog)
1147         {
1148             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1149             csix     = 0;
1150             ctwelve  = 0;
1151         }
1152         else
1153         {
1154             csix    /= npair - nexcl;
1155             ctwelve /= npair - nexcl;
1156         }
1157         if (debug)
1158         {
1159             fprintf(debug, "Counted %d exclusions\n", nexcl);
1160             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1161             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1162         }
1163         fr->avcsix[q]    = csix;
1164         fr->avctwelve[q] = ctwelve;
1165     }
1166
1167     if (EVDW_PME(fr->vdwtype))
1168     {
1169         sfree(nbfp_comb);
1170     }
1171
1172     if (fplog != NULL)
1173     {
1174         if (fr->eDispCorr == edispcAllEner ||
1175             fr->eDispCorr == edispcAllEnerPres)
1176         {
1177             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1178                     fr->avcsix[0], fr->avctwelve[0]);
1179         }
1180         else
1181         {
1182             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1183         }
1184     }
1185 }
1186
1187
1188 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1189                            const gmx_mtop_t *mtop)
1190 {
1191     const t_atoms *at1, *at2;
1192     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1193     real           b, bmin;
1194     real          *nbfp;
1195
1196     if (fplog)
1197     {
1198         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1199     }
1200     nbfp   = fr->nbfp;
1201     ntypes = fr->ntype;
1202
1203     bmin           = -1;
1204     fr->bham_b_max = 0;
1205     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1206     {
1207         at1 = &mtop->moltype[mt1].atoms;
1208         for (i = 0; (i < at1->nr); i++)
1209         {
1210             tpi = at1->atom[i].type;
1211             if (tpi >= ntypes)
1212             {
1213                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1214             }
1215
1216             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1217             {
1218                 at2 = &mtop->moltype[mt2].atoms;
1219                 for (j = 0; (j < at2->nr); j++)
1220                 {
1221                     tpj = at2->atom[j].type;
1222                     if (tpj >= ntypes)
1223                     {
1224                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1225                     }
1226                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1227                     if (b > fr->bham_b_max)
1228                     {
1229                         fr->bham_b_max = b;
1230                     }
1231                     if ((b < bmin) || (bmin == -1))
1232                     {
1233                         bmin = b;
1234                     }
1235                 }
1236             }
1237         }
1238     }
1239     if (fplog)
1240     {
1241         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1242                 bmin, fr->bham_b_max);
1243     }
1244 }
1245
1246 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1247                             t_forcerec *fr, real rtab,
1248                             const t_commrec *cr,
1249                             const char *tabfn, char *eg1, char *eg2,
1250                             t_nblists *nbl)
1251 {
1252     char buf[STRLEN];
1253     int  i, j;
1254
1255     if (tabfn == NULL)
1256     {
1257         if (debug)
1258         {
1259             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1260         }
1261         return;
1262     }
1263
1264     sprintf(buf, "%s", tabfn);
1265     if (eg1 && eg2)
1266     {
1267         /* Append the two energy group names */
1268         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1269                 eg1, eg2, ftp2ext(efXVG));
1270     }
1271     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1272     /* Copy the contents of the table to separate coulomb and LJ tables too,
1273      * to improve cache performance.
1274      */
1275     /* For performance reasons we want
1276      * the table data to be aligned to 16-byte. The pointers could be freed
1277      * but currently aren't.
1278      */
1279     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1280     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1281     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1282     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1283     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1284     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1285     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1286     nbl->table_elec.ninteractions = 1;
1287     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1288     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1289
1290     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1291     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1292     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1293     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1294     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1295     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1296     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1297     nbl->table_vdw.ninteractions = 2;
1298     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1299     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1300
1301     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1302     {
1303         for (j = 0; j < 4; j++)
1304         {
1305             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1306         }
1307         for (j = 0; j < 8; j++)
1308         {
1309             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1310         }
1311     }
1312 }
1313
1314 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1315                          int *ncount, int **count)
1316 {
1317     const gmx_moltype_t *molt;
1318     const t_ilist       *il;
1319     int                  mt, ftype, stride, i, j, tabnr;
1320
1321     for (mt = 0; mt < mtop->nmoltype; mt++)
1322     {
1323         molt = &mtop->moltype[mt];
1324         for (ftype = 0; ftype < F_NRE; ftype++)
1325         {
1326             if (ftype == ftype1 || ftype == ftype2)
1327             {
1328                 il     = &molt->ilist[ftype];
1329                 stride = 1 + NRAL(ftype);
1330                 for (i = 0; i < il->nr; i += stride)
1331                 {
1332                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1333                     if (tabnr < 0)
1334                     {
1335                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1336                     }
1337                     if (tabnr >= *ncount)
1338                     {
1339                         srenew(*count, tabnr+1);
1340                         for (j = *ncount; j < tabnr+1; j++)
1341                         {
1342                             (*count)[j] = 0;
1343                         }
1344                         *ncount = tabnr+1;
1345                     }
1346                     (*count)[tabnr]++;
1347                 }
1348             }
1349         }
1350     }
1351 }
1352
1353 static bondedtable_t *make_bonded_tables(FILE *fplog,
1354                                          int ftype1, int ftype2,
1355                                          const gmx_mtop_t *mtop,
1356                                          const char *basefn, const char *tabext)
1357 {
1358     int            i, ncount, *count;
1359     char           tabfn[STRLEN];
1360     bondedtable_t *tab;
1361
1362     tab = NULL;
1363
1364     ncount = 0;
1365     count  = NULL;
1366     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1367
1368     if (ncount > 0)
1369     {
1370         snew(tab, ncount);
1371         for (i = 0; i < ncount; i++)
1372         {
1373             if (count[i] > 0)
1374             {
1375                 sprintf(tabfn, "%s", basefn);
1376                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1377                         tabext, i, ftp2ext(efXVG));
1378                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1379             }
1380         }
1381         sfree(count);
1382     }
1383
1384     return tab;
1385 }
1386
1387 void forcerec_set_ranges(t_forcerec *fr,
1388                          int ncg_home, int ncg_force,
1389                          int natoms_force,
1390                          int natoms_force_constr, int natoms_f_novirsum)
1391 {
1392     fr->cg0 = 0;
1393     fr->hcg = ncg_home;
1394
1395     /* fr->ncg_force is unused in the standard code,
1396      * but it can be useful for modified code dealing with charge groups.
1397      */
1398     fr->ncg_force           = ncg_force;
1399     fr->natoms_force        = natoms_force;
1400     fr->natoms_force_constr = natoms_force_constr;
1401
1402     if (fr->natoms_force_constr > fr->nalloc_force)
1403     {
1404         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1405
1406         if (fr->bTwinRange)
1407         {
1408             srenew(fr->f_twin, fr->nalloc_force);
1409         }
1410     }
1411
1412     if (fr->bF_NoVirSum)
1413     {
1414         fr->f_novirsum_n = natoms_f_novirsum;
1415         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1416         {
1417             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1418             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1419         }
1420     }
1421     else
1422     {
1423         fr->f_novirsum_n = 0;
1424     }
1425 }
1426
1427 static real cutoff_inf(real cutoff)
1428 {
1429     if (cutoff == 0)
1430     {
1431         cutoff = GMX_CUTOFF_INF;
1432     }
1433
1434     return cutoff;
1435 }
1436
1437 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1438                                   t_forcerec *fr, const t_inputrec *ir,
1439                                   const char *tabfn, const gmx_mtop_t *mtop,
1440                                   matrix     box)
1441 {
1442     char buf[STRLEN];
1443     int  i, j;
1444
1445     if (tabfn == NULL)
1446     {
1447         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1448         return;
1449     }
1450
1451     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1452
1453     sprintf(buf, "%s", tabfn);
1454     for (i = 0; i < ir->adress->n_tf_grps; i++)
1455     {
1456         j = ir->adress->tf_table_index[i]; /* get energy group index */
1457         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1458                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1459         if (fp)
1460         {
1461             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1462         }
1463         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1464     }
1465
1466 }
1467
1468 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1469 {
1470     gmx_bool bAllvsAll;
1471
1472     bAllvsAll =
1473         (
1474             ir->rlist == 0            &&
1475             ir->rcoulomb == 0         &&
1476             ir->rvdw == 0             &&
1477             ir->ePBC == epbcNONE      &&
1478             ir->vdwtype == evdwCUT    &&
1479             ir->coulombtype == eelCUT &&
1480             ir->efep == efepNO        &&
1481             (ir->implicit_solvent == eisNO ||
1482              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1483                                                   ir->gb_algorithm == egbHCT   ||
1484                                                   ir->gb_algorithm == egbOBC))) &&
1485             getenv("GMX_NO_ALLVSALL") == NULL
1486         );
1487
1488     if (bAllvsAll && ir->opts.ngener > 1)
1489     {
1490         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1491
1492         if (bPrintNote)
1493         {
1494             if (MASTER(cr))
1495             {
1496                 fprintf(stderr, "\n%s\n", note);
1497             }
1498             if (fp != NULL)
1499             {
1500                 fprintf(fp, "\n%s\n", note);
1501             }
1502         }
1503         bAllvsAll = FALSE;
1504     }
1505
1506     if (bAllvsAll && fp && MASTER(cr))
1507     {
1508         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1509     }
1510
1511     return bAllvsAll;
1512 }
1513
1514
1515 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1516 {
1517     int t, i;
1518
1519     /* These thread local data structures are used for bondeds only */
1520     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1521
1522     if (fr->nthreads > 1)
1523     {
1524         snew(fr->f_t, fr->nthreads);
1525         /* Thread 0 uses the global force and energy arrays */
1526         for (t = 1; t < fr->nthreads; t++)
1527         {
1528             fr->f_t[t].f        = NULL;
1529             fr->f_t[t].f_nalloc = 0;
1530             snew(fr->f_t[t].fshift, SHIFTS);
1531             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1532             for (i = 0; i < egNR; i++)
1533             {
1534                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1535             }
1536         }
1537     }
1538 }
1539
1540
1541 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1542                                       const t_commrec  *cr,
1543                                       const t_inputrec *ir,
1544                                       gmx_bool          bGPU)
1545 {
1546     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1547     {
1548         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1549                       bGPU ? "GPUs" : "SIMD kernels",
1550                       bGPU ? "CPU only" : "plain-C kernels");
1551         return FALSE;
1552     }
1553
1554     return TRUE;
1555 }
1556
1557
1558 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1559                                   int                         *kernel_type,
1560                                   int                         *ewald_excl)
1561 {
1562     *kernel_type = nbnxnk4x4_PlainC;
1563     *ewald_excl  = ewaldexclTable;
1564
1565 #ifdef GMX_NBNXN_SIMD
1566     {
1567 #ifdef GMX_NBNXN_SIMD_4XN
1568         *kernel_type = nbnxnk4xN_SIMD_4xN;
1569 #endif
1570 #ifdef GMX_NBNXN_SIMD_2XNN
1571         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1572 #endif
1573
1574 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1575         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1576          * Currently this is based on the SIMD acceleration choice,
1577          * but it might be better to decide this at runtime based on CPU.
1578          *
1579          * 4xN calculates more (zero) interactions, but has less pair-search
1580          * work and much better kernel instruction scheduling.
1581          *
1582          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1583          * which doesn't have FMA, both the analytical and tabulated Ewald
1584          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1585          * 2x(4+4) because it results in significantly fewer pairs.
1586          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1587          * 10% with HT, 50% without HT. As we currently don't detect the actual
1588          * use of HT, use 4x8 to avoid a potential performance hit.
1589          * On Intel Haswell 4x8 is always faster.
1590          */
1591         *kernel_type = nbnxnk4xN_SIMD_4xN;
1592
1593 #ifndef GMX_SIMD_HAVE_FMA
1594         if (EEL_PME_EWALD(ir->coulombtype) ||
1595             EVDW_PME(ir->vdwtype))
1596         {
1597             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1598              * There are enough instructions to make 2x(4+4) efficient.
1599              */
1600             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1601         }
1602 #endif
1603 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1604
1605
1606         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1607         {
1608 #ifdef GMX_NBNXN_SIMD_4XN
1609             *kernel_type = nbnxnk4xN_SIMD_4xN;
1610 #else
1611             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1612 #endif
1613         }
1614         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1615         {
1616 #ifdef GMX_NBNXN_SIMD_2XNN
1617             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1618 #else
1619             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1620 #endif
1621         }
1622
1623         /* Analytical Ewald exclusion correction is only an option in
1624          * the SIMD kernel.
1625          * Since table lookup's don't parallelize with SIMD, analytical
1626          * will probably always be faster for a SIMD width of 8 or more.
1627          * With FMA analytical is sometimes faster for a width if 4 as well.
1628          * On BlueGene/Q, this is faster regardless of precision.
1629          * In single precision, this is faster on Bulldozer.
1630          */
1631 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1632         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1633         defined GMX_SIMD_IBM_QPX
1634         *ewald_excl = ewaldexclAnalytical;
1635 #endif
1636         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1637         {
1638             *ewald_excl = ewaldexclTable;
1639         }
1640         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1641         {
1642             *ewald_excl = ewaldexclAnalytical;
1643         }
1644
1645     }
1646 #endif /* GMX_NBNXN_SIMD */
1647 }
1648
1649
1650 const char *lookup_nbnxn_kernel_name(int kernel_type)
1651 {
1652     const char *returnvalue = NULL;
1653     switch (kernel_type)
1654     {
1655         case nbnxnkNotSet:
1656             returnvalue = "not set";
1657             break;
1658         case nbnxnk4x4_PlainC:
1659             returnvalue = "plain C";
1660             break;
1661         case nbnxnk4xN_SIMD_4xN:
1662         case nbnxnk4xN_SIMD_2xNN:
1663 #ifdef GMX_NBNXN_SIMD
1664 #if defined GMX_SIMD_X86_SSE2
1665             returnvalue = "SSE2";
1666 #elif defined GMX_SIMD_X86_SSE4_1
1667             returnvalue = "SSE4.1";
1668 #elif defined GMX_SIMD_X86_AVX_128_FMA
1669             returnvalue = "AVX_128_FMA";
1670 #elif defined GMX_SIMD_X86_AVX_256
1671             returnvalue = "AVX_256";
1672 #elif defined GMX_SIMD_X86_AVX2_256
1673             returnvalue = "AVX2_256";
1674 #else
1675             returnvalue = "SIMD";
1676 #endif
1677 #else  /* GMX_NBNXN_SIMD */
1678             returnvalue = "not available";
1679 #endif /* GMX_NBNXN_SIMD */
1680             break;
1681         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1682         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1683
1684         case nbnxnkNR:
1685         default:
1686             gmx_fatal(FARGS, "Illegal kernel type selected");
1687             returnvalue = NULL;
1688             break;
1689     }
1690     return returnvalue;
1691 };
1692
1693 static void pick_nbnxn_kernel(FILE                *fp,
1694                               const t_commrec     *cr,
1695                               gmx_bool             use_simd_kernels,
1696                               gmx_bool             bUseGPU,
1697                               gmx_bool             bEmulateGPU,
1698                               const t_inputrec    *ir,
1699                               int                 *kernel_type,
1700                               int                 *ewald_excl,
1701                               gmx_bool             bDoNonbonded)
1702 {
1703     assert(kernel_type);
1704
1705     *kernel_type = nbnxnkNotSet;
1706     *ewald_excl  = ewaldexclTable;
1707
1708     if (bEmulateGPU)
1709     {
1710         *kernel_type = nbnxnk8x8x8_PlainC;
1711
1712         if (bDoNonbonded)
1713         {
1714             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1715         }
1716     }
1717     else if (bUseGPU)
1718     {
1719         *kernel_type = nbnxnk8x8x8_CUDA;
1720     }
1721
1722     if (*kernel_type == nbnxnkNotSet)
1723     {
1724         /* LJ PME with LB combination rule does 7 mesh operations.
1725          * This so slow that we don't compile SIMD non-bonded kernels for that.
1726          */
1727         if (use_simd_kernels &&
1728             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1729         {
1730             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1731         }
1732         else
1733         {
1734             *kernel_type = nbnxnk4x4_PlainC;
1735         }
1736     }
1737
1738     if (bDoNonbonded && fp != NULL)
1739     {
1740         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1741                 lookup_nbnxn_kernel_name(*kernel_type),
1742                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1743                 nbnxn_kernel_to_cj_size(*kernel_type));
1744     }
1745 }
1746
1747 static void pick_nbnxn_resources(const t_commrec     *cr,
1748                                  const gmx_hw_info_t *hwinfo,
1749                                  gmx_bool             bDoNonbonded,
1750                                  gmx_bool            *bUseGPU,
1751                                  gmx_bool            *bEmulateGPU,
1752                                  const gmx_gpu_opt_t *gpu_opt)
1753 {
1754     gmx_bool bEmulateGPUEnvVarSet;
1755     char     gpu_err_str[STRLEN];
1756
1757     *bUseGPU = FALSE;
1758
1759     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1760
1761     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1762      * GPUs (currently) only handle non-bonded calculations, we will
1763      * automatically switch to emulation if non-bonded calculations are
1764      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1765      * way to turn off GPU initialization, data movement, and cleanup.
1766      *
1767      * GPU emulation can be useful to assess the performance one can expect by
1768      * adding GPU(s) to the machine. The conditional below allows this even
1769      * if mdrun is compiled without GPU acceleration support.
1770      * Note that you should freezing the system as otherwise it will explode.
1771      */
1772     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1773                     (!bDoNonbonded &&
1774                      gpu_opt->ncuda_dev_use > 0));
1775
1776     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1777      */
1778     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1779     {
1780         /* Each PP node will use the intra-node id-th device from the
1781          * list of detected/selected GPUs. */
1782         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1783                       &hwinfo->gpu_info, gpu_opt))
1784         {
1785             /* At this point the init should never fail as we made sure that
1786              * we have all the GPUs we need. If it still does, we'll bail. */
1787             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1788                       cr->nodeid,
1789                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1790                                         cr->rank_pp_intranode),
1791                       gpu_err_str);
1792         }
1793
1794         /* Here we actually turn on hardware GPU acceleration */
1795         *bUseGPU = TRUE;
1796     }
1797 }
1798
1799 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1800                             nonbonded_verlet_t *nbv,
1801                             int                 group)
1802 {
1803     gmx_bool bUsesSimpleTables = TRUE;
1804     int      grp_index;
1805
1806     switch (cutoff_scheme)
1807     {
1808         case ecutsGROUP:
1809             bUsesSimpleTables = TRUE;
1810             break;
1811         case ecutsVERLET:
1812             assert(NULL != nbv && NULL != nbv->grp);
1813             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1814             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1815             break;
1816         default:
1817             gmx_incons("unimplemented");
1818     }
1819     return bUsesSimpleTables;
1820 }
1821
1822 static void init_ewald_f_table(interaction_const_t *ic,
1823                                gmx_bool             bUsesSimpleTables,
1824                                real                 rtab)
1825 {
1826     real maxr;
1827
1828     if (bUsesSimpleTables)
1829     {
1830         /* With a spacing of 0.0005 we are at the force summation accuracy
1831          * for the SSE kernels for "normal" atomistic simulations.
1832          */
1833         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1834                                                    ic->rcoulomb);
1835
1836         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1837         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1838     }
1839     else
1840     {
1841         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1842         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1843         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1844     }
1845
1846     sfree_aligned(ic->tabq_coul_FDV0);
1847     sfree_aligned(ic->tabq_coul_F);
1848     sfree_aligned(ic->tabq_coul_V);
1849
1850     /* Create the original table data in FDV0 */
1851     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1852     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1853     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1854     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1855                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1856 }
1857
1858 void init_interaction_const_tables(FILE                *fp,
1859                                    interaction_const_t *ic,
1860                                    gmx_bool             bUsesSimpleTables,
1861                                    real                 rtab)
1862 {
1863     real spacing;
1864
1865     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1866     {
1867         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1868
1869         if (fp != NULL)
1870         {
1871             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1872                     1/ic->tabq_scale, ic->tabq_size);
1873         }
1874     }
1875 }
1876
1877 static void clear_force_switch_constants(shift_consts_t *sc)
1878 {
1879     sc->c2   = 0;
1880     sc->c3   = 0;
1881     sc->cpot = 0;
1882 }
1883
1884 static void force_switch_constants(real p,
1885                                    real rsw, real rc,
1886                                    shift_consts_t *sc)
1887 {
1888     /* Here we determine the coefficient for shifting the force to zero
1889      * between distance rsw and the cut-off rc.
1890      * For a potential of r^-p, we have force p*r^-(p+1).
1891      * But to save flops we absorb p in the coefficient.
1892      * Thus we get:
1893      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1894      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1895      */
1896     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1897     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1898     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1899 }
1900
1901 static void potential_switch_constants(real rsw, real rc,
1902                                        switch_consts_t *sc)
1903 {
1904     /* The switch function is 1 at rsw and 0 at rc.
1905      * The derivative and second derivate are zero at both ends.
1906      * rsw        = max(r - r_switch, 0)
1907      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1908      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1909      * force      = force*dsw - potential*sw
1910      * potential *= sw
1911      */
1912     sc->c3 = -10*pow(rc - rsw, -3);
1913     sc->c4 =  15*pow(rc - rsw, -4);
1914     sc->c5 =  -6*pow(rc - rsw, -5);
1915 }
1916
1917 static void
1918 init_interaction_const(FILE                       *fp,
1919                        const t_commrec gmx_unused *cr,
1920                        interaction_const_t       **interaction_const,
1921                        const t_forcerec           *fr,
1922                        real                        rtab)
1923 {
1924     interaction_const_t *ic;
1925     gmx_bool             bUsesSimpleTables = TRUE;
1926
1927     snew(ic, 1);
1928
1929     /* Just allocate something so we can free it */
1930     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1931     snew_aligned(ic->tabq_coul_F, 16, 32);
1932     snew_aligned(ic->tabq_coul_V, 16, 32);
1933
1934     ic->rlist           = fr->rlist;
1935     ic->rlistlong       = fr->rlistlong;
1936
1937     /* Lennard-Jones */
1938     ic->vdwtype         = fr->vdwtype;
1939     ic->vdw_modifier    = fr->vdw_modifier;
1940     ic->rvdw            = fr->rvdw;
1941     ic->rvdw_switch     = fr->rvdw_switch;
1942     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
1943     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1944     ic->sh_lj_ewald     = 0;
1945     clear_force_switch_constants(&ic->dispersion_shift);
1946     clear_force_switch_constants(&ic->repulsion_shift);
1947
1948     switch (ic->vdw_modifier)
1949     {
1950         case eintmodPOTSHIFT:
1951             /* Only shift the potential, don't touch the force */
1952             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1953             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
1954             if (EVDW_PME(ic->vdwtype))
1955             {
1956                 real crc2;
1957
1958                 if (fr->cutoff_scheme == ecutsGROUP)
1959                 {
1960                     gmx_fatal(FARGS, "Potential-shift is not (yet) implemented for LJ-PME with cutoff-scheme=group");
1961                 }
1962                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1963                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
1964             }
1965             break;
1966         case eintmodFORCESWITCH:
1967             /* Switch the force, switch and shift the potential */
1968             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1969                                    &ic->dispersion_shift);
1970             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1971                                    &ic->repulsion_shift);
1972             break;
1973         case eintmodPOTSWITCH:
1974             /* Switch the potential and force */
1975             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1976                                        &ic->vdw_switch);
1977             break;
1978         case eintmodNONE:
1979         case eintmodEXACTCUTOFF:
1980             /* Nothing to do here */
1981             break;
1982         default:
1983             gmx_incons("unimplemented potential modifier");
1984     }
1985
1986     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1987
1988     /* Electrostatics */
1989     ic->eeltype          = fr->eeltype;
1990     ic->coulomb_modifier = fr->coulomb_modifier;
1991     ic->rcoulomb         = fr->rcoulomb;
1992     ic->epsilon_r        = fr->epsilon_r;
1993     ic->epsfac           = fr->epsfac;
1994     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
1995
1996     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1997     {
1998         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1999     }
2000     else
2001     {
2002         ic->sh_ewald = 0;
2003     }
2004
2005     /* Reaction-field */
2006     if (EEL_RF(ic->eeltype))
2007     {
2008         ic->epsilon_rf = fr->epsilon_rf;
2009         ic->k_rf       = fr->k_rf;
2010         ic->c_rf       = fr->c_rf;
2011     }
2012     else
2013     {
2014         /* For plain cut-off we might use the reaction-field kernels */
2015         ic->epsilon_rf = ic->epsilon_r;
2016         ic->k_rf       = 0;
2017         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2018         {
2019             ic->c_rf   = 1/ic->rcoulomb;
2020         }
2021         else
2022         {
2023             ic->c_rf   = 0;
2024         }
2025     }
2026
2027     if (fp != NULL)
2028     {
2029         real dispersion_shift;
2030
2031         dispersion_shift = ic->dispersion_shift.cpot;
2032         if (EVDW_PME(ic->vdwtype))
2033         {
2034             dispersion_shift -= ic->sh_lj_ewald;
2035         }
2036         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2037                 ic->repulsion_shift.cpot, dispersion_shift);
2038
2039         if (ic->eeltype == eelCUT)
2040         {
2041             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2042         }
2043         else if (EEL_PME(ic->eeltype))
2044         {
2045             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2046         }
2047         fprintf(fp, "\n");
2048     }
2049
2050     *interaction_const = ic;
2051
2052     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2053     {
2054         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2055
2056         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2057          * also sharing texture references. To keep the code simple, we don't
2058          * treat texture references as shared resources, but this means that
2059          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2060          * Hence, to ensure that the non-bonded kernels don't start before all
2061          * texture binding operations are finished, we need to wait for all ranks
2062          * to arrive here before continuing.
2063          *
2064          * Note that we could omit this barrier if GPUs are not shared (or
2065          * texture objects are used), but as this is initialization code, there
2066          * is not point in complicating things.
2067          */
2068 #ifdef GMX_THREAD_MPI
2069         if (PAR(cr))
2070         {
2071             gmx_barrier(cr);
2072         }
2073 #endif  /* GMX_THREAD_MPI */
2074     }
2075
2076     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2077     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2078 }
2079
2080 static void init_nb_verlet(FILE                *fp,
2081                            nonbonded_verlet_t **nb_verlet,
2082                            gmx_bool             bFEP_NonBonded,
2083                            const t_inputrec    *ir,
2084                            const t_forcerec    *fr,
2085                            const t_commrec     *cr,
2086                            const char          *nbpu_opt)
2087 {
2088     nonbonded_verlet_t *nbv;
2089     int                 i;
2090     char               *env;
2091     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2092
2093     nbnxn_alloc_t      *nb_alloc;
2094     nbnxn_free_t       *nb_free;
2095
2096     snew(nbv, 1);
2097
2098     pick_nbnxn_resources(cr, fr->hwinfo,
2099                          fr->bNonbonded,
2100                          &nbv->bUseGPU,
2101                          &bEmulateGPU,
2102                          fr->gpu_opt);
2103
2104     nbv->nbs = NULL;
2105
2106     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2107     for (i = 0; i < nbv->ngrp; i++)
2108     {
2109         nbv->grp[i].nbl_lists.nnbl = 0;
2110         nbv->grp[i].nbat           = NULL;
2111         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2112
2113         if (i == 0) /* local */
2114         {
2115             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2116                               nbv->bUseGPU, bEmulateGPU, ir,
2117                               &nbv->grp[i].kernel_type,
2118                               &nbv->grp[i].ewald_excl,
2119                               fr->bNonbonded);
2120         }
2121         else /* non-local */
2122         {
2123             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2124             {
2125                 /* Use GPU for local, select a CPU kernel for non-local */
2126                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2127                                   FALSE, FALSE, ir,
2128                                   &nbv->grp[i].kernel_type,
2129                                   &nbv->grp[i].ewald_excl,
2130                                   fr->bNonbonded);
2131
2132                 bHybridGPURun = TRUE;
2133             }
2134             else
2135             {
2136                 /* Use the same kernel for local and non-local interactions */
2137                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2138                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2139             }
2140         }
2141     }
2142
2143     if (nbv->bUseGPU)
2144     {
2145         /* init the NxN GPU data; the last argument tells whether we'll have
2146          * both local and non-local NB calculation on GPU */
2147         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2148                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2149                         cr->rank_pp_intranode,
2150                         (nbv->ngrp > 1) && !bHybridGPURun);
2151
2152         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2153         {
2154             char *end;
2155
2156             nbv->min_ci_balanced = strtol(env, &end, 10);
2157             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2158             {
2159                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2160             }
2161
2162             if (debug)
2163             {
2164                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2165                         nbv->min_ci_balanced);
2166             }
2167         }
2168         else
2169         {
2170             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2171             if (debug)
2172             {
2173                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2174                         nbv->min_ci_balanced);
2175             }
2176         }
2177     }
2178     else
2179     {
2180         nbv->min_ci_balanced = 0;
2181     }
2182
2183     *nb_verlet = nbv;
2184
2185     nbnxn_init_search(&nbv->nbs,
2186                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2187                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2188                       bFEP_NonBonded,
2189                       gmx_omp_nthreads_get(emntNonbonded));
2190
2191     for (i = 0; i < nbv->ngrp; i++)
2192     {
2193         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2194         {
2195             nb_alloc = &pmalloc;
2196             nb_free  = &pfree;
2197         }
2198         else
2199         {
2200             nb_alloc = NULL;
2201             nb_free  = NULL;
2202         }
2203
2204         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2205                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2206                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2207                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2208                                 nb_alloc, nb_free);
2209
2210         if (i == 0 ||
2211             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2212         {
2213             gmx_bool bSimpleList;
2214             int      enbnxninitcombrule;
2215
2216             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2217
2218             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2219             {
2220                 /* Plain LJ cut-off: we can optimize with combination rules */
2221                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2222             }
2223             else if (fr->vdwtype == evdwPME)
2224             {
2225                 /* LJ-PME: we need to use a combination rule for the grid */
2226                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2227                 {
2228                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2229                 }
2230                 else
2231                 {
2232                     enbnxninitcombrule = enbnxninitcombruleLB;
2233                 }
2234             }
2235             else
2236             {
2237                 /* We use a full combination matrix: no rule required */
2238                 enbnxninitcombrule = enbnxninitcombruleNONE;
2239             }
2240
2241
2242             snew(nbv->grp[i].nbat, 1);
2243             nbnxn_atomdata_init(fp,
2244                                 nbv->grp[i].nbat,
2245                                 nbv->grp[i].kernel_type,
2246                                 enbnxninitcombrule,
2247                                 fr->ntype, fr->nbfp,
2248                                 ir->opts.ngener,
2249                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2250                                 nb_alloc, nb_free);
2251         }
2252         else
2253         {
2254             nbv->grp[i].nbat = nbv->grp[0].nbat;
2255         }
2256     }
2257 }
2258
2259 void init_forcerec(FILE              *fp,
2260                    const output_env_t oenv,
2261                    t_forcerec        *fr,
2262                    t_fcdata          *fcd,
2263                    const t_inputrec  *ir,
2264                    const gmx_mtop_t  *mtop,
2265                    const t_commrec   *cr,
2266                    matrix             box,
2267                    const char        *tabfn,
2268                    const char        *tabafn,
2269                    const char        *tabpfn,
2270                    const char        *tabbfn,
2271                    const char        *nbpu_opt,
2272                    gmx_bool           bNoSolvOpt,
2273                    real               print_force)
2274 {
2275     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2276     real           rtab;
2277     char          *env;
2278     double         dbl;
2279     const t_block *cgs;
2280     gmx_bool       bGenericKernelOnly;
2281     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2282     gmx_bool       bFEP_NonBonded;
2283     t_nblists     *nbl;
2284     int           *nm_ind, egp_flags;
2285
2286     if (fr->hwinfo == NULL)
2287     {
2288         /* Detect hardware, gather information.
2289          * In mdrun, hwinfo has already been set before calling init_forcerec.
2290          * Here we ignore GPUs, as tools will not use them anyhow.
2291          */
2292         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2293     }
2294
2295     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2296     fr->use_simd_kernels = TRUE;
2297
2298     fr->bDomDec = DOMAINDECOMP(cr);
2299
2300     natoms = mtop->natoms;
2301
2302     if (check_box(ir->ePBC, box))
2303     {
2304         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2305     }
2306
2307     /* Test particle insertion ? */
2308     if (EI_TPI(ir->eI))
2309     {
2310         /* Set to the size of the molecule to be inserted (the last one) */
2311         /* Because of old style topologies, we have to use the last cg
2312          * instead of the last molecule type.
2313          */
2314         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2315         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2316         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2317         {
2318             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2319         }
2320     }
2321     else
2322     {
2323         fr->n_tpi = 0;
2324     }
2325
2326     /* Copy AdResS parameters */
2327     if (ir->bAdress)
2328     {
2329         fr->adress_type           = ir->adress->type;
2330         fr->adress_const_wf       = ir->adress->const_wf;
2331         fr->adress_ex_width       = ir->adress->ex_width;
2332         fr->adress_hy_width       = ir->adress->hy_width;
2333         fr->adress_icor           = ir->adress->icor;
2334         fr->adress_site           = ir->adress->site;
2335         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2336         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2337
2338
2339         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2340         for (i = 0; i < ir->adress->n_energy_grps; i++)
2341         {
2342             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2343         }
2344
2345         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2346         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2347         for (i = 0; i < fr->n_adress_tf_grps; i++)
2348         {
2349             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2350         }
2351         copy_rvec(ir->adress->refs, fr->adress_refs);
2352     }
2353     else
2354     {
2355         fr->adress_type           = eAdressOff;
2356         fr->adress_do_hybridpairs = FALSE;
2357     }
2358
2359     /* Copy the user determined parameters */
2360     fr->userint1  = ir->userint1;
2361     fr->userint2  = ir->userint2;
2362     fr->userint3  = ir->userint3;
2363     fr->userint4  = ir->userint4;
2364     fr->userreal1 = ir->userreal1;
2365     fr->userreal2 = ir->userreal2;
2366     fr->userreal3 = ir->userreal3;
2367     fr->userreal4 = ir->userreal4;
2368
2369     /* Shell stuff */
2370     fr->fc_stepsize = ir->fc_stepsize;
2371
2372     /* Free energy */
2373     fr->efep        = ir->efep;
2374     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2375     if (ir->fepvals->bScCoul)
2376     {
2377         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2378         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2379     }
2380     else
2381     {
2382         fr->sc_alphacoul  = 0;
2383         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2384     }
2385     fr->sc_power      = ir->fepvals->sc_power;
2386     fr->sc_r_power    = ir->fepvals->sc_r_power;
2387     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2388
2389     env = getenv("GMX_SCSIGMA_MIN");
2390     if (env != NULL)
2391     {
2392         dbl = 0;
2393         sscanf(env, "%lf", &dbl);
2394         fr->sc_sigma6_min = pow(dbl, 6);
2395         if (fp)
2396         {
2397             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2398         }
2399     }
2400
2401     fr->bNonbonded = TRUE;
2402     if (getenv("GMX_NO_NONBONDED") != NULL)
2403     {
2404         /* turn off non-bonded calculations */
2405         fr->bNonbonded = FALSE;
2406         md_print_warn(cr, fp,
2407                       "Found environment variable GMX_NO_NONBONDED.\n"
2408                       "Disabling nonbonded calculations.\n");
2409     }
2410
2411     bGenericKernelOnly = FALSE;
2412
2413     /* We now check in the NS code whether a particular combination of interactions
2414      * can be used with water optimization, and disable it if that is not the case.
2415      */
2416
2417     if (getenv("GMX_NB_GENERIC") != NULL)
2418     {
2419         if (fp != NULL)
2420         {
2421             fprintf(fp,
2422                     "Found environment variable GMX_NB_GENERIC.\n"
2423                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2424                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2425         }
2426         bGenericKernelOnly = TRUE;
2427     }
2428
2429     if (bGenericKernelOnly == TRUE)
2430     {
2431         bNoSolvOpt         = TRUE;
2432     }
2433
2434     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2435     {
2436         fr->use_simd_kernels = FALSE;
2437         if (fp != NULL)
2438         {
2439             fprintf(fp,
2440                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2441                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2442         }
2443     }
2444
2445     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2446
2447     /* Check if we can/should do all-vs-all kernels */
2448     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2449     fr->AllvsAll_work   = NULL;
2450     fr->AllvsAll_workgb = NULL;
2451
2452     /* All-vs-all kernels have not been implemented in 4.6, and
2453      * the SIMD group kernels are also buggy in this case. Non-SIMD
2454      * group kernels are OK. See Redmine #1249. */
2455     if (fr->bAllvsAll)
2456     {
2457         fr->bAllvsAll            = FALSE;
2458         fr->use_simd_kernels     = FALSE;
2459         if (fp != NULL)
2460         {
2461             fprintf(fp,
2462                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2463                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2464                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2465                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2466                     "we are proceeding by disabling all CPU architecture-specific\n"
2467                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2468                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2469         }
2470     }
2471
2472     /* Neighbour searching stuff */
2473     fr->cutoff_scheme = ir->cutoff_scheme;
2474     fr->bGrid         = (ir->ns_type == ensGRID);
2475     fr->ePBC          = ir->ePBC;
2476
2477     if (fr->cutoff_scheme == ecutsGROUP)
2478     {
2479         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2480             "removed in a future release when 'verlet' supports all interaction forms.\n";
2481
2482         if (MASTER(cr))
2483         {
2484             fprintf(stderr, "\n%s\n", note);
2485         }
2486         if (fp != NULL)
2487         {
2488             fprintf(fp, "\n%s\n", note);
2489         }
2490     }
2491
2492     /* Determine if we will do PBC for distances in bonded interactions */
2493     if (fr->ePBC == epbcNONE)
2494     {
2495         fr->bMolPBC = FALSE;
2496     }
2497     else
2498     {
2499         if (!DOMAINDECOMP(cr))
2500         {
2501             /* The group cut-off scheme and SHAKE assume charge groups
2502              * are whole, but not using molpbc is faster in most cases.
2503              */
2504             if (fr->cutoff_scheme == ecutsGROUP ||
2505                 (ir->eConstrAlg == econtSHAKE &&
2506                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2507                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2508             {
2509                 fr->bMolPBC = ir->bPeriodicMols;
2510             }
2511             else
2512             {
2513                 fr->bMolPBC = TRUE;
2514                 if (getenv("GMX_USE_GRAPH") != NULL)
2515                 {
2516                     fr->bMolPBC = FALSE;
2517                     if (fp)
2518                     {
2519                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2520                     }
2521                 }
2522             }
2523         }
2524         else
2525         {
2526             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2527         }
2528     }
2529     fr->bGB = (ir->implicit_solvent == eisGBSA);
2530
2531     fr->rc_scaling = ir->refcoord_scaling;
2532     copy_rvec(ir->posres_com, fr->posres_com);
2533     copy_rvec(ir->posres_comB, fr->posres_comB);
2534     fr->rlist                    = cutoff_inf(ir->rlist);
2535     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2536     fr->eeltype                  = ir->coulombtype;
2537     fr->vdwtype                  = ir->vdwtype;
2538     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2539
2540     fr->coulomb_modifier = ir->coulomb_modifier;
2541     fr->vdw_modifier     = ir->vdw_modifier;
2542
2543     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2544     switch (fr->eeltype)
2545     {
2546         case eelCUT:
2547             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2548             break;
2549
2550         case eelRF:
2551         case eelGRF:
2552         case eelRF_NEC:
2553             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2554             break;
2555
2556         case eelRF_ZERO:
2557             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2558             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2559             break;
2560
2561         case eelSWITCH:
2562         case eelSHIFT:
2563         case eelUSER:
2564         case eelENCADSHIFT:
2565         case eelPMESWITCH:
2566         case eelPMEUSER:
2567         case eelPMEUSERSWITCH:
2568             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2569             break;
2570
2571         case eelPME:
2572         case eelEWALD:
2573             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2574             break;
2575
2576         default:
2577             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2578             break;
2579     }
2580
2581     /* Vdw: Translate from mdp settings to kernel format */
2582     switch (fr->vdwtype)
2583     {
2584         case evdwCUT:
2585         case evdwPME:
2586             if (fr->bBHAM)
2587             {
2588                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2589             }
2590             else
2591             {
2592                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2593             }
2594             break;
2595
2596         case evdwSWITCH:
2597         case evdwSHIFT:
2598         case evdwUSER:
2599         case evdwENCADSHIFT:
2600             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2601             break;
2602
2603         default:
2604             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2605             break;
2606     }
2607
2608     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2609     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2610     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2611
2612     fr->bTwinRange = fr->rlistlong > fr->rlist;
2613     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2614
2615     fr->reppow     = mtop->ffparams.reppow;
2616
2617     if (ir->cutoff_scheme == ecutsGROUP)
2618     {
2619         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2620                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2621         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2622         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2623                            fr->eeltype == eelEWALD ||
2624                            fr->eeltype == eelPME ||
2625                            fr->eeltype == eelRF ||
2626                            fr->eeltype == eelRF_ZERO);
2627
2628         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2629          * going to be faster to tabulate the interaction than calling the generic kernel.
2630          */
2631         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2632         {
2633             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2634             {
2635                 fr->bcoultab = TRUE;
2636             }
2637         }
2638         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2639                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2640                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2641                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2642         {
2643             if (fr->rcoulomb != fr->rvdw)
2644             {
2645                 fr->bcoultab = TRUE;
2646             }
2647         }
2648
2649         if (getenv("GMX_REQUIRE_TABLES"))
2650         {
2651             fr->bvdwtab  = TRUE;
2652             fr->bcoultab = TRUE;
2653         }
2654
2655         if (fp)
2656         {
2657             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2658             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2659         }
2660
2661         if (fr->bvdwtab == TRUE)
2662         {
2663             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2664             fr->nbkernel_vdw_modifier    = eintmodNONE;
2665         }
2666         if (fr->bcoultab == TRUE)
2667         {
2668             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2669             fr->nbkernel_elec_modifier    = eintmodNONE;
2670         }
2671     }
2672
2673     if (ir->cutoff_scheme == ecutsVERLET)
2674     {
2675         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2676         {
2677             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2678         }
2679         fr->bvdwtab  = FALSE;
2680         fr->bcoultab = FALSE;
2681     }
2682
2683     /* Tables are used for direct ewald sum */
2684     if (fr->bEwald)
2685     {
2686         if (EEL_PME(ir->coulombtype))
2687         {
2688             if (fp)
2689             {
2690                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2691             }
2692             if (ir->coulombtype == eelP3M_AD)
2693             {
2694                 please_cite(fp, "Hockney1988");
2695                 please_cite(fp, "Ballenegger2012");
2696             }
2697             else
2698             {
2699                 please_cite(fp, "Essmann95a");
2700             }
2701
2702             if (ir->ewald_geometry == eewg3DC)
2703             {
2704                 if (fp)
2705                 {
2706                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2707                 }
2708                 please_cite(fp, "In-Chul99a");
2709             }
2710         }
2711         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2712         init_ewald_tab(&(fr->ewald_table), ir, fp);
2713         if (fp)
2714         {
2715             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2716                     1/fr->ewaldcoeff_q);
2717         }
2718     }
2719
2720     if (EVDW_PME(ir->vdwtype))
2721     {
2722         if (fp)
2723         {
2724             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2725         }
2726         please_cite(fp, "Essmann95a");
2727         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2728         if (fp)
2729         {
2730             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2731                     1/fr->ewaldcoeff_lj);
2732         }
2733     }
2734
2735     /* Electrostatics */
2736     fr->epsilon_r       = ir->epsilon_r;
2737     fr->epsilon_rf      = ir->epsilon_rf;
2738     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2739     fr->rcoulomb_switch = ir->rcoulomb_switch;
2740     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2741
2742     /* Parameters for generalized RF */
2743     fr->zsquare = 0.0;
2744     fr->temp    = 0.0;
2745
2746     if (fr->eeltype == eelGRF)
2747     {
2748         init_generalized_rf(fp, mtop, ir, fr);
2749     }
2750
2751     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2752                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2753                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2754                        IR_ELEC_FIELD(*ir) ||
2755                        (fr->adress_icor != eAdressICOff)
2756                        );
2757
2758     if (fr->cutoff_scheme == ecutsGROUP &&
2759         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2760     {
2761         /* Count the total number of charge groups */
2762         fr->cg_nalloc = ncg_mtop(mtop);
2763         srenew(fr->cg_cm, fr->cg_nalloc);
2764     }
2765     if (fr->shift_vec == NULL)
2766     {
2767         snew(fr->shift_vec, SHIFTS);
2768     }
2769
2770     if (fr->fshift == NULL)
2771     {
2772         snew(fr->fshift, SHIFTS);
2773     }
2774
2775     if (fr->nbfp == NULL)
2776     {
2777         fr->ntype = mtop->ffparams.atnr;
2778         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2779     }
2780
2781     /* Copy the energy group exclusions */
2782     fr->egp_flags = ir->opts.egp_flags;
2783
2784     /* Van der Waals stuff */
2785     fr->rvdw        = cutoff_inf(ir->rvdw);
2786     fr->rvdw_switch = ir->rvdw_switch;
2787     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2788     {
2789         if (fr->rvdw_switch >= fr->rvdw)
2790         {
2791             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2792                       fr->rvdw_switch, fr->rvdw);
2793         }
2794         if (fp)
2795         {
2796             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2797                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2798                     fr->rvdw_switch, fr->rvdw);
2799         }
2800     }
2801
2802     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2803     {
2804         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2805     }
2806
2807     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2808     {
2809         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2810     }
2811
2812     if (fp)
2813     {
2814         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2815                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2816     }
2817
2818     fr->eDispCorr = ir->eDispCorr;
2819     if (ir->eDispCorr != edispcNO)
2820     {
2821         set_avcsixtwelve(fp, fr, mtop);
2822     }
2823
2824     if (fr->bBHAM)
2825     {
2826         set_bham_b_max(fp, fr, mtop);
2827     }
2828
2829     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2830
2831     /* Copy the GBSA data (radius, volume and surftens for each
2832      * atomtype) from the topology atomtype section to forcerec.
2833      */
2834     snew(fr->atype_radius, fr->ntype);
2835     snew(fr->atype_vol, fr->ntype);
2836     snew(fr->atype_surftens, fr->ntype);
2837     snew(fr->atype_gb_radius, fr->ntype);
2838     snew(fr->atype_S_hct, fr->ntype);
2839
2840     if (mtop->atomtypes.nr > 0)
2841     {
2842         for (i = 0; i < fr->ntype; i++)
2843         {
2844             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2845         }
2846         for (i = 0; i < fr->ntype; i++)
2847         {
2848             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2849         }
2850         for (i = 0; i < fr->ntype; i++)
2851         {
2852             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2853         }
2854         for (i = 0; i < fr->ntype; i++)
2855         {
2856             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2857         }
2858         for (i = 0; i < fr->ntype; i++)
2859         {
2860             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2861         }
2862     }
2863
2864     /* Generate the GB table if needed */
2865     if (fr->bGB)
2866     {
2867 #ifdef GMX_DOUBLE
2868         fr->gbtabscale = 2000;
2869 #else
2870         fr->gbtabscale = 500;
2871 #endif
2872
2873         fr->gbtabr = 100;
2874         fr->gbtab  = make_gb_table(oenv, fr);
2875
2876         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2877
2878         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2879         if (!DOMAINDECOMP(cr))
2880         {
2881             make_local_gb(cr, fr->born, ir->gb_algorithm);
2882         }
2883     }
2884
2885     /* Set the charge scaling */
2886     if (fr->epsilon_r != 0)
2887     {
2888         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2889     }
2890     else
2891     {
2892         /* eps = 0 is infinite dieletric: no coulomb interactions */
2893         fr->epsfac = 0;
2894     }
2895
2896     /* Reaction field constants */
2897     if (EEL_RF(fr->eeltype))
2898     {
2899         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2900                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2901                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2902     }
2903
2904     /*This now calculates sum for q and c6*/
2905     set_chargesum(fp, fr, mtop);
2906
2907     /* if we are using LR electrostatics, and they are tabulated,
2908      * the tables will contain modified coulomb interactions.
2909      * Since we want to use the non-shifted ones for 1-4
2910      * coulombic interactions, we must have an extra set of tables.
2911      */
2912
2913     /* Construct tables.
2914      * A little unnecessary to make both vdw and coul tables sometimes,
2915      * but what the heck... */
2916
2917     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2918         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2919
2920     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2921                              fr->bBHAM || fr->bEwald) &&
2922                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2923                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2924                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2925
2926     negp_pp   = ir->opts.ngener - ir->nwall;
2927     negptable = 0;
2928     if (!bMakeTables)
2929     {
2930         bSomeNormalNbListsAreInUse = TRUE;
2931         fr->nnblists               = 1;
2932     }
2933     else
2934     {
2935         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2936         for (egi = 0; egi < negp_pp; egi++)
2937         {
2938             for (egj = egi; egj < negp_pp; egj++)
2939             {
2940                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2941                 if (!(egp_flags & EGP_EXCL))
2942                 {
2943                     if (egp_flags & EGP_TABLE)
2944                     {
2945                         negptable++;
2946                     }
2947                     else
2948                     {
2949                         bSomeNormalNbListsAreInUse = TRUE;
2950                     }
2951                 }
2952             }
2953         }
2954         if (bSomeNormalNbListsAreInUse)
2955         {
2956             fr->nnblists = negptable + 1;
2957         }
2958         else
2959         {
2960             fr->nnblists = negptable;
2961         }
2962         if (fr->nnblists > 1)
2963         {
2964             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2965         }
2966     }
2967
2968     if (ir->adress)
2969     {
2970         fr->nnblists *= 2;
2971     }
2972
2973     snew(fr->nblists, fr->nnblists);
2974
2975     /* This code automatically gives table length tabext without cut-off's,
2976      * in that case grompp should already have checked that we do not need
2977      * normal tables and we only generate tables for 1-4 interactions.
2978      */
2979     rtab = ir->rlistlong + ir->tabext;
2980
2981     if (bMakeTables)
2982     {
2983         /* make tables for ordinary interactions */
2984         if (bSomeNormalNbListsAreInUse)
2985         {
2986             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2987             if (ir->adress)
2988             {
2989                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2990             }
2991             if (!bMakeSeparate14Table)
2992             {
2993                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2994             }
2995             m = 1;
2996         }
2997         else
2998         {
2999             m = 0;
3000         }
3001         if (negptable > 0)
3002         {
3003             /* Read the special tables for certain energy group pairs */
3004             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3005             for (egi = 0; egi < negp_pp; egi++)
3006             {
3007                 for (egj = egi; egj < negp_pp; egj++)
3008                 {
3009                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3010                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3011                     {
3012                         nbl = &(fr->nblists[m]);
3013                         if (fr->nnblists > 1)
3014                         {
3015                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3016                         }
3017                         /* Read the table file with the two energy groups names appended */
3018                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3019                                         *mtop->groups.grpname[nm_ind[egi]],
3020                                         *mtop->groups.grpname[nm_ind[egj]],
3021                                         &fr->nblists[m]);
3022                         if (ir->adress)
3023                         {
3024                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3025                                             *mtop->groups.grpname[nm_ind[egi]],
3026                                             *mtop->groups.grpname[nm_ind[egj]],
3027                                             &fr->nblists[fr->nnblists/2+m]);
3028                         }
3029                         m++;
3030                     }
3031                     else if (fr->nnblists > 1)
3032                     {
3033                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3034                     }
3035                 }
3036             }
3037         }
3038     }
3039     if (bMakeSeparate14Table)
3040     {
3041         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3042         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3043                                 GMX_MAKETABLES_14ONLY);
3044     }
3045
3046     /* Read AdResS Thermo Force table if needed */
3047     if (fr->adress_icor == eAdressICThermoForce)
3048     {
3049         /* old todo replace */
3050
3051         if (ir->adress->n_tf_grps > 0)
3052         {
3053             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3054
3055         }
3056         else
3057         {
3058             /* load the default table */
3059             snew(fr->atf_tabs, 1);
3060             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3061         }
3062     }
3063
3064     /* Wall stuff */
3065     fr->nwall = ir->nwall;
3066     if (ir->nwall && ir->wall_type == ewtTABLE)
3067     {
3068         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3069     }
3070
3071     if (fcd && tabbfn)
3072     {
3073         fcd->bondtab  = make_bonded_tables(fp,
3074                                            F_TABBONDS, F_TABBONDSNC,
3075                                            mtop, tabbfn, "b");
3076         fcd->angletab = make_bonded_tables(fp,
3077                                            F_TABANGLES, -1,
3078                                            mtop, tabbfn, "a");
3079         fcd->dihtab   = make_bonded_tables(fp,
3080                                            F_TABDIHS, -1,
3081                                            mtop, tabbfn, "d");
3082     }
3083     else
3084     {
3085         if (debug)
3086         {
3087             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3088         }
3089     }
3090
3091     /* QM/MM initialization if requested
3092      */
3093     if (ir->bQMMM)
3094     {
3095         fprintf(stderr, "QM/MM calculation requested.\n");
3096     }
3097
3098     fr->bQMMM      = ir->bQMMM;
3099     fr->qr         = mk_QMMMrec();
3100
3101     /* Set all the static charge group info */
3102     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3103                                    &bFEP_NonBonded,
3104                                    &fr->bExcl_IntraCGAll_InterCGNone);
3105     if (DOMAINDECOMP(cr))
3106     {
3107         fr->cginfo = NULL;
3108     }
3109     else
3110     {
3111         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3112     }
3113
3114     if (!DOMAINDECOMP(cr))
3115     {
3116         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3117                             mtop->natoms, mtop->natoms, mtop->natoms);
3118     }
3119
3120     fr->print_force = print_force;
3121
3122
3123     /* coarse load balancing vars */
3124     fr->t_fnbf    = 0.;
3125     fr->t_wait    = 0.;
3126     fr->timesteps = 0;
3127
3128     /* Initialize neighbor search */
3129     init_ns(fp, cr, &fr->ns, fr, mtop);
3130
3131     if (cr->duty & DUTY_PP)
3132     {
3133         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3134         /*
3135            if (ir->bAdress)
3136             {
3137                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3138             }
3139          */
3140     }
3141
3142     /* Initialize the thread working data for bonded interactions */
3143     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3144
3145     snew(fr->excl_load, fr->nthreads+1);
3146
3147     if (fr->cutoff_scheme == ecutsVERLET)
3148     {
3149         if (ir->rcoulomb != ir->rvdw)
3150         {
3151             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3152         }
3153
3154         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3155     }
3156
3157     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3158     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3159
3160     if (ir->eDispCorr != edispcNO)
3161     {
3162         calc_enervirdiff(fp, ir->eDispCorr, fr);
3163     }
3164 }
3165
3166 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3167 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3168 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3169
3170 void pr_forcerec(FILE *fp, t_forcerec *fr)
3171 {
3172     int i;
3173
3174     pr_real(fp, fr->rlist);
3175     pr_real(fp, fr->rcoulomb);
3176     pr_real(fp, fr->fudgeQQ);
3177     pr_bool(fp, fr->bGrid);
3178     pr_bool(fp, fr->bTwinRange);
3179     /*pr_int(fp,fr->cg0);
3180        pr_int(fp,fr->hcg);*/
3181     for (i = 0; i < fr->nnblists; i++)
3182     {
3183         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3184     }
3185     pr_real(fp, fr->rcoulomb_switch);
3186     pr_real(fp, fr->rcoulomb);
3187
3188     fflush(fp);
3189 }
3190
3191 void forcerec_set_excl_load(t_forcerec           *fr,
3192                             const gmx_localtop_t *top)
3193 {
3194     const int *ind, *a;
3195     int        t, i, j, ntot, n, ntarget;
3196
3197     ind = top->excls.index;
3198     a   = top->excls.a;
3199
3200     ntot = 0;
3201     for (i = 0; i < top->excls.nr; i++)
3202     {
3203         for (j = ind[i]; j < ind[i+1]; j++)
3204         {
3205             if (a[j] > i)
3206             {
3207                 ntot++;
3208             }
3209         }
3210     }
3211
3212     fr->excl_load[0] = 0;
3213     n                = 0;
3214     i                = 0;
3215     for (t = 1; t <= fr->nthreads; t++)
3216     {
3217         ntarget = (ntot*t)/fr->nthreads;
3218         while (i < top->excls.nr && n < ntarget)
3219         {
3220             for (j = ind[i]; j < ind[i+1]; j++)
3221             {
3222                 if (a[j] > i)
3223                 {
3224                     n++;
3225                 }
3226             }
3227             i++;
3228         }
3229         fr->excl_load[t] = i;
3230     }
3231 }