src/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43 #include <string.h>
  44 #include "assert.h"
  45 #include "sysstuff.h"
  46 #include "typedefs.h"
  47 #include "vec.h"
  48 #include "maths.h"
  49 #include "macros.h"
  50 #include "smalloc.h"
  51 #include "macros.h"
  52 #include "gmx_fatal.h"
  53 #include "gmx_fatal_collective.h"
  54 #include "physics.h"
  55 #include "force.h"
  56 #include "tables.h"
  57 #include "nonbonded.h"
  58 #include "invblock.h"
  59 #include "names.h"
  60 #include "network.h"
  61 #include "pbc.h"
  62 #include "ns.h"
  63 #include "mshift.h"
  64 #include "txtdump.h"
  65 #include "coulomb.h"
  66 #include "md_support.h"
  67 #include "md_logging.h"
  68 #include "domdec.h"
  69 #include "partdec.h"
  70 #include "qmmm.h"
  71 #include "copyrite.h"
  72 #include "mtop_util.h"
  73 #include "nbnxn_search.h"
  74 #include "nbnxn_atomdata.h"
  75 #include "nbnxn_consts.h"
  76 #include "statutil.h"
  77 #include "gmx_omp_nthreads.h"
  78 #include "gmx_detect_hardware.h"
  79
  80 #ifdef _MSC_VER
  81 /* MSVC definition for __cpuid() */
  82 #include <intrin.h>
  83 #endif
  84
  85 #include "types/nbnxn_cuda_types_ext.h"
  86 #include "gpu_utils.h"
  87 #include "nbnxn_cuda_data_mgmt.h"
  88 #include "pmalloc_cuda.h"
  89
  90 t_forcerec *mk_forcerec(void)
  91 {
  92     t_forcerec *fr;
  93
  94     snew(fr, 1);
  95
  96     return fr;
  97 }
  98
  99 #ifdef DEBUG
 100 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 101 {
 102     int i, j;
 103
 104     for (i = 0; (i < atnr); i++)
 105     {
 106         for (j = 0; (j < atnr); j++)
 107         {
 108             fprintf(fp, "%2d - %2d", i, j);
 109             if (bBHAM)
 110             {
 111                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 112                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 113             }
 114             else
 115             {
 116                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 117                         C12(nbfp, atnr, i, j)/12.0);
 118             }
 119         }
 120     }
 121 }
 122 #endif
 123
 124 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 125 {
 126     real *nbfp;
 127     int   i, j, k, atnr;
 128
 129     atnr = idef->atnr;
 130     if (bBHAM)
 131     {
 132         snew(nbfp, 3*atnr*atnr);
 133         for (i = k = 0; (i < atnr); i++)
 134         {
 135             for (j = 0; (j < atnr); j++, k++)
 136             {
 137                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 138                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 139                 /* nbfp now includes the 6.0 derivative prefactor */
 140                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 141             }
 142         }
 143     }
 144     else
 145     {
 146         snew(nbfp, 2*atnr*atnr);
 147         for (i = k = 0; (i < atnr); i++)
 148         {
 149             for (j = 0; (j < atnr); j++, k++)
 150             {
 151                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 152                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 153                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 154             }
 155         }
 156     }
 157
 158     return nbfp;
 159 }
 160
 161 /* This routine sets fr->solvent_opt to the most common solvent in the
 162  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 163  * the fr->solvent_type array with the correct type (or esolNO).
 164  *
 165  * Charge groups that fulfill the conditions but are not identical to the
 166  * most common one will be marked as esolNO in the solvent_type array.
 167  *
 168  * TIP3p is identical to SPC for these purposes, so we call it
 169  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 170  *
 171  * NOTE: QM particle should not
 172  * become an optimized solvent. Not even if there is only one charge
 173  * group in the Qm
 174  */
 175
 176 typedef struct
 177 {
 178     int    model;
 179     int    count;
 180     int    vdwtype[4];
 181     real   charge[4];
 182 } solvent_parameters_t;
 183
 184 static void
 185 check_solvent_cg(const gmx_moltype_t    *molt,
 186                  int                     cg0,
 187                  int                     nmol,
 188                  const unsigned char    *qm_grpnr,
 189                  const t_grps           *qm_grps,
 190                  t_forcerec   *          fr,
 191                  int                    *n_solvent_parameters,
 192                  solvent_parameters_t  **solvent_parameters_p,
 193                  int                     cginfo,
 194                  int                    *cg_sp)
 195 {
 196     const t_blocka     *  excl;
 197     t_atom               *atom;
 198     int                   j, k;
 199     int                   j0, j1, nj;
 200     gmx_bool              perturbed;
 201     gmx_bool              has_vdw[4];
 202     gmx_bool              match;
 203     real                  tmp_charge[4];
 204     int                   tmp_vdwtype[4];
 205     int                   tjA;
 206     gmx_bool              qm;
 207     solvent_parameters_t *solvent_parameters;
 208
 209     /* We use a list with parameters for each solvent type.
 210      * Every time we discover a new molecule that fulfills the basic
 211      * conditions for a solvent we compare with the previous entries
 212      * in these lists. If the parameters are the same we just increment
 213      * the counter for that type, and otherwise we create a new type
 214      * based on the current molecule.
 215      *
 216      * Once we've finished going through all molecules we check which
 217      * solvent is most common, and mark all those molecules while we
 218      * clear the flag on all others.
 219      */
 220
 221     solvent_parameters = *solvent_parameters_p;
 222
 223     /* Mark the cg first as non optimized */
 224     *cg_sp = -1;
 225
 226     /* Check if this cg has no exclusions with atoms in other charge groups
 227      * and all atoms inside the charge group excluded.
 228      * We only have 3 or 4 atom solvent loops.
 229      */
 230     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 231         !GET_CGINFO_EXCL_INTRA(cginfo))
 232     {
 233         return;
 234     }
 235
 236     /* Get the indices of the first atom in this charge group */
 237     j0     = molt->cgs.index[cg0];
 238     j1     = molt->cgs.index[cg0+1];
 239
 240     /* Number of atoms in our molecule */
 241     nj     = j1 - j0;
 242
 243     if (debug)
 244     {
 245         fprintf(debug,
 246                 "Moltype '%s': there are %d atoms in this charge group\n",
 247                 *molt->name, nj);
 248     }
 249
 250     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 251      * otherwise skip it.
 252      */
 253     if (nj < 3 || nj > 4)
 254     {
 255         return;
 256     }
 257
 258     /* Check if we are doing QM on this group */
 259     qm = FALSE;
 260     if (qm_grpnr != NULL)
 261     {
 262         for (j = j0; j < j1 && !qm; j++)
 263         {
 264             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 265         }
 266     }
 267     /* Cannot use solvent optimization with QM */
 268     if (qm)
 269     {
 270         return;
 271     }
 272
 273     atom = molt->atoms.atom;
 274
 275     /* Still looks like a solvent, time to check parameters */
 276
 277     /* If it is perturbed (free energy) we can't use the solvent loops,
 278      * so then we just skip to the next molecule.
 279      */
 280     perturbed = FALSE;
 281
 282     for (j = j0; j < j1 && !perturbed; j++)
 283     {
 284         perturbed = PERTURBED(atom[j]);
 285     }
 286
 287     if (perturbed)
 288     {
 289         return;
 290     }
 291
 292     /* Now it's only a question if the VdW and charge parameters
 293      * are OK. Before doing the check we compare and see if they are
 294      * identical to a possible previous solvent type.
 295      * First we assign the current types and charges.
 296      */
 297     for (j = 0; j < nj; j++)
 298     {
 299         tmp_vdwtype[j] = atom[j0+j].type;
 300         tmp_charge[j]  = atom[j0+j].q;
 301     }
 302
 303     /* Does it match any previous solvent type? */
 304     for (k = 0; k < *n_solvent_parameters; k++)
 305     {
 306         match = TRUE;
 307
 308
 309         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 310         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 311              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 312         {
 313             match = FALSE;
 314         }
 315
 316         /* Check that types & charges match for all atoms in molecule */
 317         for (j = 0; j < nj && match == TRUE; j++)
 318         {
 319             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 320             {
 321                 match = FALSE;
 322             }
 323             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 324             {
 325                 match = FALSE;
 326             }
 327         }
 328         if (match == TRUE)
 329         {
 330             /* Congratulations! We have a matched solvent.
 331              * Flag it with this type for later processing.
 332              */
 333             *cg_sp = k;
 334             solvent_parameters[k].count += nmol;
 335
 336             /* We are done with this charge group */
 337             return;
 338         }
 339     }
 340
 341     /* If we get here, we have a tentative new solvent type.
 342      * Before we add it we must check that it fulfills the requirements
 343      * of the solvent optimized loops. First determine which atoms have
 344      * VdW interactions.
 345      */
 346     for (j = 0; j < nj; j++)
 347     {
 348         has_vdw[j] = FALSE;
 349         tjA        = tmp_vdwtype[j];
 350
 351         /* Go through all other tpes and see if any have non-zero
 352          * VdW parameters when combined with this one.
 353          */
 354         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 355         {
 356             /* We already checked that the atoms weren't perturbed,
 357              * so we only need to check state A now.
 358              */
 359             if (fr->bBHAM)
 360             {
 361                 has_vdw[j] = (has_vdw[j] ||
 362                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 363                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 364                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 365             }
 366             else
 367             {
 368                 /* Standard LJ */
 369                 has_vdw[j] = (has_vdw[j] ||
 370                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 371                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 372             }
 373         }
 374     }
 375
 376     /* Now we know all we need to make the final check and assignment. */
 377     if (nj == 3)
 378     {
 379         /* So, is it an SPC?
 380          * For this we require thatn all atoms have charge,
 381          * the charges on atom 2 & 3 should be the same, and only
 382          * atom 1 might have VdW.
 383          */
 384         if (has_vdw[1] == FALSE &&
 385             has_vdw[2] == FALSE &&
 386             tmp_charge[0]  != 0 &&
 387             tmp_charge[1]  != 0 &&
 388             tmp_charge[2]  == tmp_charge[1])
 389         {
 390             srenew(solvent_parameters, *n_solvent_parameters+1);
 391             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 392             solvent_parameters[*n_solvent_parameters].count = nmol;
 393             for (k = 0; k < 3; k++)
 394             {
 395                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 396                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 397             }
 398
 399             *cg_sp = *n_solvent_parameters;
 400             (*n_solvent_parameters)++;
 401         }
 402     }
 403     else if (nj == 4)
 404     {
 405         /* Or could it be a TIP4P?
 406          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 407          * Only atom 1 mght have VdW.
 408          */
 409         if (has_vdw[1] == FALSE &&
 410             has_vdw[2] == FALSE &&
 411             has_vdw[3] == FALSE &&
 412             tmp_charge[0]  == 0 &&
 413             tmp_charge[1]  != 0 &&
 414             tmp_charge[2]  == tmp_charge[1] &&
 415             tmp_charge[3]  != 0)
 416         {
 417             srenew(solvent_parameters, *n_solvent_parameters+1);
 418             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 419             solvent_parameters[*n_solvent_parameters].count = nmol;
 420             for (k = 0; k < 4; k++)
 421             {
 422                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 423                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 424             }
 425
 426             *cg_sp = *n_solvent_parameters;
 427             (*n_solvent_parameters)++;
 428         }
 429     }
 430
 431     *solvent_parameters_p = solvent_parameters;
 432 }
 433
 434 static void
 435 check_solvent(FILE  *                fp,
 436               const gmx_mtop_t  *    mtop,
 437               t_forcerec  *          fr,
 438               cginfo_mb_t           *cginfo_mb)
 439 {
 440     const t_block     *   cgs;
 441     const t_block     *   mols;
 442     const gmx_moltype_t  *molt;
 443     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 444     int                   n_solvent_parameters;
 445     solvent_parameters_t *solvent_parameters;
 446     int                 **cg_sp;
 447     int                   bestsp, bestsol;
 448
 449     if (debug)
 450     {
 451         fprintf(debug, "Going to determine what solvent types we have.\n");
 452     }
 453
 454     mols = &mtop->mols;
 455
 456     n_solvent_parameters = 0;
 457     solvent_parameters   = NULL;
 458     /* Allocate temporary array for solvent type */
 459     snew(cg_sp, mtop->nmolblock);
 460
 461     cg_offset = 0;
 462     at_offset = 0;
 463     for (mb = 0; mb < mtop->nmolblock; mb++)
 464     {
 465         molt = &mtop->moltype[mtop->molblock[mb].type];
 466         cgs  = &molt->cgs;
 467         /* Here we have to loop over all individual molecules
 468          * because we need to check for QMMM particles.
 469          */
 470         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 471         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 472         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 473         for (mol = 0; mol < nmol_ch; mol++)
 474         {
 475             cgm = mol*cgs->nr;
 476             am  = mol*cgs->index[cgs->nr];
 477             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 478             {
 479                 check_solvent_cg(molt, cg_mol, nmol,
 480                                  mtop->groups.grpnr[egcQMMM] ?
 481                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 482                                  &mtop->groups.grps[egcQMMM],
 483                                  fr,
 484                                  &n_solvent_parameters, &solvent_parameters,
 485                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 486                                  &cg_sp[mb][cgm+cg_mol]);
 487             }
 488         }
 489         cg_offset += cgs->nr;
 490         at_offset += cgs->index[cgs->nr];
 491     }
 492
 493     /* Puh! We finished going through all charge groups.
 494      * Now find the most common solvent model.
 495      */
 496
 497     /* Most common solvent this far */
 498     bestsp = -2;
 499     for (i = 0; i < n_solvent_parameters; i++)
 500     {
 501         if (bestsp == -2 ||
 502             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 503         {
 504             bestsp = i;
 505         }
 506     }
 507
 508     if (bestsp >= 0)
 509     {
 510         bestsol = solvent_parameters[bestsp].model;
 511     }
 512     else
 513     {
 514         bestsol = esolNO;
 515     }
 516
 517 #ifdef DISABLE_WATER_NLIST
 518     bestsol = esolNO;
 519 #endif
 520
 521     fr->nWatMol = 0;
 522     for (mb = 0; mb < mtop->nmolblock; mb++)
 523     {
 524         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 525         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 526         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 527         {
 528             if (cg_sp[mb][i] == bestsp)
 529             {
 530                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 531                 fr->nWatMol += nmol;
 532             }
 533             else
 534             {
 535                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 536             }
 537         }
 538         sfree(cg_sp[mb]);
 539     }
 540     sfree(cg_sp);
 541
 542     if (bestsol != esolNO && fp != NULL)
 543     {
 544         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 545                 esol_names[bestsol],
 546                 solvent_parameters[bestsp].count);
 547     }
 548
 549     sfree(solvent_parameters);
 550     fr->solvent_opt = bestsol;
 551 }
 552
 553 enum {
 554     acNONE = 0, acCONSTRAINT, acSETTLE
 555 };
 556
 557 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 558                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 559                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 560 {
 561     const t_block        *cgs;
 562     const t_blocka       *excl;
 563     const gmx_moltype_t  *molt;
 564     const gmx_molblock_t *molb;
 565     cginfo_mb_t          *cginfo_mb;
 566     gmx_bool             *type_VDW;
 567     int                  *cginfo;
 568     int                   cg_offset, a_offset, cgm, am;
 569     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 570     int                  *a_con;
 571     int                   ftype;
 572     int                   ia;
 573     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 574
 575     ncg_tot = ncg_mtop(mtop);
 576     snew(cginfo_mb, mtop->nmolblock);
 577
 578     snew(type_VDW, fr->ntype);
 579     for (ai = 0; ai < fr->ntype; ai++)
 580     {
 581         type_VDW[ai] = FALSE;
 582         for (j = 0; j < fr->ntype; j++)
 583         {
 584             type_VDW[ai] = type_VDW[ai] ||
 585                 fr->bBHAM ||
 586                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 587                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 588         }
 589     }
 590
 591     *bExcl_IntraCGAll_InterCGNone = TRUE;
 592
 593     excl_nalloc = 10;
 594     snew(bExcl, excl_nalloc);
 595     cg_offset = 0;
 596     a_offset  = 0;
 597     for (mb = 0; mb < mtop->nmolblock; mb++)
 598     {
 599         molb = &mtop->molblock[mb];
 600         molt = &mtop->moltype[molb->type];
 601         cgs  = &molt->cgs;
 602         excl = &molt->excls;
 603
 604         /* Check if the cginfo is identical for all molecules in this block.
 605          * If so, we only need an array of the size of one molecule.
 606          * Otherwise we make an array of #mol times #cgs per molecule.
 607          */
 608         bId = TRUE;
 609         am  = 0;
 610         for (m = 0; m < molb->nmol; m++)
 611         {
 612             am = m*cgs->index[cgs->nr];
 613             for (cg = 0; cg < cgs->nr; cg++)
 614             {
 615                 a0 = cgs->index[cg];
 616                 a1 = cgs->index[cg+1];
 617                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 618                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 619                 {
 620                     bId = FALSE;
 621                 }
 622                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 623                 {
 624                     for (ai = a0; ai < a1; ai++)
 625                     {
 626                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 627                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 628                         {
 629                             bId = FALSE;
 630                         }
 631                     }
 632                 }
 633             }
 634         }
 635
 636         cginfo_mb[mb].cg_start = cg_offset;
 637         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 638         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 639         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 640         cginfo = cginfo_mb[mb].cginfo;
 641
 642         /* Set constraints flags for constrained atoms */
 643         snew(a_con, molt->atoms.nr);
 644         for (ftype = 0; ftype < F_NRE; ftype++)
 645         {
 646             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 647             {
 648                 int nral;
 649
 650                 nral = NRAL(ftype);
 651                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 652                 {
 653                     int a;
 654
 655                     for (a = 0; a < nral; a++)
 656                     {
 657                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 658                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 659                     }
 660                 }
 661             }
 662         }
 663
 664         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 665         {
 666             cgm = m*cgs->nr;
 667             am  = m*cgs->index[cgs->nr];
 668             for (cg = 0; cg < cgs->nr; cg++)
 669             {
 670                 a0 = cgs->index[cg];
 671                 a1 = cgs->index[cg+1];
 672
 673                 /* Store the energy group in cginfo */
 674                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 675                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 676
 677                 /* Check the intra/inter charge group exclusions */
 678                 if (a1-a0 > excl_nalloc)
 679                 {
 680                     excl_nalloc = a1 - a0;
 681                     srenew(bExcl, excl_nalloc);
 682                 }
 683                 /* bExclIntraAll: all intra cg interactions excluded
 684                  * bExclInter:    any inter cg interactions excluded
 685                  */
 686                 bExclIntraAll = TRUE;
 687                 bExclInter    = FALSE;
 688                 bHaveVDW      = FALSE;
 689                 bHaveQ        = FALSE;
 690                 for (ai = a0; ai < a1; ai++)
 691                 {
 692                     /* Check VDW and electrostatic interactions */
 693                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 694                                             type_VDW[molt->atoms.atom[ai].typeB]);
 695                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 696                                             molt->atoms.atom[ai].qB != 0);
 697
 698                     /* Clear the exclusion list for atom ai */
 699                     for (aj = a0; aj < a1; aj++)
 700                     {
 701                         bExcl[aj-a0] = FALSE;
 702                     }
 703                     /* Loop over all the exclusions of atom ai */
 704                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 705                     {
 706                         aj = excl->a[j];
 707                         if (aj < a0 || aj >= a1)
 708                         {
 709                             bExclInter = TRUE;
 710                         }
 711                         else
 712                         {
 713                             bExcl[aj-a0] = TRUE;
 714                         }
 715                     }
 716                     /* Check if ai excludes a0 to a1 */
 717                     for (aj = a0; aj < a1; aj++)
 718                     {
 719                         if (!bExcl[aj-a0])
 720                         {
 721                             bExclIntraAll = FALSE;
 722                         }
 723                     }
 724
 725                     switch (a_con[ai])
 726                     {
 727                         case acCONSTRAINT:
 728                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 729                             break;
 730                         case acSETTLE:
 731                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 732                             break;
 733                         default:
 734                             break;
 735                     }
 736                 }
 737                 if (bExclIntraAll)
 738                 {
 739                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 740                 }
 741                 if (bExclInter)
 742                 {
 743                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 744                 }
 745                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 746                 {
 747                     /* The size in cginfo is currently only read with DD */
 748                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 749                 }
 750                 if (bHaveVDW)
 751                 {
 752                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 753                 }
 754                 if (bHaveQ)
 755                 {
 756                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 757                 }
 758                 /* Store the charge group size */
 759                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 760
 761                 if (!bExclIntraAll || bExclInter)
 762                 {
 763                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 764                 }
 765             }
 766         }
 767
 768         sfree(a_con);
 769
 770         cg_offset += molb->nmol*cgs->nr;
 771         a_offset  += molb->nmol*cgs->index[cgs->nr];
 772     }
 773     sfree(bExcl);
 774
 775     /* the solvent optimizer is called after the QM is initialized,
 776      * because we don't want to have the QM subsystemto become an
 777      * optimized solvent
 778      */
 779
 780     check_solvent(fplog, mtop, fr, cginfo_mb);
 781
 782     if (getenv("GMX_NO_SOLV_OPT"))
 783     {
 784         if (fplog)
 785         {
 786             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 787                     "Disabling all solvent optimization\n");
 788         }
 789         fr->solvent_opt = esolNO;
 790     }
 791     if (bNoSolvOpt)
 792     {
 793         fr->solvent_opt = esolNO;
 794     }
 795     if (!fr->solvent_opt)
 796     {
 797         for (mb = 0; mb < mtop->nmolblock; mb++)
 798         {
 799             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 800             {
 801                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 802             }
 803         }
 804     }
 805
 806     return cginfo_mb;
 807 }
 808
 809 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 810 {
 811     int  ncg, mb, cg;
 812     int *cginfo;
 813
 814     ncg = cgi_mb[nmb-1].cg_end;
 815     snew(cginfo, ncg);
 816     mb = 0;
 817     for (cg = 0; cg < ncg; cg++)
 818     {
 819         while (cg >= cgi_mb[mb].cg_end)
 820         {
 821             mb++;
 822         }
 823         cginfo[cg] =
 824             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 825     }
 826
 827     return cginfo;
 828 }
 829
 830 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 831 {
 832     double         qsum, q2sum, q;
 833     int            mb, nmol, i;
 834     const t_atoms *atoms;
 835
 836     qsum  = 0;
 837     q2sum = 0;
 838     for (mb = 0; mb < mtop->nmolblock; mb++)
 839     {
 840         nmol  = mtop->molblock[mb].nmol;
 841         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 842         for (i = 0; i < atoms->nr; i++)
 843         {
 844             q      = atoms->atom[i].q;
 845             qsum  += nmol*q;
 846             q2sum += nmol*q*q;
 847         }
 848     }
 849     fr->qsum[0]  = qsum;
 850     fr->q2sum[0] = q2sum;
 851     if (fr->efep != efepNO)
 852     {
 853         qsum  = 0;
 854         q2sum = 0;
 855         for (mb = 0; mb < mtop->nmolblock; mb++)
 856         {
 857             nmol  = mtop->molblock[mb].nmol;
 858             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 859             for (i = 0; i < atoms->nr; i++)
 860             {
 861                 q      = atoms->atom[i].qB;
 862                 qsum  += nmol*q;
 863                 q2sum += nmol*q*q;
 864             }
 865             fr->qsum[1]  = qsum;
 866             fr->q2sum[1] = q2sum;
 867         }
 868     }
 869     else
 870     {
 871         fr->qsum[1]  = fr->qsum[0];
 872         fr->q2sum[1] = fr->q2sum[0];
 873     }
 874     if (log)
 875     {
 876         if (fr->efep == efepNO)
 877         {
 878             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 879         }
 880         else
 881         {
 882             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 883                     fr->qsum[0], fr->qsum[1]);
 884         }
 885     }
 886 }
 887
 888 void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
 889 {
 890     if (fr->eeltype == eelGRF)
 891     {
 892         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 893                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 894                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 895     }
 896 }
 897
 898 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 899 {
 900     const t_atoms  *atoms, *atoms_tpi;
 901     const t_blocka *excl;
 902     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 903 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 904     long long int   npair, npair_ij, tmpi, tmpj;
 905 #else
 906     double          npair, npair_ij, tmpi, tmpj;
 907 #endif
 908     double          csix, ctwelve;
 909     int             ntp, *typecount;
 910     gmx_bool        bBHAM;
 911     real           *nbfp;
 912
 913     ntp   = fr->ntype;
 914     bBHAM = fr->bBHAM;
 915     nbfp  = fr->nbfp;
 916
 917     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 918     {
 919         csix    = 0;
 920         ctwelve = 0;
 921         npair   = 0;
 922         nexcl   = 0;
 923         if (!fr->n_tpi)
 924         {
 925             /* Count the types so we avoid natoms^2 operations */
 926             snew(typecount, ntp);
 927             for (mb = 0; mb < mtop->nmolblock; mb++)
 928             {
 929                 nmol  = mtop->molblock[mb].nmol;
 930                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 931                 for (i = 0; i < atoms->nr; i++)
 932                 {
 933                     if (q == 0)
 934                     {
 935                         tpi = atoms->atom[i].type;
 936                     }
 937                     else
 938                     {
 939                         tpi = atoms->atom[i].typeB;
 940                     }
 941                     typecount[tpi] += nmol;
 942                 }
 943             }
 944             for (tpi = 0; tpi < ntp; tpi++)
 945             {
 946                 for (tpj = tpi; tpj < ntp; tpj++)
 947                 {
 948                     tmpi = typecount[tpi];
 949                     tmpj = typecount[tpj];
 950                     if (tpi != tpj)
 951                     {
 952                         npair_ij = tmpi*tmpj;
 953                     }
 954                     else
 955                     {
 956                         npair_ij = tmpi*(tmpi - 1)/2;
 957                     }
 958                     if (bBHAM)
 959                     {
 960                         /* nbfp now includes the 6.0 derivative prefactor */
 961                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 962                     }
 963                     else
 964                     {
 965                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
 966                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
 967                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
 968                     }
 969                     npair += npair_ij;
 970                 }
 971             }
 972             sfree(typecount);
 973             /* Subtract the excluded pairs.
 974              * The main reason for substracting exclusions is that in some cases
 975              * some combinations might never occur and the parameters could have
 976              * any value. These unused values should not influence the dispersion
 977              * correction.
 978              */
 979             for (mb = 0; mb < mtop->nmolblock; mb++)
 980             {
 981                 nmol  = mtop->molblock[mb].nmol;
 982                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 983                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 984                 for (i = 0; (i < atoms->nr); i++)
 985                 {
 986                     if (q == 0)
 987                     {
 988                         tpi = atoms->atom[i].type;
 989                     }
 990                     else
 991                     {
 992                         tpi = atoms->atom[i].typeB;
 993                     }
 994                     j1  = excl->index[i];
 995                     j2  = excl->index[i+1];
 996                     for (j = j1; j < j2; j++)
 997                     {
 998                         k = excl->a[j];
 999                         if (k > i)
1000                         {
1001                             if (q == 0)
1002                             {
1003                                 tpj = atoms->atom[k].type;
1004                             }
1005                             else
1006                             {
1007                                 tpj = atoms->atom[k].typeB;
1008                             }
1009                             if (bBHAM)
1010                             {
1011                                 /* nbfp now includes the 6.0 derivative prefactor */
1012                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1013                             }
1014                             else
1015                             {
1016                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1017                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1018                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1019                             }
1020                             nexcl += nmol;
1021                         }
1022                     }
1023                 }
1024             }
1025         }
1026         else
1027         {
1028             /* Only correct for the interaction of the test particle
1029              * with the rest of the system.
1030              */
1031             atoms_tpi =
1032                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1033
1034             npair = 0;
1035             for (mb = 0; mb < mtop->nmolblock; mb++)
1036             {
1037                 nmol  = mtop->molblock[mb].nmol;
1038                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1039                 for (j = 0; j < atoms->nr; j++)
1040                 {
1041                     nmolc = nmol;
1042                     /* Remove the interaction of the test charge group
1043                      * with itself.
1044                      */
1045                     if (mb == mtop->nmolblock-1)
1046                     {
1047                         nmolc--;
1048
1049                         if (mb == 0 && nmol == 1)
1050                         {
1051                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1052                         }
1053                     }
1054                     if (q == 0)
1055                     {
1056                         tpj = atoms->atom[j].type;
1057                     }
1058                     else
1059                     {
1060                         tpj = atoms->atom[j].typeB;
1061                     }
1062                     for (i = 0; i < fr->n_tpi; i++)
1063                     {
1064                         if (q == 0)
1065                         {
1066                             tpi = atoms_tpi->atom[i].type;
1067                         }
1068                         else
1069                         {
1070                             tpi = atoms_tpi->atom[i].typeB;
1071                         }
1072                         if (bBHAM)
1073                         {
1074                             /* nbfp now includes the 6.0 derivative prefactor */
1075                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1076                         }
1077                         else
1078                         {
1079                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1080                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1081                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1082                         }
1083                         npair += nmolc;
1084                     }
1085                 }
1086             }
1087         }
1088         if (npair - nexcl <= 0 && fplog)
1089         {
1090             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1091             csix     = 0;
1092             ctwelve  = 0;
1093         }
1094         else
1095         {
1096             csix    /= npair - nexcl;
1097             ctwelve /= npair - nexcl;
1098         }
1099         if (debug)
1100         {
1101             fprintf(debug, "Counted %d exclusions\n", nexcl);
1102             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1103             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1104         }
1105         fr->avcsix[q]    = csix;
1106         fr->avctwelve[q] = ctwelve;
1107     }
1108     if (fplog != NULL)
1109     {
1110         if (fr->eDispCorr == edispcAllEner ||
1111             fr->eDispCorr == edispcAllEnerPres)
1112         {
1113             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1114                     fr->avcsix[0], fr->avctwelve[0]);
1115         }
1116         else
1117         {
1118             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1119         }
1120     }
1121 }
1122
1123
1124 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1125                            const gmx_mtop_t *mtop)
1126 {
1127     const t_atoms *at1, *at2;
1128     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1129     real           b, bmin;
1130     real          *nbfp;
1131
1132     if (fplog)
1133     {
1134         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1135     }
1136     nbfp   = fr->nbfp;
1137     ntypes = fr->ntype;
1138
1139     bmin           = -1;
1140     fr->bham_b_max = 0;
1141     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1142     {
1143         at1 = &mtop->moltype[mt1].atoms;
1144         for (i = 0; (i < at1->nr); i++)
1145         {
1146             tpi = at1->atom[i].type;
1147             if (tpi >= ntypes)
1148             {
1149                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1150             }
1151
1152             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1153             {
1154                 at2 = &mtop->moltype[mt2].atoms;
1155                 for (j = 0; (j < at2->nr); j++)
1156                 {
1157                     tpj = at2->atom[j].type;
1158                     if (tpj >= ntypes)
1159                     {
1160                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1161                     }
1162                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1163                     if (b > fr->bham_b_max)
1164                     {
1165                         fr->bham_b_max = b;
1166                     }
1167                     if ((b < bmin) || (bmin == -1))
1168                     {
1169                         bmin = b;
1170                     }
1171                 }
1172             }
1173         }
1174     }
1175     if (fplog)
1176     {
1177         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1178                 bmin, fr->bham_b_max);
1179     }
1180 }
1181
1182 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1183                             t_forcerec *fr, real rtab,
1184                             const t_commrec *cr,
1185                             const char *tabfn, char *eg1, char *eg2,
1186                             t_nblists *nbl)
1187 {
1188     char buf[STRLEN];
1189     int  i, j;
1190
1191     if (tabfn == NULL)
1192     {
1193         if (debug)
1194         {
1195             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1196         }
1197         return;
1198     }
1199
1200     sprintf(buf, "%s", tabfn);
1201     if (eg1 && eg2)
1202     {
1203         /* Append the two energy group names */
1204         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1205                 eg1, eg2, ftp2ext(efXVG));
1206     }
1207     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1208     /* Copy the contents of the table to separate coulomb and LJ tables too,
1209      * to improve cache performance.
1210      */
1211     /* For performance reasons we want
1212      * the table data to be aligned to 16-byte. The pointers could be freed
1213      * but currently aren't.
1214      */
1215     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1216     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1217     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1218     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1219     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1220     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1221     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1222     nbl->table_elec.ninteractions = 1;
1223     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1224     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1225
1226     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1227     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1228     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1229     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1230     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1231     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1232     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1233     nbl->table_vdw.ninteractions = 2;
1234     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1235     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1236
1237     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1238     {
1239         for (j = 0; j < 4; j++)
1240         {
1241             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1242         }
1243         for (j = 0; j < 8; j++)
1244         {
1245             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1246         }
1247     }
1248 }
1249
1250 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1251                          int *ncount, int **count)
1252 {
1253     const gmx_moltype_t *molt;
1254     const t_ilist       *il;
1255     int                  mt, ftype, stride, i, j, tabnr;
1256
1257     for (mt = 0; mt < mtop->nmoltype; mt++)
1258     {
1259         molt = &mtop->moltype[mt];
1260         for (ftype = 0; ftype < F_NRE; ftype++)
1261         {
1262             if (ftype == ftype1 || ftype == ftype2)
1263             {
1264                 il     = &molt->ilist[ftype];
1265                 stride = 1 + NRAL(ftype);
1266                 for (i = 0; i < il->nr; i += stride)
1267                 {
1268                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1269                     if (tabnr < 0)
1270                     {
1271                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1272                     }
1273                     if (tabnr >= *ncount)
1274                     {
1275                         srenew(*count, tabnr+1);
1276                         for (j = *ncount; j < tabnr+1; j++)
1277                         {
1278                             (*count)[j] = 0;
1279                         }
1280                         *ncount = tabnr+1;
1281                     }
1282                     (*count)[tabnr]++;
1283                 }
1284             }
1285         }
1286     }
1287 }
1288
1289 static bondedtable_t *make_bonded_tables(FILE *fplog,
1290                                          int ftype1, int ftype2,
1291                                          const gmx_mtop_t *mtop,
1292                                          const char *basefn, const char *tabext)
1293 {
1294     int            i, ncount, *count;
1295     char           tabfn[STRLEN];
1296     bondedtable_t *tab;
1297
1298     tab = NULL;
1299
1300     ncount = 0;
1301     count  = NULL;
1302     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1303
1304     if (ncount > 0)
1305     {
1306         snew(tab, ncount);
1307         for (i = 0; i < ncount; i++)
1308         {
1309             if (count[i] > 0)
1310             {
1311                 sprintf(tabfn, "%s", basefn);
1312                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1313                         tabext, i, ftp2ext(efXVG));
1314                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1315             }
1316         }
1317         sfree(count);
1318     }
1319
1320     return tab;
1321 }
1322
1323 void forcerec_set_ranges(t_forcerec *fr,
1324                          int ncg_home, int ncg_force,
1325                          int natoms_force,
1326                          int natoms_force_constr, int natoms_f_novirsum)
1327 {
1328     fr->cg0 = 0;
1329     fr->hcg = ncg_home;
1330
1331     /* fr->ncg_force is unused in the standard code,
1332      * but it can be useful for modified code dealing with charge groups.
1333      */
1334     fr->ncg_force           = ncg_force;
1335     fr->natoms_force        = natoms_force;
1336     fr->natoms_force_constr = natoms_force_constr;
1337
1338     if (fr->natoms_force_constr > fr->nalloc_force)
1339     {
1340         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1341
1342         if (fr->bTwinRange)
1343         {
1344             srenew(fr->f_twin, fr->nalloc_force);
1345         }
1346     }
1347
1348     if (fr->bF_NoVirSum)
1349     {
1350         fr->f_novirsum_n = natoms_f_novirsum;
1351         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1352         {
1353             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1354             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1355         }
1356     }
1357     else
1358     {
1359         fr->f_novirsum_n = 0;
1360     }
1361 }
1362
1363 static real cutoff_inf(real cutoff)
1364 {
1365     if (cutoff == 0)
1366     {
1367         cutoff = GMX_CUTOFF_INF;
1368     }
1369
1370     return cutoff;
1371 }
1372
1373 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1374                                   t_forcerec *fr, const t_inputrec *ir,
1375                                   const char *tabfn, const gmx_mtop_t *mtop,
1376                                   matrix     box)
1377 {
1378     char buf[STRLEN];
1379     int  i, j;
1380
1381     if (tabfn == NULL)
1382     {
1383         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1384         return;
1385     }
1386
1387     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1388
1389     sprintf(buf, "%s", tabfn);
1390     for (i = 0; i < ir->adress->n_tf_grps; i++)
1391     {
1392         j = ir->adress->tf_table_index[i]; /* get energy group index */
1393         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1394                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1395         if (fp)
1396         {
1397             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1398         }
1399         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1400     }
1401
1402 }
1403
1404 gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
1405                           gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1406 {
1407     gmx_bool bAllvsAll;
1408
1409     bAllvsAll =
1410         (
1411             ir->rlist == 0            &&
1412             ir->rcoulomb == 0         &&
1413             ir->rvdw == 0             &&
1414             ir->ePBC == epbcNONE      &&
1415             ir->vdwtype == evdwCUT    &&
1416             ir->coulombtype == eelCUT &&
1417             ir->efep == efepNO        &&
1418             (ir->implicit_solvent == eisNO ||
1419              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1420                                                   ir->gb_algorithm == egbHCT   ||
1421                                                   ir->gb_algorithm == egbOBC))) &&
1422             getenv("GMX_NO_ALLVSALL") == NULL
1423         );
1424
1425     if (bAllvsAll && ir->opts.ngener > 1)
1426     {
1427         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1428
1429         if (bPrintNote)
1430         {
1431             if (MASTER(cr))
1432             {
1433                 fprintf(stderr, "\n%s\n", note);
1434             }
1435             if (fp != NULL)
1436             {
1437                 fprintf(fp, "\n%s\n", note);
1438             }
1439         }
1440         bAllvsAll = FALSE;
1441     }
1442
1443     if (bAllvsAll && fp && MASTER(cr))
1444     {
1445         fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1446     }
1447
1448     return bAllvsAll;
1449 }
1450
1451
1452 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1453 {
1454     int t, i;
1455
1456     /* These thread local data structures are used for bondeds only */
1457     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1458
1459     if (fr->nthreads > 1)
1460     {
1461         snew(fr->f_t, fr->nthreads);
1462         /* Thread 0 uses the global force and energy arrays */
1463         for (t = 1; t < fr->nthreads; t++)
1464         {
1465             fr->f_t[t].f        = NULL;
1466             fr->f_t[t].f_nalloc = 0;
1467             snew(fr->f_t[t].fshift, SHIFTS);
1468             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1469             for (i = 0; i < egNR; i++)
1470             {
1471                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1472             }
1473         }
1474     }
1475 }
1476
1477
1478 static void pick_nbnxn_kernel_cpu(FILE             *fp,
1479                                   const t_commrec  *cr,
1480                                   const gmx_cpuid_t cpuid_info,
1481                                   const t_inputrec *ir,
1482                                   int              *kernel_type,
1483                                   int              *ewald_excl)
1484 {
1485     *kernel_type = nbnxnk4x4_PlainC;
1486     *ewald_excl  = ewaldexclTable;
1487
1488 #ifdef GMX_NBNXN_SIMD
1489     {
1490 #ifdef GMX_NBNXN_SIMD_4XN
1491         *kernel_type = nbnxnk4xN_SIMD_4xN;
1492 #endif
1493 #ifdef GMX_NBNXN_SIMD_2XNN
1494         /* We expect the 2xNN kernels to be faster in most cases */
1495         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1496 #endif
1497
1498 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1499         if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1500         {
1501             /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1502              * 10% with HT, 50% without HT, but extra zeros interactions
1503              * can compensate. As we currently don't detect the actual use
1504              * of HT, switch to 4x8 to avoid a potential performance hit.
1505              */
1506             *kernel_type = nbnxnk4xN_SIMD_4xN;
1507         }
1508 #endif
1509         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1510         {
1511 #ifdef GMX_NBNXN_SIMD_4XN
1512             *kernel_type = nbnxnk4xN_SIMD_4xN;
1513 #else
1514             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1515 #endif
1516         }
1517         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1518         {
1519 #ifdef GMX_NBNXN_SIMD_2XNN
1520             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1521 #else
1522             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1523 #endif
1524         }
1525
1526         /* Analytical Ewald exclusion correction is only an option in
1527          * the SIMD kernel. On BlueGene/Q, this is faster regardless
1528          * of precision. In single precision, this is faster on
1529          * Bulldozer, and slightly faster on Sandy Bridge.
1530          */
1531 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1532         *ewald_excl = ewaldexclAnalytical;
1533 #endif
1534         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1535         {
1536             *ewald_excl = ewaldexclTable;
1537         }
1538         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1539         {
1540             *ewald_excl = ewaldexclAnalytical;
1541         }
1542
1543     }
1544 #endif /* GMX_NBNXN_SIMD */
1545 }
1546
1547
1548 const char *lookup_nbnxn_kernel_name(int kernel_type)
1549 {
1550     const char *returnvalue = NULL;
1551     switch (kernel_type)
1552     {
1553         case nbnxnkNotSet:
1554             returnvalue = "not set";
1555             break;
1556         case nbnxnk4x4_PlainC:
1557             returnvalue = "plain C";
1558             break;
1559         case nbnxnk4xN_SIMD_4xN:
1560         case nbnxnk4xN_SIMD_2xNN:
1561 #ifdef GMX_NBNXN_SIMD
1562 #ifdef GMX_X86_SSE2
1563             /* We have x86 SSE2 compatible SIMD */
1564 #ifdef GMX_X86_AVX_128_FMA
1565             returnvalue = "AVX-128-FMA";
1566 #else
1567 #if defined GMX_X86_AVX_256 || defined __AVX__
1568             /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1569              * on compiler flags. As we use nearly identical intrinsics,
1570              * compiling for AVX without an AVX macros effectively results
1571              * in AVX kernels.
1572              * For gcc we check for __AVX__
1573              * At least a check for icc should be added (if there is a macro)
1574              */
1575 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1576             returnvalue = "AVX-256";
1577 #else
1578             returnvalue = "AVX-128";
1579 #endif
1580 #else
1581 #ifdef GMX_X86_SSE4_1
1582             returnvalue  = "SSE4.1";
1583 #else
1584             returnvalue  = "SSE2";
1585 #endif
1586 #endif
1587 #endif
1588 #else   /* GMX_X86_SSE2 */
1589             /* not GMX_X86_SSE2, but other SIMD */
1590             returnvalue  = "SIMD";
1591 #endif /* GMX_X86_SSE2 */
1592 #else  /* GMX_NBNXN_SIMD */
1593             returnvalue = "not available";
1594 #endif /* GMX_NBNXN_SIMD */
1595             break;
1596         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1597         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1598
1599         case nbnxnkNR:
1600         default:
1601             gmx_fatal(FARGS, "Illegal kernel type selected");
1602             returnvalue = NULL;
1603             break;
1604     }
1605     return returnvalue;
1606 };
1607
1608 static void pick_nbnxn_kernel(FILE                *fp,
1609                               const t_commrec     *cr,
1610                               const gmx_hw_info_t *hwinfo,
1611                               gmx_bool             use_cpu_acceleration,
1612                               gmx_bool             bUseGPU,
1613                               gmx_bool             bEmulateGPU,
1614                               const t_inputrec    *ir,
1615                               int                 *kernel_type,
1616                               int                 *ewald_excl,
1617                               gmx_bool             bDoNonbonded)
1618 {
1619     assert(kernel_type);
1620
1621     *kernel_type = nbnxnkNotSet;
1622     *ewald_excl  = ewaldexclTable;
1623
1624     if (bEmulateGPU)
1625     {
1626         *kernel_type = nbnxnk8x8x8_PlainC;
1627
1628         if (bDoNonbonded)
1629         {
1630             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1631         }
1632     }
1633     else if (bUseGPU)
1634     {
1635         *kernel_type = nbnxnk8x8x8_CUDA;
1636     }
1637
1638     if (*kernel_type == nbnxnkNotSet)
1639     {
1640         if (use_cpu_acceleration)
1641         {
1642             pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
1643                                   kernel_type, ewald_excl);
1644         }
1645         else
1646         {
1647             *kernel_type = nbnxnk4x4_PlainC;
1648         }
1649     }
1650
1651     if (bDoNonbonded && fp != NULL)
1652     {
1653         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1654                 lookup_nbnxn_kernel_name(*kernel_type),
1655                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1656                 nbnxn_kernel_to_cj_size(*kernel_type));
1657     }
1658 }
1659
1660 static void pick_nbnxn_resources(FILE                *fp,
1661                                  const t_commrec     *cr,
1662                                  const gmx_hw_info_t *hwinfo,
1663                                  gmx_bool             bDoNonbonded,
1664                                  gmx_bool            *bUseGPU,
1665                                  gmx_bool            *bEmulateGPU)
1666 {
1667     gmx_bool bEmulateGPUEnvVarSet;
1668     char     gpu_err_str[STRLEN];
1669
1670     *bUseGPU = FALSE;
1671
1672     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1673
1674     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1675      * GPUs (currently) only handle non-bonded calculations, we will
1676      * automatically switch to emulation if non-bonded calculations are
1677      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1678      * way to turn off GPU initialization, data movement, and cleanup.
1679      *
1680      * GPU emulation can be useful to assess the performance one can expect by
1681      * adding GPU(s) to the machine. The conditional below allows this even
1682      * if mdrun is compiled without GPU acceleration support.
1683      * Note that you should freezing the system as otherwise it will explode.
1684      */
1685     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1686                     (!bDoNonbonded && hwinfo->bCanUseGPU));
1687
1688     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1689      */
1690     if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
1691     {
1692         /* Each PP node will use the intra-node id-th device from the
1693          * list of detected/selected GPUs. */
1694         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
1695         {
1696             /* At this point the init should never fail as we made sure that
1697              * we have all the GPUs we need. If it still does, we'll bail. */
1698             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1699                       cr->nodeid,
1700                       get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
1701                       gpu_err_str);
1702         }
1703
1704         /* Here we actually turn on hardware GPU acceleration */
1705         *bUseGPU = TRUE;
1706     }
1707 }
1708
1709 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1710                             nonbonded_verlet_t *nbv,
1711                             int                 group)
1712 {
1713     gmx_bool bUsesSimpleTables = TRUE;
1714     int      grp_index;
1715
1716     switch (cutoff_scheme)
1717     {
1718         case ecutsGROUP:
1719             bUsesSimpleTables = TRUE;
1720             break;
1721         case ecutsVERLET:
1722             assert(NULL != nbv && NULL != nbv->grp);
1723             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1724             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1725             break;
1726         default:
1727             gmx_incons("unimplemented");
1728     }
1729     return bUsesSimpleTables;
1730 }
1731
1732 static void init_ewald_f_table(interaction_const_t *ic,
1733                                gmx_bool             bUsesSimpleTables,
1734                                real                 rtab)
1735 {
1736     real maxr;
1737
1738     if (bUsesSimpleTables)
1739     {
1740         /* With a spacing of 0.0005 we are at the force summation accuracy
1741          * for the SSE kernels for "normal" atomistic simulations.
1742          */
1743         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
1744                                                    ic->rcoulomb);
1745
1746         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1747         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1748     }
1749     else
1750     {
1751         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1752         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1753         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1754     }
1755
1756     sfree_aligned(ic->tabq_coul_FDV0);
1757     sfree_aligned(ic->tabq_coul_F);
1758     sfree_aligned(ic->tabq_coul_V);
1759
1760     /* Create the original table data in FDV0 */
1761     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1762     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1763     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1764     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1765                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
1766 }
1767
1768 void init_interaction_const_tables(FILE                *fp,
1769                                    interaction_const_t *ic,
1770                                    gmx_bool             bUsesSimpleTables,
1771                                    real                 rtab)
1772 {
1773     real spacing;
1774
1775     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1776     {
1777         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1778
1779         if (fp != NULL)
1780         {
1781             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1782                     1/ic->tabq_scale, ic->tabq_size);
1783         }
1784     }
1785 }
1786
1787 void init_interaction_const(FILE                 *fp,
1788                             interaction_const_t **interaction_const,
1789                             const t_forcerec     *fr,
1790                             real                  rtab)
1791 {
1792     interaction_const_t *ic;
1793     gmx_bool             bUsesSimpleTables = TRUE;
1794
1795     snew(ic, 1);
1796
1797     /* Just allocate something so we can free it */
1798     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1799     snew_aligned(ic->tabq_coul_F, 16, 32);
1800     snew_aligned(ic->tabq_coul_V, 16, 32);
1801
1802     ic->rlist       = fr->rlist;
1803     ic->rlistlong   = fr->rlistlong;
1804
1805     /* Lennard-Jones */
1806     ic->rvdw        = fr->rvdw;
1807     if (fr->vdw_modifier == eintmodPOTSHIFT)
1808     {
1809         ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1810     }
1811     else
1812     {
1813         ic->sh_invrc6 = 0;
1814     }
1815
1816     /* Electrostatics */
1817     ic->eeltype     = fr->eeltype;
1818     ic->rcoulomb    = fr->rcoulomb;
1819     ic->epsilon_r   = fr->epsilon_r;
1820     ic->epsfac      = fr->epsfac;
1821
1822     /* Ewald */
1823     ic->ewaldcoeff  = fr->ewaldcoeff;
1824     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1825     {
1826         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
1827     }
1828     else
1829     {
1830         ic->sh_ewald = 0;
1831     }
1832
1833     /* Reaction-field */
1834     if (EEL_RF(ic->eeltype))
1835     {
1836         ic->epsilon_rf = fr->epsilon_rf;
1837         ic->k_rf       = fr->k_rf;
1838         ic->c_rf       = fr->c_rf;
1839     }
1840     else
1841     {
1842         /* For plain cut-off we might use the reaction-field kernels */
1843         ic->epsilon_rf = ic->epsilon_r;
1844         ic->k_rf       = 0;
1845         if (fr->coulomb_modifier == eintmodPOTSHIFT)
1846         {
1847             ic->c_rf   = 1/ic->rcoulomb;
1848         }
1849         else
1850         {
1851             ic->c_rf   = 0;
1852         }
1853     }
1854
1855     if (fp != NULL)
1856     {
1857         fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1858                 sqr(ic->sh_invrc6), ic->sh_invrc6);
1859         if (ic->eeltype == eelCUT)
1860         {
1861             fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1862         }
1863         else if (EEL_PME(ic->eeltype))
1864         {
1865             fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1866         }
1867         fprintf(fp, "\n");
1868     }
1869
1870     *interaction_const = ic;
1871
1872     if (fr->nbv != NULL && fr->nbv->bUseGPU)
1873     {
1874         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1875     }
1876
1877     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1878     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1879 }
1880
1881 static void init_nb_verlet(FILE                *fp,
1882                            nonbonded_verlet_t **nb_verlet,
1883                            const t_inputrec    *ir,
1884                            const t_forcerec    *fr,
1885                            const t_commrec     *cr,
1886                            const char          *nbpu_opt)
1887 {
1888     nonbonded_verlet_t *nbv;
1889     int                 i;
1890     char               *env;
1891     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
1892
1893     nbnxn_alloc_t      *nb_alloc;
1894     nbnxn_free_t       *nb_free;
1895
1896     snew(nbv, 1);
1897
1898     pick_nbnxn_resources(fp, cr, fr->hwinfo,
1899                          fr->bNonbonded,
1900                          &nbv->bUseGPU,
1901                          &bEmulateGPU);
1902
1903     nbv->nbs = NULL;
1904
1905     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1906     for (i = 0; i < nbv->ngrp; i++)
1907     {
1908         nbv->grp[i].nbl_lists.nnbl = 0;
1909         nbv->grp[i].nbat           = NULL;
1910         nbv->grp[i].kernel_type    = nbnxnkNotSet;
1911
1912         if (i == 0) /* local */
1913         {
1914             pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1915                               nbv->bUseGPU, bEmulateGPU,
1916                               ir,
1917                               &nbv->grp[i].kernel_type,
1918                               &nbv->grp[i].ewald_excl,
1919                               fr->bNonbonded);
1920         }
1921         else /* non-local */
1922         {
1923             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
1924             {
1925                 /* Use GPU for local, select a CPU kernel for non-local */
1926                 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1927                                   FALSE, FALSE,
1928                                   ir,
1929                                   &nbv->grp[i].kernel_type,
1930                                   &nbv->grp[i].ewald_excl,
1931                                   fr->bNonbonded);
1932
1933                 bHybridGPURun = TRUE;
1934             }
1935             else
1936             {
1937                 /* Use the same kernel for local and non-local interactions */
1938                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
1939                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
1940             }
1941         }
1942     }
1943
1944     if (nbv->bUseGPU)
1945     {
1946         /* init the NxN GPU data; the last argument tells whether we'll have
1947          * both local and non-local NB calculation on GPU */
1948         nbnxn_cuda_init(fp, &nbv->cu_nbv,
1949                         &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
1950                         (nbv->ngrp > 1) && !bHybridGPURun);
1951
1952         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
1953         {
1954             char *end;
1955
1956             nbv->min_ci_balanced = strtol(env, &end, 10);
1957             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
1958             {
1959                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
1960             }
1961
1962             if (debug)
1963             {
1964                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
1965                         nbv->min_ci_balanced);
1966             }
1967         }
1968         else
1969         {
1970             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
1971             if (debug)
1972             {
1973                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
1974                         nbv->min_ci_balanced);
1975             }
1976         }
1977     }
1978     else
1979     {
1980         nbv->min_ci_balanced = 0;
1981     }
1982
1983     *nb_verlet = nbv;
1984
1985     nbnxn_init_search(&nbv->nbs,
1986                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
1987                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
1988                       gmx_omp_nthreads_get(emntNonbonded));
1989
1990     for (i = 0; i < nbv->ngrp; i++)
1991     {
1992         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
1993         {
1994             nb_alloc = &pmalloc;
1995             nb_free  = &pfree;
1996         }
1997         else
1998         {
1999             nb_alloc = NULL;
2000             nb_free  = NULL;
2001         }
2002
2003         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2004                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2005                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2006                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2007                                 nb_alloc, nb_free);
2008
2009         if (i == 0 ||
2010             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2011         {
2012             snew(nbv->grp[i].nbat, 1);
2013             nbnxn_atomdata_init(fp,
2014                                 nbv->grp[i].nbat,
2015                                 nbv->grp[i].kernel_type,
2016                                 fr->ntype, fr->nbfp,
2017                                 ir->opts.ngener,
2018                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2019                                 nb_alloc, nb_free);
2020         }
2021         else
2022         {
2023             nbv->grp[i].nbat = nbv->grp[0].nbat;
2024         }
2025     }
2026 }
2027
2028 void init_forcerec(FILE              *fp,
2029                    const output_env_t oenv,
2030                    t_forcerec        *fr,
2031                    t_fcdata          *fcd,
2032                    const t_inputrec  *ir,
2033                    const gmx_mtop_t  *mtop,
2034                    const t_commrec   *cr,
2035                    matrix             box,
2036                    gmx_bool           bMolEpot,
2037                    const char        *tabfn,
2038                    const char        *tabafn,
2039                    const char        *tabpfn,
2040                    const char        *tabbfn,
2041                    const char        *nbpu_opt,
2042                    gmx_bool           bNoSolvOpt,
2043                    real               print_force)
2044 {
2045     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2046     real           rtab;
2047     char          *env;
2048     double         dbl;
2049     rvec           box_size;
2050     const t_block *cgs;
2051     gmx_bool       bGenericKernelOnly;
2052     gmx_bool       bTab, bSep14tab, bNormalnblists;
2053     t_nblists     *nbl;
2054     int           *nm_ind, egp_flags;
2055
2056     if (fr->hwinfo == NULL)
2057     {
2058         /* Detect hardware, gather information.
2059          * In mdrun, hwinfo has already been set before calling init_forcerec.
2060          * Here we ignore GPUs, as tools will not use them anyhow.
2061          */
2062         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE, FALSE, NULL);
2063     }
2064
2065     /* By default we turn acceleration on, but it might be turned off further down... */
2066     fr->use_cpu_acceleration = TRUE;
2067
2068     fr->bDomDec = DOMAINDECOMP(cr);
2069
2070     natoms = mtop->natoms;
2071
2072     if (check_box(ir->ePBC, box))
2073     {
2074         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2075     }
2076
2077     /* Test particle insertion ? */
2078     if (EI_TPI(ir->eI))
2079     {
2080         /* Set to the size of the molecule to be inserted (the last one) */
2081         /* Because of old style topologies, we have to use the last cg
2082          * instead of the last molecule type.
2083          */
2084         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2085         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2086         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2087         {
2088             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2089         }
2090     }
2091     else
2092     {
2093         fr->n_tpi = 0;
2094     }
2095
2096     /* Copy AdResS parameters */
2097     if (ir->bAdress)
2098     {
2099         fr->adress_type           = ir->adress->type;
2100         fr->adress_const_wf       = ir->adress->const_wf;
2101         fr->adress_ex_width       = ir->adress->ex_width;
2102         fr->adress_hy_width       = ir->adress->hy_width;
2103         fr->adress_icor           = ir->adress->icor;
2104         fr->adress_site           = ir->adress->site;
2105         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2106         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2107
2108
2109         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2110         for (i = 0; i < ir->adress->n_energy_grps; i++)
2111         {
2112             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2113         }
2114
2115         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2116         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2117         for (i = 0; i < fr->n_adress_tf_grps; i++)
2118         {
2119             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2120         }
2121         copy_rvec(ir->adress->refs, fr->adress_refs);
2122     }
2123     else
2124     {
2125         fr->adress_type           = eAdressOff;
2126         fr->adress_do_hybridpairs = FALSE;
2127     }
2128
2129     /* Copy the user determined parameters */
2130     fr->userint1  = ir->userint1;
2131     fr->userint2  = ir->userint2;
2132     fr->userint3  = ir->userint3;
2133     fr->userint4  = ir->userint4;
2134     fr->userreal1 = ir->userreal1;
2135     fr->userreal2 = ir->userreal2;
2136     fr->userreal3 = ir->userreal3;
2137     fr->userreal4 = ir->userreal4;
2138
2139     /* Shell stuff */
2140     fr->fc_stepsize = ir->fc_stepsize;
2141
2142     /* Free energy */
2143     fr->efep        = ir->efep;
2144     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2145     if (ir->fepvals->bScCoul)
2146     {
2147         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2148         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2149     }
2150     else
2151     {
2152         fr->sc_alphacoul  = 0;
2153         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2154     }
2155     fr->sc_power      = ir->fepvals->sc_power;
2156     fr->sc_r_power    = ir->fepvals->sc_r_power;
2157     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2158
2159     env = getenv("GMX_SCSIGMA_MIN");
2160     if (env != NULL)
2161     {
2162         dbl = 0;
2163         sscanf(env, "%lf", &dbl);
2164         fr->sc_sigma6_min = pow(dbl, 6);
2165         if (fp)
2166         {
2167             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2168         }
2169     }
2170
2171     fr->bNonbonded = TRUE;
2172     if (getenv("GMX_NO_NONBONDED") != NULL)
2173     {
2174         /* turn off non-bonded calculations */
2175         fr->bNonbonded = FALSE;
2176         md_print_warn(cr, fp,
2177                       "Found environment variable GMX_NO_NONBONDED.\n"
2178                       "Disabling nonbonded calculations.\n");
2179     }
2180
2181     bGenericKernelOnly = FALSE;
2182
2183     /* We now check in the NS code whether a particular combination of interactions
2184      * can be used with water optimization, and disable it if that is not the case.
2185      */
2186
2187     if (getenv("GMX_NB_GENERIC") != NULL)
2188     {
2189         if (fp != NULL)
2190         {
2191             fprintf(fp,
2192                     "Found environment variable GMX_NB_GENERIC.\n"
2193                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2194                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2195         }
2196         bGenericKernelOnly = TRUE;
2197     }
2198
2199     if (bGenericKernelOnly == TRUE)
2200     {
2201         bNoSolvOpt         = TRUE;
2202     }
2203
2204     if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2205     {
2206         fr->use_cpu_acceleration = FALSE;
2207         if (fp != NULL)
2208         {
2209             fprintf(fp,
2210                     "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2211                     "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2212         }
2213     }
2214
2215     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2216
2217     /* Check if we can/should do all-vs-all kernels */
2218     fr->bAllvsAll       = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
2219     fr->AllvsAll_work   = NULL;
2220     fr->AllvsAll_workgb = NULL;
2221
2222     /* All-vs-all kernels have not been implemented in 4.6, and
2223      * the SIMD group kernels are also buggy in this case. Non-accelerated
2224      * group kernels are OK. See Redmine #1249. */
2225     if (fr->bAllvsAll)
2226     {
2227         fr->bAllvsAll            = FALSE;
2228         fr->use_cpu_acceleration = FALSE;
2229         if (fp != NULL)
2230         {
2231             fprintf(fp,
2232                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2233                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2234                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2235                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2236                     "we are proceeding by disabling all CPU architecture-specific\n"
2237                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2238                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2239         }
2240     }
2241
2242     /* Neighbour searching stuff */
2243     fr->cutoff_scheme = ir->cutoff_scheme;
2244     fr->bGrid         = (ir->ns_type == ensGRID);
2245     fr->ePBC          = ir->ePBC;
2246
2247     /* Determine if we will do PBC for distances in bonded interactions */
2248     if (fr->ePBC == epbcNONE)
2249     {
2250         fr->bMolPBC = FALSE;
2251     }
2252     else
2253     {
2254         if (!DOMAINDECOMP(cr))
2255         {
2256             /* The group cut-off scheme and SHAKE assume charge groups
2257              * are whole, but not using molpbc is faster in most cases.
2258              */
2259             if (fr->cutoff_scheme == ecutsGROUP ||
2260                 (ir->eConstrAlg == econtSHAKE &&
2261                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2262                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2263             {
2264                 fr->bMolPBC = ir->bPeriodicMols;
2265             }
2266             else
2267             {
2268                 fr->bMolPBC = TRUE;
2269                 if (getenv("GMX_USE_GRAPH") != NULL)
2270                 {
2271                     fr->bMolPBC = FALSE;
2272                     if (fp)
2273                     {
2274                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2275                     }
2276                 }
2277             }
2278         }
2279         else
2280         {
2281             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2282         }
2283     }
2284     fr->bGB = (ir->implicit_solvent == eisGBSA);
2285
2286     fr->rc_scaling = ir->refcoord_scaling;
2287     copy_rvec(ir->posres_com, fr->posres_com);
2288     copy_rvec(ir->posres_comB, fr->posres_comB);
2289     fr->rlist      = cutoff_inf(ir->rlist);
2290     fr->rlistlong  = cutoff_inf(ir->rlistlong);
2291     fr->eeltype    = ir->coulombtype;
2292     fr->vdwtype    = ir->vdwtype;
2293
2294     fr->coulomb_modifier = ir->coulomb_modifier;
2295     fr->vdw_modifier     = ir->vdw_modifier;
2296
2297     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2298     switch (fr->eeltype)
2299     {
2300         case eelCUT:
2301             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2302             break;
2303
2304         case eelRF:
2305         case eelGRF:
2306         case eelRF_NEC:
2307             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2308             break;
2309
2310         case eelRF_ZERO:
2311             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2312             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2313             break;
2314
2315         case eelSWITCH:
2316         case eelSHIFT:
2317         case eelUSER:
2318         case eelENCADSHIFT:
2319         case eelPMESWITCH:
2320         case eelPMEUSER:
2321         case eelPMEUSERSWITCH:
2322             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2323             break;
2324
2325         case eelPME:
2326         case eelEWALD:
2327             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2328             break;
2329
2330         default:
2331             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2332             break;
2333     }
2334
2335     /* Vdw: Translate from mdp settings to kernel format */
2336     switch (fr->vdwtype)
2337     {
2338         case evdwCUT:
2339             if (fr->bBHAM)
2340             {
2341                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2342             }
2343             else
2344             {
2345                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2346             }
2347             break;
2348
2349         case evdwSWITCH:
2350         case evdwSHIFT:
2351         case evdwUSER:
2352         case evdwENCADSHIFT:
2353             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2354             break;
2355
2356         default:
2357             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2358             break;
2359     }
2360
2361     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2362     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2363     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2364
2365     fr->bTwinRange = fr->rlistlong > fr->rlist;
2366     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2367
2368     fr->reppow     = mtop->ffparams.reppow;
2369
2370     if (ir->cutoff_scheme == ecutsGROUP)
2371     {
2372         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2373                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2374         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2375         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2376                            fr->eeltype == eelEWALD ||
2377                            fr->eeltype == eelPME ||
2378                            fr->eeltype == eelRF ||
2379                            fr->eeltype == eelRF_ZERO);
2380
2381         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2382          * going to be faster to tabulate the interaction than calling the generic kernel.
2383          */
2384         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2385         {
2386             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2387             {
2388                 fr->bcoultab = TRUE;
2389             }
2390         }
2391         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2392                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2393                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2394                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2395         {
2396             if (fr->rcoulomb != fr->rvdw)
2397             {
2398                 fr->bcoultab = TRUE;
2399             }
2400         }
2401
2402         if (getenv("GMX_REQUIRE_TABLES"))
2403         {
2404             fr->bvdwtab  = TRUE;
2405             fr->bcoultab = TRUE;
2406         }
2407
2408         if (fp)
2409         {
2410             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2411             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2412         }
2413
2414         if (fr->bvdwtab == TRUE)
2415         {
2416             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2417             fr->nbkernel_vdw_modifier    = eintmodNONE;
2418         }
2419         if (fr->bcoultab == TRUE)
2420         {
2421             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2422             fr->nbkernel_elec_modifier    = eintmodNONE;
2423         }
2424     }
2425
2426     if (ir->cutoff_scheme == ecutsVERLET)
2427     {
2428         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2429         {
2430             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2431         }
2432         fr->bvdwtab  = FALSE;
2433         fr->bcoultab = FALSE;
2434     }
2435
2436     /* Tables are used for direct ewald sum */
2437     if (fr->bEwald)
2438     {
2439         if (EEL_PME(ir->coulombtype))
2440         {
2441             if (fp)
2442             {
2443                 fprintf(fp, "Will do PME sum in reciprocal space.\n");
2444             }
2445             if (ir->coulombtype == eelP3M_AD)
2446             {
2447                 please_cite(fp, "Hockney1988");
2448                 please_cite(fp, "Ballenegger2012");
2449             }
2450             else
2451             {
2452                 please_cite(fp, "Essmann95a");
2453             }
2454
2455             if (ir->ewald_geometry == eewg3DC)
2456             {
2457                 if (fp)
2458                 {
2459                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2460                 }
2461                 please_cite(fp, "In-Chul99a");
2462             }
2463         }
2464         fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
2465         init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
2466         if (fp)
2467         {
2468             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2469                     1/fr->ewaldcoeff);
2470         }
2471     }
2472
2473     /* Electrostatics */
2474     fr->epsilon_r       = ir->epsilon_r;
2475     fr->epsilon_rf      = ir->epsilon_rf;
2476     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2477     fr->rcoulomb_switch = ir->rcoulomb_switch;
2478     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2479
2480     /* Parameters for generalized RF */
2481     fr->zsquare = 0.0;
2482     fr->temp    = 0.0;
2483
2484     if (fr->eeltype == eelGRF)
2485     {
2486         init_generalized_rf(fp, mtop, ir, fr);
2487     }
2488     else if (fr->eeltype == eelSHIFT)
2489     {
2490         for (m = 0; (m < DIM); m++)
2491         {
2492             box_size[m] = box[m][m];
2493         }
2494
2495         if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
2496         {
2497             set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr);
2498         }
2499     }
2500
2501     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
2502                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2503                        IR_ELEC_FIELD(*ir) ||
2504                        (fr->adress_icor != eAdressICOff)
2505                        );
2506
2507     if (fr->cutoff_scheme == ecutsGROUP &&
2508         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2509     {
2510         /* Count the total number of charge groups */
2511         fr->cg_nalloc = ncg_mtop(mtop);
2512         srenew(fr->cg_cm, fr->cg_nalloc);
2513     }
2514     if (fr->shift_vec == NULL)
2515     {
2516         snew(fr->shift_vec, SHIFTS);
2517     }
2518
2519     if (fr->fshift == NULL)
2520     {
2521         snew(fr->fshift, SHIFTS);
2522     }
2523
2524     if (fr->nbfp == NULL)
2525     {
2526         fr->ntype = mtop->ffparams.atnr;
2527         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2528     }
2529
2530     /* Copy the energy group exclusions */
2531     fr->egp_flags = ir->opts.egp_flags;
2532
2533     /* Van der Waals stuff */
2534     fr->rvdw        = cutoff_inf(ir->rvdw);
2535     fr->rvdw_switch = ir->rvdw_switch;
2536     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2537     {
2538         if (fr->rvdw_switch >= fr->rvdw)
2539         {
2540             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2541                       fr->rvdw_switch, fr->rvdw);
2542         }
2543         if (fp)
2544         {
2545             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2546                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2547                     fr->rvdw_switch, fr->rvdw);
2548         }
2549     }
2550
2551     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2552     {
2553         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2554     }
2555
2556     if (fp)
2557     {
2558         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2559                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2560     }
2561
2562     fr->eDispCorr = ir->eDispCorr;
2563     if (ir->eDispCorr != edispcNO)
2564     {
2565         set_avcsixtwelve(fp, fr, mtop);
2566     }
2567
2568     if (fr->bBHAM)
2569     {
2570         set_bham_b_max(fp, fr, mtop);
2571     }
2572
2573     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2574
2575     /* Copy the GBSA data (radius, volume and surftens for each
2576      * atomtype) from the topology atomtype section to forcerec.
2577      */
2578     snew(fr->atype_radius, fr->ntype);
2579     snew(fr->atype_vol, fr->ntype);
2580     snew(fr->atype_surftens, fr->ntype);
2581     snew(fr->atype_gb_radius, fr->ntype);
2582     snew(fr->atype_S_hct, fr->ntype);
2583
2584     if (mtop->atomtypes.nr > 0)
2585     {
2586         for (i = 0; i < fr->ntype; i++)
2587         {
2588             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2589         }
2590         for (i = 0; i < fr->ntype; i++)
2591         {
2592             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2593         }
2594         for (i = 0; i < fr->ntype; i++)
2595         {
2596             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2597         }
2598         for (i = 0; i < fr->ntype; i++)
2599         {
2600             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2601         }
2602         for (i = 0; i < fr->ntype; i++)
2603         {
2604             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2605         }
2606     }
2607
2608     /* Generate the GB table if needed */
2609     if (fr->bGB)
2610     {
2611 #ifdef GMX_DOUBLE
2612         fr->gbtabscale = 2000;
2613 #else
2614         fr->gbtabscale = 500;
2615 #endif
2616
2617         fr->gbtabr = 100;
2618         fr->gbtab  = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
2619
2620         init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
2621
2622         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2623         if (!DOMAINDECOMP(cr))
2624         {
2625             make_local_gb(cr, fr->born, ir->gb_algorithm);
2626         }
2627     }
2628
2629     /* Set the charge scaling */
2630     if (fr->epsilon_r != 0)
2631     {
2632         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2633     }
2634     else
2635     {
2636         /* eps = 0 is infinite dieletric: no coulomb interactions */
2637         fr->epsfac = 0;
2638     }
2639
2640     /* Reaction field constants */
2641     if (EEL_RF(fr->eeltype))
2642     {
2643         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2644                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2645                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2646     }
2647
2648     set_chargesum(fp, fr, mtop);
2649
2650     /* if we are using LR electrostatics, and they are tabulated,
2651      * the tables will contain modified coulomb interactions.
2652      * Since we want to use the non-shifted ones for 1-4
2653      * coulombic interactions, we must have an extra set of tables.
2654      */
2655
2656     /* Construct tables.
2657      * A little unnecessary to make both vdw and coul tables sometimes,
2658      * but what the heck... */
2659
2660     bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2661
2662     bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2663                   fr->bBHAM || fr->bEwald) &&
2664                  (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2665                   gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2666                   gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2667
2668     negp_pp   = ir->opts.ngener - ir->nwall;
2669     negptable = 0;
2670     if (!bTab)
2671     {
2672         bNormalnblists = TRUE;
2673         fr->nnblists   = 1;
2674     }
2675     else
2676     {
2677         bNormalnblists = (ir->eDispCorr != edispcNO);
2678         for (egi = 0; egi < negp_pp; egi++)
2679         {
2680             for (egj = egi; egj < negp_pp; egj++)
2681             {
2682                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2683                 if (!(egp_flags & EGP_EXCL))
2684                 {
2685                     if (egp_flags & EGP_TABLE)
2686                     {
2687                         negptable++;
2688                     }
2689                     else
2690                     {
2691                         bNormalnblists = TRUE;
2692                     }
2693                 }
2694             }
2695         }
2696         if (bNormalnblists)
2697         {
2698             fr->nnblists = negptable + 1;
2699         }
2700         else
2701         {
2702             fr->nnblists = negptable;
2703         }
2704         if (fr->nnblists > 1)
2705         {
2706             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2707         }
2708     }
2709
2710     if (ir->adress)
2711     {
2712         fr->nnblists *= 2;
2713     }
2714
2715     snew(fr->nblists, fr->nnblists);
2716
2717     /* This code automatically gives table length tabext without cut-off's,
2718      * in that case grompp should already have checked that we do not need
2719      * normal tables and we only generate tables for 1-4 interactions.
2720      */
2721     rtab = ir->rlistlong + ir->tabext;
2722
2723     if (bTab)
2724     {
2725         /* make tables for ordinary interactions */
2726         if (bNormalnblists)
2727         {
2728             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2729             if (ir->adress)
2730             {
2731                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2732             }
2733             if (!bSep14tab)
2734             {
2735                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2736             }
2737             m = 1;
2738         }
2739         else
2740         {
2741             m = 0;
2742         }
2743         if (negptable > 0)
2744         {
2745             /* Read the special tables for certain energy group pairs */
2746             nm_ind = mtop->groups.grps[egcENER].nm_ind;
2747             for (egi = 0; egi < negp_pp; egi++)
2748             {
2749                 for (egj = egi; egj < negp_pp; egj++)
2750                 {
2751                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2752                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2753                     {
2754                         nbl = &(fr->nblists[m]);
2755                         if (fr->nnblists > 1)
2756                         {
2757                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2758                         }
2759                         /* Read the table file with the two energy groups names appended */
2760                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2761                                         *mtop->groups.grpname[nm_ind[egi]],
2762                                         *mtop->groups.grpname[nm_ind[egj]],
2763                                         &fr->nblists[m]);
2764                         if (ir->adress)
2765                         {
2766                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2767                                             *mtop->groups.grpname[nm_ind[egi]],
2768                                             *mtop->groups.grpname[nm_ind[egj]],
2769                                             &fr->nblists[fr->nnblists/2+m]);
2770                         }
2771                         m++;
2772                     }
2773                     else if (fr->nnblists > 1)
2774                     {
2775                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2776                     }
2777                 }
2778             }
2779         }
2780     }
2781     if (bSep14tab)
2782     {
2783         /* generate extra tables with plain Coulomb for 1-4 interactions only */
2784         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2785                                 GMX_MAKETABLES_14ONLY);
2786     }
2787
2788     /* Read AdResS Thermo Force table if needed */
2789     if (fr->adress_icor == eAdressICThermoForce)
2790     {
2791         /* old todo replace */
2792
2793         if (ir->adress->n_tf_grps > 0)
2794         {
2795             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2796
2797         }
2798         else
2799         {
2800             /* load the default table */
2801             snew(fr->atf_tabs, 1);
2802             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2803         }
2804     }
2805
2806     /* Wall stuff */
2807     fr->nwall = ir->nwall;
2808     if (ir->nwall && ir->wall_type == ewtTABLE)
2809     {
2810         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2811     }
2812
2813     if (fcd && tabbfn)
2814     {
2815         fcd->bondtab  = make_bonded_tables(fp,
2816                                            F_TABBONDS, F_TABBONDSNC,
2817                                            mtop, tabbfn, "b");
2818         fcd->angletab = make_bonded_tables(fp,
2819                                            F_TABANGLES, -1,
2820                                            mtop, tabbfn, "a");
2821         fcd->dihtab   = make_bonded_tables(fp,
2822                                            F_TABDIHS, -1,
2823                                            mtop, tabbfn, "d");
2824     }
2825     else
2826     {
2827         if (debug)
2828         {
2829             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2830         }
2831     }
2832
2833     /* QM/MM initialization if requested
2834      */
2835     if (ir->bQMMM)
2836     {
2837         fprintf(stderr, "QM/MM calculation requested.\n");
2838     }
2839
2840     fr->bQMMM      = ir->bQMMM;
2841     fr->qr         = mk_QMMMrec();
2842
2843     /* Set all the static charge group info */
2844     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2845                                    &fr->bExcl_IntraCGAll_InterCGNone);
2846     if (DOMAINDECOMP(cr))
2847     {
2848         fr->cginfo = NULL;
2849     }
2850     else
2851     {
2852         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2853     }
2854
2855     if (!DOMAINDECOMP(cr))
2856     {
2857         /* When using particle decomposition, the effect of the second argument,
2858          * which sets fr->hcg, is corrected later in do_md and init_em.
2859          */
2860         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2861                             mtop->natoms, mtop->natoms, mtop->natoms);
2862     }
2863
2864     fr->print_force = print_force;
2865
2866
2867     /* coarse load balancing vars */
2868     fr->t_fnbf    = 0.;
2869     fr->t_wait    = 0.;
2870     fr->timesteps = 0;
2871
2872     /* Initialize neighbor search */
2873     init_ns(fp, cr, &fr->ns, fr, mtop, box);
2874
2875     if (cr->duty & DUTY_PP)
2876     {
2877         gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
2878         /*
2879            if (ir->bAdress)
2880             {
2881                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2882             }
2883          */
2884     }
2885
2886     /* Initialize the thread working data for bonded interactions */
2887     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2888
2889     snew(fr->excl_load, fr->nthreads+1);
2890
2891     if (fr->cutoff_scheme == ecutsVERLET)
2892     {
2893         if (ir->rcoulomb != ir->rvdw)
2894         {
2895             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2896         }
2897
2898         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2899     }
2900
2901     /* fr->ic is used both by verlet and group kernels (to some extent) now */
2902     init_interaction_const(fp, &fr->ic, fr, rtab);
2903     if (ir->eDispCorr != edispcNO)
2904     {
2905         calc_enervirdiff(fp, ir->eDispCorr, fr);
2906     }
2907 }
2908
2909 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2910 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
2911 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
2912
2913 void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
2914 {
2915     int i;
2916
2917     pr_real(fp, fr->rlist);
2918     pr_real(fp, fr->rcoulomb);
2919     pr_real(fp, fr->fudgeQQ);
2920     pr_bool(fp, fr->bGrid);
2921     pr_bool(fp, fr->bTwinRange);
2922     /*pr_int(fp,fr->cg0);
2923        pr_int(fp,fr->hcg);*/
2924     for (i = 0; i < fr->nnblists; i++)
2925     {
2926         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
2927     }
2928     pr_real(fp, fr->rcoulomb_switch);
2929     pr_real(fp, fr->rcoulomb);
2930
2931     fflush(fp);
2932 }
2933
2934 void forcerec_set_excl_load(t_forcerec *fr,
2935                             const gmx_localtop_t *top, const t_commrec *cr)
2936 {
2937     const int *ind, *a;
2938     int        t, i, j, ntot, n, ntarget;
2939
2940     if (cr != NULL && PARTDECOMP(cr))
2941     {
2942         /* No OpenMP with particle decomposition */
2943         pd_at_range(cr,
2944                     &fr->excl_load[0],
2945                     &fr->excl_load[1]);
2946
2947         return;
2948     }
2949
2950     ind = top->excls.index;
2951     a   = top->excls.a;
2952
2953     ntot = 0;
2954     for (i = 0; i < top->excls.nr; i++)
2955     {
2956         for (j = ind[i]; j < ind[i+1]; j++)
2957         {
2958             if (a[j] > i)
2959             {
2960                 ntot++;
2961             }
2962         }
2963     }
2964
2965     fr->excl_load[0] = 0;
2966     n                = 0;
2967     i                = 0;
2968     for (t = 1; t <= fr->nthreads; t++)
2969     {
2970         ntarget = (ntot*t)/fr->nthreads;
2971         while (i < top->excls.nr && n < ntarget)
2972         {
2973             for (j = ind[i]; j < ind[i+1]; j++)
2974             {
2975                 if (a[j] > i)
2976                 {
2977                     n++;
2978                 }
2979             }
2980             i++;
2981         }
2982         fr->excl_load[t] = i;
2983     }
2984 }