src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "vec.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "macros.h"
  49 #include "smalloc.h"
  50 #include "macros.h"
  51 #include "gmx_fatal.h"
  52 #include "gmx_fatal_collective.h"
  53 #include "physics.h"
  54 #include "force.h"
  55 #include "tables.h"
  56 #include "nonbonded.h"
  57 #include "invblock.h"
  58 #include "names.h"
  59 #include "network.h"
  60 #include "pbc.h"
  61 #include "ns.h"
  62 #include "mshift.h"
  63 #include "txtdump.h"
  64 #include "coulomb.h"
  65 #include "md_support.h"
  66 #include "md_logging.h"
  67 #include "domdec.h"
  68 #include "qmmm.h"
  69 #include "copyrite.h"
  70 #include "mtop_util.h"
  71 #include "nbnxn_simd.h"
  72 #include "nbnxn_search.h"
  73 #include "nbnxn_atomdata.h"
  74 #include "nbnxn_consts.h"
  75 #include "gmx_omp_nthreads.h"
  76 #include "gmx_detect_hardware.h"
  77 #include "inputrec.h"
  78
  79 #ifdef _MSC_VER
  80 /* MSVC definition for __cpuid() */
  81 #include <intrin.h>
  82 #endif
  83
  84 #include "types/nbnxn_cuda_types_ext.h"
  85 #include "gpu_utils.h"
  86 #include "nbnxn_cuda_data_mgmt.h"
  87 #include "pmalloc_cuda.h"
  88
  89 t_forcerec *mk_forcerec(void)
  90 {
  91     t_forcerec *fr;
  92
  93     snew(fr, 1);
  94
  95     return fr;
  96 }
  97
  98 #ifdef DEBUG
  99 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
 100 {
 101     int i, j;
 102
 103     for (i = 0; (i < atnr); i++)
 104     {
 105         for (j = 0; (j < atnr); j++)
 106         {
 107             fprintf(fp, "%2d - %2d", i, j);
 108             if (bBHAM)
 109             {
 110                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 111                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 112             }
 113             else
 114             {
 115                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 116                         C12(nbfp, atnr, i, j)/12.0);
 117             }
 118         }
 119     }
 120 }
 121 #endif
 122
 123 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 124 {
 125     real *nbfp;
 126     int   i, j, k, atnr;
 127
 128     atnr = idef->atnr;
 129     if (bBHAM)
 130     {
 131         snew(nbfp, 3*atnr*atnr);
 132         for (i = k = 0; (i < atnr); i++)
 133         {
 134             for (j = 0; (j < atnr); j++, k++)
 135             {
 136                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 137                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 138                 /* nbfp now includes the 6.0 derivative prefactor */
 139                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 140             }
 141         }
 142     }
 143     else
 144     {
 145         snew(nbfp, 2*atnr*atnr);
 146         for (i = k = 0; (i < atnr); i++)
 147         {
 148             for (j = 0; (j < atnr); j++, k++)
 149             {
 150                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 151                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 152                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 153             }
 154         }
 155     }
 156
 157     return nbfp;
 158 }
 159
 160 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 161 {
 162     real *nbfp;
 163     int   i, j, k, atnr;
 164     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 165     real  c6, c12;
 166
 167     atnr = idef->atnr;
 168     snew(nbfp, 2*atnr*atnr);
 169     for (i = 0; i < atnr; ++i)
 170     {
 171         for (j = 0; j < atnr; ++j)
 172         {
 173             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 174             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 175             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 176             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 177             c6   = sqrt(c6i  * c6j);
 178             c12  = sqrt(c12i * c12j);
 179             if (comb_rule == eCOMB_ARITHMETIC
 180                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 181             {
 182                 sigmai = pow(c12i / c6i, 1.0/6.0);
 183                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 184                 epsi   = c6i * c6i / c12i;
 185                 epsj   = c6j * c6j / c12j;
 186                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 187                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 188             }
 189             C6(nbfp, atnr, i, j)   = c6*6.0;
 190             C12(nbfp, atnr, i, j)  = c12*12.0;
 191         }
 192     }
 193     return nbfp;
 194 }
 195
 196 /* This routine sets fr->solvent_opt to the most common solvent in the
 197  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 198  * the fr->solvent_type array with the correct type (or esolNO).
 199  *
 200  * Charge groups that fulfill the conditions but are not identical to the
 201  * most common one will be marked as esolNO in the solvent_type array.
 202  *
 203  * TIP3p is identical to SPC for these purposes, so we call it
 204  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 205  *
 206  * NOTE: QM particle should not
 207  * become an optimized solvent. Not even if there is only one charge
 208  * group in the Qm
 209  */
 210
 211 typedef struct
 212 {
 213     int    model;
 214     int    count;
 215     int    vdwtype[4];
 216     real   charge[4];
 217 } solvent_parameters_t;
 218
 219 static void
 220 check_solvent_cg(const gmx_moltype_t    *molt,
 221                  int                     cg0,
 222                  int                     nmol,
 223                  const unsigned char    *qm_grpnr,
 224                  const t_grps           *qm_grps,
 225                  t_forcerec   *          fr,
 226                  int                    *n_solvent_parameters,
 227                  solvent_parameters_t  **solvent_parameters_p,
 228                  int                     cginfo,
 229                  int                    *cg_sp)
 230 {
 231     const t_blocka       *excl;
 232     t_atom               *atom;
 233     int                   j, k;
 234     int                   j0, j1, nj;
 235     gmx_bool              perturbed;
 236     gmx_bool              has_vdw[4];
 237     gmx_bool              match;
 238     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 239     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 240     int                   tjA;
 241     gmx_bool              qm;
 242     solvent_parameters_t *solvent_parameters;
 243
 244     /* We use a list with parameters for each solvent type.
 245      * Every time we discover a new molecule that fulfills the basic
 246      * conditions for a solvent we compare with the previous entries
 247      * in these lists. If the parameters are the same we just increment
 248      * the counter for that type, and otherwise we create a new type
 249      * based on the current molecule.
 250      *
 251      * Once we've finished going through all molecules we check which
 252      * solvent is most common, and mark all those molecules while we
 253      * clear the flag on all others.
 254      */
 255
 256     solvent_parameters = *solvent_parameters_p;
 257
 258     /* Mark the cg first as non optimized */
 259     *cg_sp = -1;
 260
 261     /* Check if this cg has no exclusions with atoms in other charge groups
 262      * and all atoms inside the charge group excluded.
 263      * We only have 3 or 4 atom solvent loops.
 264      */
 265     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 266         !GET_CGINFO_EXCL_INTRA(cginfo))
 267     {
 268         return;
 269     }
 270
 271     /* Get the indices of the first atom in this charge group */
 272     j0     = molt->cgs.index[cg0];
 273     j1     = molt->cgs.index[cg0+1];
 274
 275     /* Number of atoms in our molecule */
 276     nj     = j1 - j0;
 277
 278     if (debug)
 279     {
 280         fprintf(debug,
 281                 "Moltype '%s': there are %d atoms in this charge group\n",
 282                 *molt->name, nj);
 283     }
 284
 285     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 286      * otherwise skip it.
 287      */
 288     if (nj < 3 || nj > 4)
 289     {
 290         return;
 291     }
 292
 293     /* Check if we are doing QM on this group */
 294     qm = FALSE;
 295     if (qm_grpnr != NULL)
 296     {
 297         for (j = j0; j < j1 && !qm; j++)
 298         {
 299             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 300         }
 301     }
 302     /* Cannot use solvent optimization with QM */
 303     if (qm)
 304     {
 305         return;
 306     }
 307
 308     atom = molt->atoms.atom;
 309
 310     /* Still looks like a solvent, time to check parameters */
 311
 312     /* If it is perturbed (free energy) we can't use the solvent loops,
 313      * so then we just skip to the next molecule.
 314      */
 315     perturbed = FALSE;
 316
 317     for (j = j0; j < j1 && !perturbed; j++)
 318     {
 319         perturbed = PERTURBED(atom[j]);
 320     }
 321
 322     if (perturbed)
 323     {
 324         return;
 325     }
 326
 327     /* Now it's only a question if the VdW and charge parameters
 328      * are OK. Before doing the check we compare and see if they are
 329      * identical to a possible previous solvent type.
 330      * First we assign the current types and charges.
 331      */
 332     for (j = 0; j < nj; j++)
 333     {
 334         tmp_vdwtype[j] = atom[j0+j].type;
 335         tmp_charge[j]  = atom[j0+j].q;
 336     }
 337
 338     /* Does it match any previous solvent type? */
 339     for (k = 0; k < *n_solvent_parameters; k++)
 340     {
 341         match = TRUE;
 342
 343
 344         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 345         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 346              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 347         {
 348             match = FALSE;
 349         }
 350
 351         /* Check that types & charges match for all atoms in molecule */
 352         for (j = 0; j < nj && match == TRUE; j++)
 353         {
 354             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 355             {
 356                 match = FALSE;
 357             }
 358             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 359             {
 360                 match = FALSE;
 361             }
 362         }
 363         if (match == TRUE)
 364         {
 365             /* Congratulations! We have a matched solvent.
 366              * Flag it with this type for later processing.
 367              */
 368             *cg_sp = k;
 369             solvent_parameters[k].count += nmol;
 370
 371             /* We are done with this charge group */
 372             return;
 373         }
 374     }
 375
 376     /* If we get here, we have a tentative new solvent type.
 377      * Before we add it we must check that it fulfills the requirements
 378      * of the solvent optimized loops. First determine which atoms have
 379      * VdW interactions.
 380      */
 381     for (j = 0; j < nj; j++)
 382     {
 383         has_vdw[j] = FALSE;
 384         tjA        = tmp_vdwtype[j];
 385
 386         /* Go through all other tpes and see if any have non-zero
 387          * VdW parameters when combined with this one.
 388          */
 389         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 390         {
 391             /* We already checked that the atoms weren't perturbed,
 392              * so we only need to check state A now.
 393              */
 394             if (fr->bBHAM)
 395             {
 396                 has_vdw[j] = (has_vdw[j] ||
 397                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 398                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 399                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 400             }
 401             else
 402             {
 403                 /* Standard LJ */
 404                 has_vdw[j] = (has_vdw[j] ||
 405                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 406                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 407             }
 408         }
 409     }
 410
 411     /* Now we know all we need to make the final check and assignment. */
 412     if (nj == 3)
 413     {
 414         /* So, is it an SPC?
 415          * For this we require thatn all atoms have charge,
 416          * the charges on atom 2 & 3 should be the same, and only
 417          * atom 1 might have VdW.
 418          */
 419         if (has_vdw[1] == FALSE &&
 420             has_vdw[2] == FALSE &&
 421             tmp_charge[0]  != 0 &&
 422             tmp_charge[1]  != 0 &&
 423             tmp_charge[2]  == tmp_charge[1])
 424         {
 425             srenew(solvent_parameters, *n_solvent_parameters+1);
 426             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 427             solvent_parameters[*n_solvent_parameters].count = nmol;
 428             for (k = 0; k < 3; k++)
 429             {
 430                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 431                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 432             }
 433
 434             *cg_sp = *n_solvent_parameters;
 435             (*n_solvent_parameters)++;
 436         }
 437     }
 438     else if (nj == 4)
 439     {
 440         /* Or could it be a TIP4P?
 441          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 442          * Only atom 1 mght have VdW.
 443          */
 444         if (has_vdw[1] == FALSE &&
 445             has_vdw[2] == FALSE &&
 446             has_vdw[3] == FALSE &&
 447             tmp_charge[0]  == 0 &&
 448             tmp_charge[1]  != 0 &&
 449             tmp_charge[2]  == tmp_charge[1] &&
 450             tmp_charge[3]  != 0)
 451         {
 452             srenew(solvent_parameters, *n_solvent_parameters+1);
 453             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 454             solvent_parameters[*n_solvent_parameters].count = nmol;
 455             for (k = 0; k < 4; k++)
 456             {
 457                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 458                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 459             }
 460
 461             *cg_sp = *n_solvent_parameters;
 462             (*n_solvent_parameters)++;
 463         }
 464     }
 465
 466     *solvent_parameters_p = solvent_parameters;
 467 }
 468
 469 static void
 470 check_solvent(FILE  *                fp,
 471               const gmx_mtop_t  *    mtop,
 472               t_forcerec  *          fr,
 473               cginfo_mb_t           *cginfo_mb)
 474 {
 475     const t_block     *   cgs;
 476     const t_block     *   mols;
 477     const gmx_moltype_t  *molt;
 478     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 479     int                   n_solvent_parameters;
 480     solvent_parameters_t *solvent_parameters;
 481     int                 **cg_sp;
 482     int                   bestsp, bestsol;
 483
 484     if (debug)
 485     {
 486         fprintf(debug, "Going to determine what solvent types we have.\n");
 487     }
 488
 489     mols = &mtop->mols;
 490
 491     n_solvent_parameters = 0;
 492     solvent_parameters   = NULL;
 493     /* Allocate temporary array for solvent type */
 494     snew(cg_sp, mtop->nmolblock);
 495
 496     cg_offset = 0;
 497     at_offset = 0;
 498     for (mb = 0; mb < mtop->nmolblock; mb++)
 499     {
 500         molt = &mtop->moltype[mtop->molblock[mb].type];
 501         cgs  = &molt->cgs;
 502         /* Here we have to loop over all individual molecules
 503          * because we need to check for QMMM particles.
 504          */
 505         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 506         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 507         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 508         for (mol = 0; mol < nmol_ch; mol++)
 509         {
 510             cgm = mol*cgs->nr;
 511             am  = mol*cgs->index[cgs->nr];
 512             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 513             {
 514                 check_solvent_cg(molt, cg_mol, nmol,
 515                                  mtop->groups.grpnr[egcQMMM] ?
 516                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 517                                  &mtop->groups.grps[egcQMMM],
 518                                  fr,
 519                                  &n_solvent_parameters, &solvent_parameters,
 520                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 521                                  &cg_sp[mb][cgm+cg_mol]);
 522             }
 523         }
 524         cg_offset += cgs->nr;
 525         at_offset += cgs->index[cgs->nr];
 526     }
 527
 528     /* Puh! We finished going through all charge groups.
 529      * Now find the most common solvent model.
 530      */
 531
 532     /* Most common solvent this far */
 533     bestsp = -2;
 534     for (i = 0; i < n_solvent_parameters; i++)
 535     {
 536         if (bestsp == -2 ||
 537             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 538         {
 539             bestsp = i;
 540         }
 541     }
 542
 543     if (bestsp >= 0)
 544     {
 545         bestsol = solvent_parameters[bestsp].model;
 546     }
 547     else
 548     {
 549         bestsol = esolNO;
 550     }
 551
 552 #ifdef DISABLE_WATER_NLIST
 553     bestsol = esolNO;
 554 #endif
 555
 556     fr->nWatMol = 0;
 557     for (mb = 0; mb < mtop->nmolblock; mb++)
 558     {
 559         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 560         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 561         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 562         {
 563             if (cg_sp[mb][i] == bestsp)
 564             {
 565                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 566                 fr->nWatMol += nmol;
 567             }
 568             else
 569             {
 570                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 571             }
 572         }
 573         sfree(cg_sp[mb]);
 574     }
 575     sfree(cg_sp);
 576
 577     if (bestsol != esolNO && fp != NULL)
 578     {
 579         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 580                 esol_names[bestsol],
 581                 solvent_parameters[bestsp].count);
 582     }
 583
 584     sfree(solvent_parameters);
 585     fr->solvent_opt = bestsol;
 586 }
 587
 588 enum {
 589     acNONE = 0, acCONSTRAINT, acSETTLE
 590 };
 591
 592 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 593                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 594                                    gmx_bool *bFEP_NonBonded,
 595                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 596 {
 597     const t_block        *cgs;
 598     const t_blocka       *excl;
 599     const gmx_moltype_t  *molt;
 600     const gmx_molblock_t *molb;
 601     cginfo_mb_t          *cginfo_mb;
 602     gmx_bool             *type_VDW;
 603     int                  *cginfo;
 604     int                   cg_offset, a_offset, cgm, am;
 605     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 606     int                  *a_con;
 607     int                   ftype;
 608     int                   ia;
 609     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bFEP;
 610
 611     ncg_tot = ncg_mtop(mtop);
 612     snew(cginfo_mb, mtop->nmolblock);
 613
 614     snew(type_VDW, fr->ntype);
 615     for (ai = 0; ai < fr->ntype; ai++)
 616     {
 617         type_VDW[ai] = FALSE;
 618         for (j = 0; j < fr->ntype; j++)
 619         {
 620             type_VDW[ai] = type_VDW[ai] ||
 621                 fr->bBHAM ||
 622                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 623                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 624         }
 625     }
 626
 627     *bFEP_NonBonded               = FALSE;
 628     *bExcl_IntraCGAll_InterCGNone = TRUE;
 629
 630     excl_nalloc = 10;
 631     snew(bExcl, excl_nalloc);
 632     cg_offset = 0;
 633     a_offset  = 0;
 634     for (mb = 0; mb < mtop->nmolblock; mb++)
 635     {
 636         molb = &mtop->molblock[mb];
 637         molt = &mtop->moltype[molb->type];
 638         cgs  = &molt->cgs;
 639         excl = &molt->excls;
 640
 641         /* Check if the cginfo is identical for all molecules in this block.
 642          * If so, we only need an array of the size of one molecule.
 643          * Otherwise we make an array of #mol times #cgs per molecule.
 644          */
 645         bId = TRUE;
 646         am  = 0;
 647         for (m = 0; m < molb->nmol; m++)
 648         {
 649             am = m*cgs->index[cgs->nr];
 650             for (cg = 0; cg < cgs->nr; cg++)
 651             {
 652                 a0 = cgs->index[cg];
 653                 a1 = cgs->index[cg+1];
 654                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 655                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 656                 {
 657                     bId = FALSE;
 658                 }
 659                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 660                 {
 661                     for (ai = a0; ai < a1; ai++)
 662                     {
 663                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 664                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 665                         {
 666                             bId = FALSE;
 667                         }
 668                     }
 669                 }
 670             }
 671         }
 672
 673         cginfo_mb[mb].cg_start = cg_offset;
 674         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 675         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 676         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 677         cginfo = cginfo_mb[mb].cginfo;
 678
 679         /* Set constraints flags for constrained atoms */
 680         snew(a_con, molt->atoms.nr);
 681         for (ftype = 0; ftype < F_NRE; ftype++)
 682         {
 683             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 684             {
 685                 int nral;
 686
 687                 nral = NRAL(ftype);
 688                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 689                 {
 690                     int a;
 691
 692                     for (a = 0; a < nral; a++)
 693                     {
 694                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 695                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 696                     }
 697                 }
 698             }
 699         }
 700
 701         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 702         {
 703             cgm = m*cgs->nr;
 704             am  = m*cgs->index[cgs->nr];
 705             for (cg = 0; cg < cgs->nr; cg++)
 706             {
 707                 a0 = cgs->index[cg];
 708                 a1 = cgs->index[cg+1];
 709
 710                 /* Store the energy group in cginfo */
 711                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 712                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 713
 714                 /* Check the intra/inter charge group exclusions */
 715                 if (a1-a0 > excl_nalloc)
 716                 {
 717                     excl_nalloc = a1 - a0;
 718                     srenew(bExcl, excl_nalloc);
 719                 }
 720                 /* bExclIntraAll: all intra cg interactions excluded
 721                  * bExclInter:    any inter cg interactions excluded
 722                  */
 723                 bExclIntraAll = TRUE;
 724                 bExclInter    = FALSE;
 725                 bHaveVDW      = FALSE;
 726                 bHaveQ        = FALSE;
 727                 bFEP          = FALSE;
 728                 for (ai = a0; ai < a1; ai++)
 729                 {
 730                     /* Check VDW and electrostatic interactions */
 731                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 732                                             type_VDW[molt->atoms.atom[ai].typeB]);
 733                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 734                                             molt->atoms.atom[ai].qB != 0);
 735
 736                     bFEP    = bFEP || (PERTURBED(molt->atoms.atom[ai]) != 0);
 737
 738                     /* Clear the exclusion list for atom ai */
 739                     for (aj = a0; aj < a1; aj++)
 740                     {
 741                         bExcl[aj-a0] = FALSE;
 742                     }
 743                     /* Loop over all the exclusions of atom ai */
 744                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 745                     {
 746                         aj = excl->a[j];
 747                         if (aj < a0 || aj >= a1)
 748                         {
 749                             bExclInter = TRUE;
 750                         }
 751                         else
 752                         {
 753                             bExcl[aj-a0] = TRUE;
 754                         }
 755                     }
 756                     /* Check if ai excludes a0 to a1 */
 757                     for (aj = a0; aj < a1; aj++)
 758                     {
 759                         if (!bExcl[aj-a0])
 760                         {
 761                             bExclIntraAll = FALSE;
 762                         }
 763                     }
 764
 765                     switch (a_con[ai])
 766                     {
 767                         case acCONSTRAINT:
 768                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 769                             break;
 770                         case acSETTLE:
 771                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 772                             break;
 773                         default:
 774                             break;
 775                     }
 776                 }
 777                 if (bExclIntraAll)
 778                 {
 779                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 780                 }
 781                 if (bExclInter)
 782                 {
 783                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 784                 }
 785                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 786                 {
 787                     /* The size in cginfo is currently only read with DD */
 788                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 789                 }
 790                 if (bHaveVDW)
 791                 {
 792                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 793                 }
 794                 if (bHaveQ)
 795                 {
 796                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 797                 }
 798                 if (bFEP)
 799                 {
 800                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 801                     *bFEP_NonBonded = TRUE;
 802                 }
 803                 /* Store the charge group size */
 804                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 805
 806                 if (!bExclIntraAll || bExclInter)
 807                 {
 808                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 809                 }
 810             }
 811         }
 812
 813         sfree(a_con);
 814
 815         cg_offset += molb->nmol*cgs->nr;
 816         a_offset  += molb->nmol*cgs->index[cgs->nr];
 817     }
 818     sfree(bExcl);
 819
 820     /* the solvent optimizer is called after the QM is initialized,
 821      * because we don't want to have the QM subsystemto become an
 822      * optimized solvent
 823      */
 824
 825     check_solvent(fplog, mtop, fr, cginfo_mb);
 826
 827     if (getenv("GMX_NO_SOLV_OPT"))
 828     {
 829         if (fplog)
 830         {
 831             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 832                     "Disabling all solvent optimization\n");
 833         }
 834         fr->solvent_opt = esolNO;
 835     }
 836     if (bNoSolvOpt)
 837     {
 838         fr->solvent_opt = esolNO;
 839     }
 840     if (!fr->solvent_opt)
 841     {
 842         for (mb = 0; mb < mtop->nmolblock; mb++)
 843         {
 844             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 845             {
 846                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 847             }
 848         }
 849     }
 850
 851     return cginfo_mb;
 852 }
 853
 854 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 855 {
 856     int  ncg, mb, cg;
 857     int *cginfo;
 858
 859     ncg = cgi_mb[nmb-1].cg_end;
 860     snew(cginfo, ncg);
 861     mb = 0;
 862     for (cg = 0; cg < ncg; cg++)
 863     {
 864         while (cg >= cgi_mb[mb].cg_end)
 865         {
 866             mb++;
 867         }
 868         cginfo[cg] =
 869             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 870     }
 871
 872     return cginfo;
 873 }
 874
 875 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 876 {
 877     /*This now calculates sum for q and c6*/
 878     double         qsum, q2sum, q, c6sum, c6;
 879     int            mb, nmol, i;
 880     const t_atoms *atoms;
 881
 882     qsum   = 0;
 883     q2sum  = 0;
 884     c6sum  = 0;
 885     for (mb = 0; mb < mtop->nmolblock; mb++)
 886     {
 887         nmol  = mtop->molblock[mb].nmol;
 888         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 889         for (i = 0; i < atoms->nr; i++)
 890         {
 891             q       = atoms->atom[i].q;
 892             qsum   += nmol*q;
 893             q2sum  += nmol*q*q;
 894             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 895             c6sum  += nmol*c6;
 896         }
 897     }
 898     fr->qsum[0]   = qsum;
 899     fr->q2sum[0]  = q2sum;
 900     fr->c6sum[0]  = c6sum;
 901
 902     if (fr->efep != efepNO)
 903     {
 904         qsum   = 0;
 905         q2sum  = 0;
 906         c6sum  = 0;
 907         for (mb = 0; mb < mtop->nmolblock; mb++)
 908         {
 909             nmol  = mtop->molblock[mb].nmol;
 910             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 911             for (i = 0; i < atoms->nr; i++)
 912             {
 913                 q       = atoms->atom[i].qB;
 914                 qsum   += nmol*q;
 915                 q2sum  += nmol*q*q;
 916                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 917                 c6sum  += nmol*c6;
 918             }
 919             fr->qsum[1]   = qsum;
 920             fr->q2sum[1]  = q2sum;
 921             fr->c6sum[1]  = c6sum;
 922         }
 923     }
 924     else
 925     {
 926         fr->qsum[1]   = fr->qsum[0];
 927         fr->q2sum[1]  = fr->q2sum[0];
 928         fr->c6sum[1]  = fr->c6sum[0];
 929     }
 930     if (log)
 931     {
 932         if (fr->efep == efepNO)
 933         {
 934             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 935         }
 936         else
 937         {
 938             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 939                     fr->qsum[0], fr->qsum[1]);
 940         }
 941     }
 942 }
 943
 944 void update_forcerec(t_forcerec *fr, matrix box)
 945 {
 946     if (fr->eeltype == eelGRF)
 947     {
 948         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 949                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 950                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 951     }
 952 }
 953
 954 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 955 {
 956     const t_atoms  *atoms, *atoms_tpi;
 957     const t_blocka *excl;
 958     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 959     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 960     double          csix, ctwelve;
 961     int             ntp, *typecount;
 962     gmx_bool        bBHAM;
 963     real           *nbfp;
 964     real           *nbfp_comb = NULL;
 965
 966     ntp   = fr->ntype;
 967     bBHAM = fr->bBHAM;
 968     nbfp  = fr->nbfp;
 969
 970     /* For LJ-PME, we want to correct for the difference between the
 971      * actual C6 values and the C6 values used by the LJ-PME based on
 972      * combination rules. */
 973
 974     if (EVDW_PME(fr->vdwtype))
 975     {
 976         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
 977                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
 978         for (tpi = 0; tpi < ntp; ++tpi)
 979         {
 980             for (tpj = 0; tpj < ntp; ++tpj)
 981             {
 982                 C6(nbfp_comb, ntp, tpi, tpj) =
 983                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
 984                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
 985             }
 986         }
 987         nbfp = nbfp_comb;
 988     }
 989     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 990     {
 991         csix    = 0;
 992         ctwelve = 0;
 993         npair   = 0;
 994         nexcl   = 0;
 995         if (!fr->n_tpi)
 996         {
 997             /* Count the types so we avoid natoms^2 operations */
 998             snew(typecount, ntp);
 999             gmx_mtop_count_atomtypes(mtop, q, typecount);
1000
1001             for (tpi = 0; tpi < ntp; tpi++)
1002             {
1003                 for (tpj = tpi; tpj < ntp; tpj++)
1004                 {
1005                     tmpi = typecount[tpi];
1006                     tmpj = typecount[tpj];
1007                     if (tpi != tpj)
1008                     {
1009                         npair_ij = tmpi*tmpj;
1010                     }
1011                     else
1012                     {
1013                         npair_ij = tmpi*(tmpi - 1)/2;
1014                     }
1015                     if (bBHAM)
1016                     {
1017                         /* nbfp now includes the 6.0 derivative prefactor */
1018                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1019                     }
1020                     else
1021                     {
1022                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1023                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1024                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1025                     }
1026                     npair += npair_ij;
1027                 }
1028             }
1029             sfree(typecount);
1030             /* Subtract the excluded pairs.
1031              * The main reason for substracting exclusions is that in some cases
1032              * some combinations might never occur and the parameters could have
1033              * any value. These unused values should not influence the dispersion
1034              * correction.
1035              */
1036             for (mb = 0; mb < mtop->nmolblock; mb++)
1037             {
1038                 nmol  = mtop->molblock[mb].nmol;
1039                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1040                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1041                 for (i = 0; (i < atoms->nr); i++)
1042                 {
1043                     if (q == 0)
1044                     {
1045                         tpi = atoms->atom[i].type;
1046                     }
1047                     else
1048                     {
1049                         tpi = atoms->atom[i].typeB;
1050                     }
1051                     j1  = excl->index[i];
1052                     j2  = excl->index[i+1];
1053                     for (j = j1; j < j2; j++)
1054                     {
1055                         k = excl->a[j];
1056                         if (k > i)
1057                         {
1058                             if (q == 0)
1059                             {
1060                                 tpj = atoms->atom[k].type;
1061                             }
1062                             else
1063                             {
1064                                 tpj = atoms->atom[k].typeB;
1065                             }
1066                             if (bBHAM)
1067                             {
1068                                 /* nbfp now includes the 6.0 derivative prefactor */
1069                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1070                             }
1071                             else
1072                             {
1073                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1074                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1075                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1076                             }
1077                             nexcl += nmol;
1078                         }
1079                     }
1080                 }
1081             }
1082         }
1083         else
1084         {
1085             /* Only correct for the interaction of the test particle
1086              * with the rest of the system.
1087              */
1088             atoms_tpi =
1089                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1090
1091             npair = 0;
1092             for (mb = 0; mb < mtop->nmolblock; mb++)
1093             {
1094                 nmol  = mtop->molblock[mb].nmol;
1095                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1096                 for (j = 0; j < atoms->nr; j++)
1097                 {
1098                     nmolc = nmol;
1099                     /* Remove the interaction of the test charge group
1100                      * with itself.
1101                      */
1102                     if (mb == mtop->nmolblock-1)
1103                     {
1104                         nmolc--;
1105
1106                         if (mb == 0 && nmol == 1)
1107                         {
1108                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1109                         }
1110                     }
1111                     if (q == 0)
1112                     {
1113                         tpj = atoms->atom[j].type;
1114                     }
1115                     else
1116                     {
1117                         tpj = atoms->atom[j].typeB;
1118                     }
1119                     for (i = 0; i < fr->n_tpi; i++)
1120                     {
1121                         if (q == 0)
1122                         {
1123                             tpi = atoms_tpi->atom[i].type;
1124                         }
1125                         else
1126                         {
1127                             tpi = atoms_tpi->atom[i].typeB;
1128                         }
1129                         if (bBHAM)
1130                         {
1131                             /* nbfp now includes the 6.0 derivative prefactor */
1132                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1133                         }
1134                         else
1135                         {
1136                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1137                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1138                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1139                         }
1140                         npair += nmolc;
1141                     }
1142                 }
1143             }
1144         }
1145         if (npair - nexcl <= 0 && fplog)
1146         {
1147             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1148             csix     = 0;
1149             ctwelve  = 0;
1150         }
1151         else
1152         {
1153             csix    /= npair - nexcl;
1154             ctwelve /= npair - nexcl;
1155         }
1156         if (debug)
1157         {
1158             fprintf(debug, "Counted %d exclusions\n", nexcl);
1159             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1160             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1161         }
1162         fr->avcsix[q]    = csix;
1163         fr->avctwelve[q] = ctwelve;
1164     }
1165
1166     if (EVDW_PME(fr->vdwtype))
1167     {
1168         sfree(nbfp_comb);
1169     }
1170
1171     if (fplog != NULL)
1172     {
1173         if (fr->eDispCorr == edispcAllEner ||
1174             fr->eDispCorr == edispcAllEnerPres)
1175         {
1176             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1177                     fr->avcsix[0], fr->avctwelve[0]);
1178         }
1179         else
1180         {
1181             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1182         }
1183     }
1184 }
1185
1186
1187 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1188                            const gmx_mtop_t *mtop)
1189 {
1190     const t_atoms *at1, *at2;
1191     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1192     real           b, bmin;
1193     real          *nbfp;
1194
1195     if (fplog)
1196     {
1197         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1198     }
1199     nbfp   = fr->nbfp;
1200     ntypes = fr->ntype;
1201
1202     bmin           = -1;
1203     fr->bham_b_max = 0;
1204     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1205     {
1206         at1 = &mtop->moltype[mt1].atoms;
1207         for (i = 0; (i < at1->nr); i++)
1208         {
1209             tpi = at1->atom[i].type;
1210             if (tpi >= ntypes)
1211             {
1212                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1213             }
1214
1215             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1216             {
1217                 at2 = &mtop->moltype[mt2].atoms;
1218                 for (j = 0; (j < at2->nr); j++)
1219                 {
1220                     tpj = at2->atom[j].type;
1221                     if (tpj >= ntypes)
1222                     {
1223                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1224                     }
1225                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1226                     if (b > fr->bham_b_max)
1227                     {
1228                         fr->bham_b_max = b;
1229                     }
1230                     if ((b < bmin) || (bmin == -1))
1231                     {
1232                         bmin = b;
1233                     }
1234                 }
1235             }
1236         }
1237     }
1238     if (fplog)
1239     {
1240         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1241                 bmin, fr->bham_b_max);
1242     }
1243 }
1244
1245 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1246                             t_forcerec *fr, real rtab,
1247                             const t_commrec *cr,
1248                             const char *tabfn, char *eg1, char *eg2,
1249                             t_nblists *nbl)
1250 {
1251     char buf[STRLEN];
1252     int  i, j;
1253
1254     if (tabfn == NULL)
1255     {
1256         if (debug)
1257         {
1258             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1259         }
1260         return;
1261     }
1262
1263     sprintf(buf, "%s", tabfn);
1264     if (eg1 && eg2)
1265     {
1266         /* Append the two energy group names */
1267         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1268                 eg1, eg2, ftp2ext(efXVG));
1269     }
1270     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1271     /* Copy the contents of the table to separate coulomb and LJ tables too,
1272      * to improve cache performance.
1273      */
1274     /* For performance reasons we want
1275      * the table data to be aligned to 16-byte. The pointers could be freed
1276      * but currently aren't.
1277      */
1278     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1279     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1280     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1281     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1282     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1283     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1284     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1285     nbl->table_elec.ninteractions = 1;
1286     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1287     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1288
1289     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1290     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1291     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1292     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1293     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1294     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1295     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1296     nbl->table_vdw.ninteractions = 2;
1297     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1298     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1299
1300     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1301     {
1302         for (j = 0; j < 4; j++)
1303         {
1304             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1305         }
1306         for (j = 0; j < 8; j++)
1307         {
1308             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1309         }
1310     }
1311 }
1312
1313 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1314                          int *ncount, int **count)
1315 {
1316     const gmx_moltype_t *molt;
1317     const t_ilist       *il;
1318     int                  mt, ftype, stride, i, j, tabnr;
1319
1320     for (mt = 0; mt < mtop->nmoltype; mt++)
1321     {
1322         molt = &mtop->moltype[mt];
1323         for (ftype = 0; ftype < F_NRE; ftype++)
1324         {
1325             if (ftype == ftype1 || ftype == ftype2)
1326             {
1327                 il     = &molt->ilist[ftype];
1328                 stride = 1 + NRAL(ftype);
1329                 for (i = 0; i < il->nr; i += stride)
1330                 {
1331                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1332                     if (tabnr < 0)
1333                     {
1334                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1335                     }
1336                     if (tabnr >= *ncount)
1337                     {
1338                         srenew(*count, tabnr+1);
1339                         for (j = *ncount; j < tabnr+1; j++)
1340                         {
1341                             (*count)[j] = 0;
1342                         }
1343                         *ncount = tabnr+1;
1344                     }
1345                     (*count)[tabnr]++;
1346                 }
1347             }
1348         }
1349     }
1350 }
1351
1352 static bondedtable_t *make_bonded_tables(FILE *fplog,
1353                                          int ftype1, int ftype2,
1354                                          const gmx_mtop_t *mtop,
1355                                          const char *basefn, const char *tabext)
1356 {
1357     int            i, ncount, *count;
1358     char           tabfn[STRLEN];
1359     bondedtable_t *tab;
1360
1361     tab = NULL;
1362
1363     ncount = 0;
1364     count  = NULL;
1365     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1366
1367     if (ncount > 0)
1368     {
1369         snew(tab, ncount);
1370         for (i = 0; i < ncount; i++)
1371         {
1372             if (count[i] > 0)
1373             {
1374                 sprintf(tabfn, "%s", basefn);
1375                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1376                         tabext, i, ftp2ext(efXVG));
1377                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1378             }
1379         }
1380         sfree(count);
1381     }
1382
1383     return tab;
1384 }
1385
1386 void forcerec_set_ranges(t_forcerec *fr,
1387                          int ncg_home, int ncg_force,
1388                          int natoms_force,
1389                          int natoms_force_constr, int natoms_f_novirsum)
1390 {
1391     fr->cg0 = 0;
1392     fr->hcg = ncg_home;
1393
1394     /* fr->ncg_force is unused in the standard code,
1395      * but it can be useful for modified code dealing with charge groups.
1396      */
1397     fr->ncg_force           = ncg_force;
1398     fr->natoms_force        = natoms_force;
1399     fr->natoms_force_constr = natoms_force_constr;
1400
1401     if (fr->natoms_force_constr > fr->nalloc_force)
1402     {
1403         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1404
1405         if (fr->bTwinRange)
1406         {
1407             srenew(fr->f_twin, fr->nalloc_force);
1408         }
1409     }
1410
1411     if (fr->bF_NoVirSum)
1412     {
1413         fr->f_novirsum_n = natoms_f_novirsum;
1414         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1415         {
1416             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1417             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1418         }
1419     }
1420     else
1421     {
1422         fr->f_novirsum_n = 0;
1423     }
1424 }
1425
1426 static real cutoff_inf(real cutoff)
1427 {
1428     if (cutoff == 0)
1429     {
1430         cutoff = GMX_CUTOFF_INF;
1431     }
1432
1433     return cutoff;
1434 }
1435
1436 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1437                                   t_forcerec *fr, const t_inputrec *ir,
1438                                   const char *tabfn, const gmx_mtop_t *mtop,
1439                                   matrix     box)
1440 {
1441     char buf[STRLEN];
1442     int  i, j;
1443
1444     if (tabfn == NULL)
1445     {
1446         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1447         return;
1448     }
1449
1450     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1451
1452     sprintf(buf, "%s", tabfn);
1453     for (i = 0; i < ir->adress->n_tf_grps; i++)
1454     {
1455         j = ir->adress->tf_table_index[i]; /* get energy group index */
1456         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1457                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1458         if (fp)
1459         {
1460             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1461         }
1462         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1463     }
1464
1465 }
1466
1467 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1468 {
1469     gmx_bool bAllvsAll;
1470
1471     bAllvsAll =
1472         (
1473             ir->rlist == 0            &&
1474             ir->rcoulomb == 0         &&
1475             ir->rvdw == 0             &&
1476             ir->ePBC == epbcNONE      &&
1477             ir->vdwtype == evdwCUT    &&
1478             ir->coulombtype == eelCUT &&
1479             ir->efep == efepNO        &&
1480             (ir->implicit_solvent == eisNO ||
1481              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1482                                                   ir->gb_algorithm == egbHCT   ||
1483                                                   ir->gb_algorithm == egbOBC))) &&
1484             getenv("GMX_NO_ALLVSALL") == NULL
1485         );
1486
1487     if (bAllvsAll && ir->opts.ngener > 1)
1488     {
1489         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1490
1491         if (bPrintNote)
1492         {
1493             if (MASTER(cr))
1494             {
1495                 fprintf(stderr, "\n%s\n", note);
1496             }
1497             if (fp != NULL)
1498             {
1499                 fprintf(fp, "\n%s\n", note);
1500             }
1501         }
1502         bAllvsAll = FALSE;
1503     }
1504
1505     if (bAllvsAll && fp && MASTER(cr))
1506     {
1507         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1508     }
1509
1510     return bAllvsAll;
1511 }
1512
1513
1514 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1515 {
1516     int t, i;
1517
1518     /* These thread local data structures are used for bondeds only */
1519     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1520
1521     if (fr->nthreads > 1)
1522     {
1523         snew(fr->f_t, fr->nthreads);
1524         /* Thread 0 uses the global force and energy arrays */
1525         for (t = 1; t < fr->nthreads; t++)
1526         {
1527             fr->f_t[t].f        = NULL;
1528             fr->f_t[t].f_nalloc = 0;
1529             snew(fr->f_t[t].fshift, SHIFTS);
1530             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1531             for (i = 0; i < egNR; i++)
1532             {
1533                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1534             }
1535         }
1536     }
1537 }
1538
1539
1540 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1541                                       const t_commrec  *cr,
1542                                       const t_inputrec *ir,
1543                                       gmx_bool          bGPU)
1544 {
1545     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1546     {
1547         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1548                       bGPU ? "GPUs" : "SIMD kernels",
1549                       bGPU ? "CPU only" : "plain-C kernels");
1550         return FALSE;
1551     }
1552
1553     return TRUE;
1554 }
1555
1556
1557 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1558                                   int                         *kernel_type,
1559                                   int                         *ewald_excl)
1560 {
1561     *kernel_type = nbnxnk4x4_PlainC;
1562     *ewald_excl  = ewaldexclTable;
1563
1564 #ifdef GMX_NBNXN_SIMD
1565     {
1566 #ifdef GMX_NBNXN_SIMD_4XN
1567         *kernel_type = nbnxnk4xN_SIMD_4xN;
1568 #endif
1569 #ifdef GMX_NBNXN_SIMD_2XNN
1570         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1571 #endif
1572
1573 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1574         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1575          * Currently this is based on the SIMD acceleration choice,
1576          * but it might be better to decide this at runtime based on CPU.
1577          *
1578          * 4xN calculates more (zero) interactions, but has less pair-search
1579          * work and much better kernel instruction scheduling.
1580          *
1581          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1582          * which doesn't have FMA, both the analytical and tabulated Ewald
1583          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1584          * 2x(4+4) because it results in significantly fewer pairs.
1585          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1586          * 10% with HT, 50% without HT. As we currently don't detect the actual
1587          * use of HT, use 4x8 to avoid a potential performance hit.
1588          * On Intel Haswell 4x8 is always faster.
1589          */
1590         *kernel_type = nbnxnk4xN_SIMD_4xN;
1591
1592 #ifndef GMX_SIMD_HAVE_FMA
1593         if (EEL_PME_EWALD(ir->coulombtype) ||
1594             EVDW_PME(ir->vdwtype))
1595         {
1596             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1597              * There are enough instructions to make 2x(4+4) efficient.
1598              */
1599             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1600         }
1601 #endif
1602 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1603
1604
1605         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1606         {
1607 #ifdef GMX_NBNXN_SIMD_4XN
1608             *kernel_type = nbnxnk4xN_SIMD_4xN;
1609 #else
1610             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1611 #endif
1612         }
1613         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1614         {
1615 #ifdef GMX_NBNXN_SIMD_2XNN
1616             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1617 #else
1618             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1619 #endif
1620         }
1621
1622         /* Analytical Ewald exclusion correction is only an option in
1623          * the SIMD kernel.
1624          * Since table lookup's don't parallelize with SIMD, analytical
1625          * will probably always be faster for a SIMD width of 8 or more.
1626          * With FMA analytical is sometimes faster for a width if 4 as well.
1627          * On BlueGene/Q, this is faster regardless of precision.
1628          * In single precision, this is faster on Bulldozer.
1629          */
1630 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1631         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1632         defined GMX_SIMD_IBM_QPX
1633         *ewald_excl = ewaldexclAnalytical;
1634 #endif
1635         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1636         {
1637             *ewald_excl = ewaldexclTable;
1638         }
1639         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1640         {
1641             *ewald_excl = ewaldexclAnalytical;
1642         }
1643
1644     }
1645 #endif /* GMX_NBNXN_SIMD */
1646 }
1647
1648
1649 const char *lookup_nbnxn_kernel_name(int kernel_type)
1650 {
1651     const char *returnvalue = NULL;
1652     switch (kernel_type)
1653     {
1654         case nbnxnkNotSet:
1655             returnvalue = "not set";
1656             break;
1657         case nbnxnk4x4_PlainC:
1658             returnvalue = "plain C";
1659             break;
1660         case nbnxnk4xN_SIMD_4xN:
1661         case nbnxnk4xN_SIMD_2xNN:
1662 #ifdef GMX_NBNXN_SIMD
1663 #if defined GMX_SIMD_X86_SSE2
1664             returnvalue = "SSE2";
1665 #elif defined GMX_SIMD_X86_SSE4_1
1666             returnvalue = "SSE4.1";
1667 #elif defined GMX_SIMD_X86_AVX_128_FMA
1668             returnvalue = "AVX_128_FMA";
1669 #elif defined GMX_SIMD_X86_AVX_256
1670             returnvalue = "AVX_256";
1671 #elif defined GMX_SIMD_X86_AVX2_256
1672             returnvalue = "AVX2_256";
1673 #else
1674             returnvalue = "SIMD";
1675 #endif
1676 #else  /* GMX_NBNXN_SIMD */
1677             returnvalue = "not available";
1678 #endif /* GMX_NBNXN_SIMD */
1679             break;
1680         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1681         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1682
1683         case nbnxnkNR:
1684         default:
1685             gmx_fatal(FARGS, "Illegal kernel type selected");
1686             returnvalue = NULL;
1687             break;
1688     }
1689     return returnvalue;
1690 };
1691
1692 static void pick_nbnxn_kernel(FILE                *fp,
1693                               const t_commrec     *cr,
1694                               gmx_bool             use_simd_kernels,
1695                               gmx_bool             bUseGPU,
1696                               gmx_bool             bEmulateGPU,
1697                               const t_inputrec    *ir,
1698                               int                 *kernel_type,
1699                               int                 *ewald_excl,
1700                               gmx_bool             bDoNonbonded)
1701 {
1702     assert(kernel_type);
1703
1704     *kernel_type = nbnxnkNotSet;
1705     *ewald_excl  = ewaldexclTable;
1706
1707     if (bEmulateGPU)
1708     {
1709         *kernel_type = nbnxnk8x8x8_PlainC;
1710
1711         if (bDoNonbonded)
1712         {
1713             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1714         }
1715     }
1716     else if (bUseGPU)
1717     {
1718         *kernel_type = nbnxnk8x8x8_CUDA;
1719     }
1720
1721     if (*kernel_type == nbnxnkNotSet)
1722     {
1723         /* LJ PME with LB combination rule does 7 mesh operations.
1724          * This so slow that we don't compile SIMD non-bonded kernels for that.
1725          */
1726         if (use_simd_kernels &&
1727             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1728         {
1729             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1730         }
1731         else
1732         {
1733             *kernel_type = nbnxnk4x4_PlainC;
1734         }
1735     }
1736
1737     if (bDoNonbonded && fp != NULL)
1738     {
1739         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1740                 lookup_nbnxn_kernel_name(*kernel_type),
1741                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1742                 nbnxn_kernel_to_cj_size(*kernel_type));
1743     }
1744 }
1745
1746 static void pick_nbnxn_resources(const t_commrec     *cr,
1747                                  const gmx_hw_info_t *hwinfo,
1748                                  gmx_bool             bDoNonbonded,
1749                                  gmx_bool            *bUseGPU,
1750                                  gmx_bool            *bEmulateGPU,
1751                                  const gmx_gpu_opt_t *gpu_opt)
1752 {
1753     gmx_bool bEmulateGPUEnvVarSet;
1754     char     gpu_err_str[STRLEN];
1755
1756     *bUseGPU = FALSE;
1757
1758     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1759
1760     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1761      * GPUs (currently) only handle non-bonded calculations, we will
1762      * automatically switch to emulation if non-bonded calculations are
1763      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1764      * way to turn off GPU initialization, data movement, and cleanup.
1765      *
1766      * GPU emulation can be useful to assess the performance one can expect by
1767      * adding GPU(s) to the machine. The conditional below allows this even
1768      * if mdrun is compiled without GPU acceleration support.
1769      * Note that you should freezing the system as otherwise it will explode.
1770      */
1771     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1772                     (!bDoNonbonded &&
1773                      gpu_opt->ncuda_dev_use > 0));
1774
1775     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1776      */
1777     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1778     {
1779         /* Each PP node will use the intra-node id-th device from the
1780          * list of detected/selected GPUs. */
1781         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1782                       &hwinfo->gpu_info, gpu_opt))
1783         {
1784             /* At this point the init should never fail as we made sure that
1785              * we have all the GPUs we need. If it still does, we'll bail. */
1786             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1787                       cr->nodeid,
1788                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1789                                         cr->rank_pp_intranode),
1790                       gpu_err_str);
1791         }
1792
1793         /* Here we actually turn on hardware GPU acceleration */
1794         *bUseGPU = TRUE;
1795     }
1796 }
1797
1798 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1799                             nonbonded_verlet_t *nbv,
1800                             int                 group)
1801 {
1802     gmx_bool bUsesSimpleTables = TRUE;
1803     int      grp_index;
1804
1805     switch (cutoff_scheme)
1806     {
1807         case ecutsGROUP:
1808             bUsesSimpleTables = TRUE;
1809             break;
1810         case ecutsVERLET:
1811             assert(NULL != nbv && NULL != nbv->grp);
1812             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1813             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1814             break;
1815         default:
1816             gmx_incons("unimplemented");
1817     }
1818     return bUsesSimpleTables;
1819 }
1820
1821 static void init_ewald_f_table(interaction_const_t *ic,
1822                                gmx_bool             bUsesSimpleTables,
1823                                real                 rtab)
1824 {
1825     real maxr;
1826
1827     if (bUsesSimpleTables)
1828     {
1829         /* With a spacing of 0.0005 we are at the force summation accuracy
1830          * for the SSE kernels for "normal" atomistic simulations.
1831          */
1832         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1833                                                    ic->rcoulomb);
1834
1835         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1836         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1837     }
1838     else
1839     {
1840         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1841         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1842         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1843     }
1844
1845     sfree_aligned(ic->tabq_coul_FDV0);
1846     sfree_aligned(ic->tabq_coul_F);
1847     sfree_aligned(ic->tabq_coul_V);
1848
1849     /* Create the original table data in FDV0 */
1850     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1851     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1852     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1853     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1854                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1855 }
1856
1857 void init_interaction_const_tables(FILE                *fp,
1858                                    interaction_const_t *ic,
1859                                    gmx_bool             bUsesSimpleTables,
1860                                    real                 rtab)
1861 {
1862     real spacing;
1863
1864     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1865     {
1866         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1867
1868         if (fp != NULL)
1869         {
1870             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1871                     1/ic->tabq_scale, ic->tabq_size);
1872         }
1873     }
1874 }
1875
1876 static void clear_force_switch_constants(shift_consts_t *sc)
1877 {
1878     sc->c2   = 0;
1879     sc->c3   = 0;
1880     sc->cpot = 0;
1881 }
1882
1883 static void force_switch_constants(real p,
1884                                    real rsw, real rc,
1885                                    shift_consts_t *sc)
1886 {
1887     /* Here we determine the coefficient for shifting the force to zero
1888      * between distance rsw and the cut-off rc.
1889      * For a potential of r^-p, we have force p*r^-(p+1).
1890      * But to save flops we absorb p in the coefficient.
1891      * Thus we get:
1892      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1893      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1894      */
1895     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1896     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1897     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1898 }
1899
1900 static void potential_switch_constants(real rsw, real rc,
1901                                        switch_consts_t *sc)
1902 {
1903     /* The switch function is 1 at rsw and 0 at rc.
1904      * The derivative and second derivate are zero at both ends.
1905      * rsw        = max(r - r_switch, 0)
1906      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1907      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1908      * force      = force*dsw - potential*sw
1909      * potential *= sw
1910      */
1911     sc->c3 = -10*pow(rc - rsw, -3);
1912     sc->c4 =  15*pow(rc - rsw, -4);
1913     sc->c5 =  -6*pow(rc - rsw, -5);
1914 }
1915
1916 static void
1917 init_interaction_const(FILE                       *fp,
1918                        const t_commrec gmx_unused *cr,
1919                        interaction_const_t       **interaction_const,
1920                        const t_forcerec           *fr,
1921                        real                        rtab)
1922 {
1923     interaction_const_t *ic;
1924     gmx_bool             bUsesSimpleTables = TRUE;
1925
1926     snew(ic, 1);
1927
1928     /* Just allocate something so we can free it */
1929     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1930     snew_aligned(ic->tabq_coul_F, 16, 32);
1931     snew_aligned(ic->tabq_coul_V, 16, 32);
1932
1933     ic->rlist           = fr->rlist;
1934     ic->rlistlong       = fr->rlistlong;
1935
1936     /* Lennard-Jones */
1937     ic->vdwtype         = fr->vdwtype;
1938     ic->vdw_modifier    = fr->vdw_modifier;
1939     ic->rvdw            = fr->rvdw;
1940     ic->rvdw_switch     = fr->rvdw_switch;
1941     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
1942     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1943     ic->sh_lj_ewald     = 0;
1944     clear_force_switch_constants(&ic->dispersion_shift);
1945     clear_force_switch_constants(&ic->repulsion_shift);
1946
1947     switch (ic->vdw_modifier)
1948     {
1949         case eintmodPOTSHIFT:
1950             /* Only shift the potential, don't touch the force */
1951             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
1952             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
1953             if (EVDW_PME(ic->vdwtype))
1954             {
1955                 real crc2;
1956
1957                 if (fr->cutoff_scheme == ecutsGROUP)
1958                 {
1959                     gmx_fatal(FARGS, "Potential-shift is not (yet) implemented for LJ-PME with cutoff-scheme=group");
1960                 }
1961                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
1962                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
1963             }
1964             break;
1965         case eintmodFORCESWITCH:
1966             /* Switch the force, switch and shift the potential */
1967             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
1968                                    &ic->dispersion_shift);
1969             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
1970                                    &ic->repulsion_shift);
1971             break;
1972         case eintmodPOTSWITCH:
1973             /* Switch the potential and force */
1974             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
1975                                        &ic->vdw_switch);
1976             break;
1977         case eintmodNONE:
1978         case eintmodEXACTCUTOFF:
1979             /* Nothing to do here */
1980             break;
1981         default:
1982             gmx_incons("unimplemented potential modifier");
1983     }
1984
1985     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
1986
1987     /* Electrostatics */
1988     ic->eeltype          = fr->eeltype;
1989     ic->coulomb_modifier = fr->coulomb_modifier;
1990     ic->rcoulomb         = fr->rcoulomb;
1991     ic->epsilon_r        = fr->epsilon_r;
1992     ic->epsfac           = fr->epsfac;
1993     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
1994
1995     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1996     {
1997         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1998     }
1999     else
2000     {
2001         ic->sh_ewald = 0;
2002     }
2003
2004     /* Reaction-field */
2005     if (EEL_RF(ic->eeltype))
2006     {
2007         ic->epsilon_rf = fr->epsilon_rf;
2008         ic->k_rf       = fr->k_rf;
2009         ic->c_rf       = fr->c_rf;
2010     }
2011     else
2012     {
2013         /* For plain cut-off we might use the reaction-field kernels */
2014         ic->epsilon_rf = ic->epsilon_r;
2015         ic->k_rf       = 0;
2016         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2017         {
2018             ic->c_rf   = 1/ic->rcoulomb;
2019         }
2020         else
2021         {
2022             ic->c_rf   = 0;
2023         }
2024     }
2025
2026     if (fp != NULL)
2027     {
2028         real dispersion_shift;
2029
2030         dispersion_shift = ic->dispersion_shift.cpot;
2031         if (EVDW_PME(ic->vdwtype))
2032         {
2033             dispersion_shift -= ic->sh_lj_ewald;
2034         }
2035         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2036                 ic->repulsion_shift.cpot, dispersion_shift);
2037
2038         if (ic->eeltype == eelCUT)
2039         {
2040             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2041         }
2042         else if (EEL_PME(ic->eeltype))
2043         {
2044             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2045         }
2046         fprintf(fp, "\n");
2047     }
2048
2049     *interaction_const = ic;
2050
2051     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2052     {
2053         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2054
2055         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2056          * also sharing texture references. To keep the code simple, we don't
2057          * treat texture references as shared resources, but this means that
2058          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2059          * Hence, to ensure that the non-bonded kernels don't start before all
2060          * texture binding operations are finished, we need to wait for all ranks
2061          * to arrive here before continuing.
2062          *
2063          * Note that we could omit this barrier if GPUs are not shared (or
2064          * texture objects are used), but as this is initialization code, there
2065          * is not point in complicating things.
2066          */
2067 #ifdef GMX_THREAD_MPI
2068         if (PAR(cr))
2069         {
2070             gmx_barrier(cr);
2071         }
2072 #endif  /* GMX_THREAD_MPI */
2073     }
2074
2075     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2076     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2077 }
2078
2079 static void init_nb_verlet(FILE                *fp,
2080                            nonbonded_verlet_t **nb_verlet,
2081                            gmx_bool             bFEP_NonBonded,
2082                            const t_inputrec    *ir,
2083                            const t_forcerec    *fr,
2084                            const t_commrec     *cr,
2085                            const char          *nbpu_opt)
2086 {
2087     nonbonded_verlet_t *nbv;
2088     int                 i;
2089     char               *env;
2090     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2091
2092     nbnxn_alloc_t      *nb_alloc;
2093     nbnxn_free_t       *nb_free;
2094
2095     snew(nbv, 1);
2096
2097     pick_nbnxn_resources(cr, fr->hwinfo,
2098                          fr->bNonbonded,
2099                          &nbv->bUseGPU,
2100                          &bEmulateGPU,
2101                          fr->gpu_opt);
2102
2103     nbv->nbs = NULL;
2104
2105     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2106     for (i = 0; i < nbv->ngrp; i++)
2107     {
2108         nbv->grp[i].nbl_lists.nnbl = 0;
2109         nbv->grp[i].nbat           = NULL;
2110         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2111
2112         if (i == 0) /* local */
2113         {
2114             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2115                               nbv->bUseGPU, bEmulateGPU, ir,
2116                               &nbv->grp[i].kernel_type,
2117                               &nbv->grp[i].ewald_excl,
2118                               fr->bNonbonded);
2119         }
2120         else /* non-local */
2121         {
2122             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2123             {
2124                 /* Use GPU for local, select a CPU kernel for non-local */
2125                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2126                                   FALSE, FALSE, ir,
2127                                   &nbv->grp[i].kernel_type,
2128                                   &nbv->grp[i].ewald_excl,
2129                                   fr->bNonbonded);
2130
2131                 bHybridGPURun = TRUE;
2132             }
2133             else
2134             {
2135                 /* Use the same kernel for local and non-local interactions */
2136                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2137                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2138             }
2139         }
2140     }
2141
2142     if (nbv->bUseGPU)
2143     {
2144         /* init the NxN GPU data; the last argument tells whether we'll have
2145          * both local and non-local NB calculation on GPU */
2146         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2147                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2148                         cr->rank_pp_intranode,
2149                         (nbv->ngrp > 1) && !bHybridGPURun);
2150
2151         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2152         {
2153             char *end;
2154
2155             nbv->min_ci_balanced = strtol(env, &end, 10);
2156             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2157             {
2158                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2159             }
2160
2161             if (debug)
2162             {
2163                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2164                         nbv->min_ci_balanced);
2165             }
2166         }
2167         else
2168         {
2169             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2170             if (debug)
2171             {
2172                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2173                         nbv->min_ci_balanced);
2174             }
2175         }
2176     }
2177     else
2178     {
2179         nbv->min_ci_balanced = 0;
2180     }
2181
2182     *nb_verlet = nbv;
2183
2184     nbnxn_init_search(&nbv->nbs,
2185                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2186                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2187                       bFEP_NonBonded,
2188                       gmx_omp_nthreads_get(emntNonbonded));
2189
2190     for (i = 0; i < nbv->ngrp; i++)
2191     {
2192         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2193         {
2194             nb_alloc = &pmalloc;
2195             nb_free  = &pfree;
2196         }
2197         else
2198         {
2199             nb_alloc = NULL;
2200             nb_free  = NULL;
2201         }
2202
2203         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2204                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2205                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2206                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2207                                 nb_alloc, nb_free);
2208
2209         if (i == 0 ||
2210             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2211         {
2212             gmx_bool bSimpleList;
2213             int      enbnxninitcombrule;
2214
2215             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2216
2217             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2218             {
2219                 /* Plain LJ cut-off: we can optimize with combination rules */
2220                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2221             }
2222             else if (fr->vdwtype == evdwPME)
2223             {
2224                 /* LJ-PME: we need to use a combination rule for the grid */
2225                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2226                 {
2227                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2228                 }
2229                 else
2230                 {
2231                     enbnxninitcombrule = enbnxninitcombruleLB;
2232                 }
2233             }
2234             else
2235             {
2236                 /* We use a full combination matrix: no rule required */
2237                 enbnxninitcombrule = enbnxninitcombruleNONE;
2238             }
2239
2240
2241             snew(nbv->grp[i].nbat, 1);
2242             nbnxn_atomdata_init(fp,
2243                                 nbv->grp[i].nbat,
2244                                 nbv->grp[i].kernel_type,
2245                                 enbnxninitcombrule,
2246                                 fr->ntype, fr->nbfp,
2247                                 ir->opts.ngener,
2248                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2249                                 nb_alloc, nb_free);
2250         }
2251         else
2252         {
2253             nbv->grp[i].nbat = nbv->grp[0].nbat;
2254         }
2255     }
2256 }
2257
2258 void init_forcerec(FILE              *fp,
2259                    const output_env_t oenv,
2260                    t_forcerec        *fr,
2261                    t_fcdata          *fcd,
2262                    const t_inputrec  *ir,
2263                    const gmx_mtop_t  *mtop,
2264                    const t_commrec   *cr,
2265                    matrix             box,
2266                    const char        *tabfn,
2267                    const char        *tabafn,
2268                    const char        *tabpfn,
2269                    const char        *tabbfn,
2270                    const char        *nbpu_opt,
2271                    gmx_bool           bNoSolvOpt,
2272                    real               print_force)
2273 {
2274     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2275     real           rtab;
2276     char          *env;
2277     double         dbl;
2278     const t_block *cgs;
2279     gmx_bool       bGenericKernelOnly;
2280     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2281     gmx_bool       bFEP_NonBonded;
2282     t_nblists     *nbl;
2283     int           *nm_ind, egp_flags;
2284
2285     if (fr->hwinfo == NULL)
2286     {
2287         /* Detect hardware, gather information.
2288          * In mdrun, hwinfo has already been set before calling init_forcerec.
2289          * Here we ignore GPUs, as tools will not use them anyhow.
2290          */
2291         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2292     }
2293
2294     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2295     fr->use_simd_kernels = TRUE;
2296
2297     fr->bDomDec = DOMAINDECOMP(cr);
2298
2299     natoms = mtop->natoms;
2300
2301     if (check_box(ir->ePBC, box))
2302     {
2303         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2304     }
2305
2306     /* Test particle insertion ? */
2307     if (EI_TPI(ir->eI))
2308     {
2309         /* Set to the size of the molecule to be inserted (the last one) */
2310         /* Because of old style topologies, we have to use the last cg
2311          * instead of the last molecule type.
2312          */
2313         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2314         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2315         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2316         {
2317             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2318         }
2319     }
2320     else
2321     {
2322         fr->n_tpi = 0;
2323     }
2324
2325     /* Copy AdResS parameters */
2326     if (ir->bAdress)
2327     {
2328         fr->adress_type           = ir->adress->type;
2329         fr->adress_const_wf       = ir->adress->const_wf;
2330         fr->adress_ex_width       = ir->adress->ex_width;
2331         fr->adress_hy_width       = ir->adress->hy_width;
2332         fr->adress_icor           = ir->adress->icor;
2333         fr->adress_site           = ir->adress->site;
2334         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2335         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2336
2337
2338         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2339         for (i = 0; i < ir->adress->n_energy_grps; i++)
2340         {
2341             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2342         }
2343
2344         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2345         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2346         for (i = 0; i < fr->n_adress_tf_grps; i++)
2347         {
2348             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2349         }
2350         copy_rvec(ir->adress->refs, fr->adress_refs);
2351     }
2352     else
2353     {
2354         fr->adress_type           = eAdressOff;
2355         fr->adress_do_hybridpairs = FALSE;
2356     }
2357
2358     /* Copy the user determined parameters */
2359     fr->userint1  = ir->userint1;
2360     fr->userint2  = ir->userint2;
2361     fr->userint3  = ir->userint3;
2362     fr->userint4  = ir->userint4;
2363     fr->userreal1 = ir->userreal1;
2364     fr->userreal2 = ir->userreal2;
2365     fr->userreal3 = ir->userreal3;
2366     fr->userreal4 = ir->userreal4;
2367
2368     /* Shell stuff */
2369     fr->fc_stepsize = ir->fc_stepsize;
2370
2371     /* Free energy */
2372     fr->efep        = ir->efep;
2373     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2374     if (ir->fepvals->bScCoul)
2375     {
2376         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2377         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2378     }
2379     else
2380     {
2381         fr->sc_alphacoul  = 0;
2382         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2383     }
2384     fr->sc_power      = ir->fepvals->sc_power;
2385     fr->sc_r_power    = ir->fepvals->sc_r_power;
2386     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2387
2388     env = getenv("GMX_SCSIGMA_MIN");
2389     if (env != NULL)
2390     {
2391         dbl = 0;
2392         sscanf(env, "%lf", &dbl);
2393         fr->sc_sigma6_min = pow(dbl, 6);
2394         if (fp)
2395         {
2396             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2397         }
2398     }
2399
2400     fr->bNonbonded = TRUE;
2401     if (getenv("GMX_NO_NONBONDED") != NULL)
2402     {
2403         /* turn off non-bonded calculations */
2404         fr->bNonbonded = FALSE;
2405         md_print_warn(cr, fp,
2406                       "Found environment variable GMX_NO_NONBONDED.\n"
2407                       "Disabling nonbonded calculations.\n");
2408     }
2409
2410     bGenericKernelOnly = FALSE;
2411
2412     /* We now check in the NS code whether a particular combination of interactions
2413      * can be used with water optimization, and disable it if that is not the case.
2414      */
2415
2416     if (getenv("GMX_NB_GENERIC") != NULL)
2417     {
2418         if (fp != NULL)
2419         {
2420             fprintf(fp,
2421                     "Found environment variable GMX_NB_GENERIC.\n"
2422                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2423                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2424         }
2425         bGenericKernelOnly = TRUE;
2426     }
2427
2428     if (bGenericKernelOnly == TRUE)
2429     {
2430         bNoSolvOpt         = TRUE;
2431     }
2432
2433     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2434     {
2435         fr->use_simd_kernels = FALSE;
2436         if (fp != NULL)
2437         {
2438             fprintf(fp,
2439                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2440                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2441         }
2442     }
2443
2444     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2445
2446     /* Check if we can/should do all-vs-all kernels */
2447     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2448     fr->AllvsAll_work   = NULL;
2449     fr->AllvsAll_workgb = NULL;
2450
2451     /* All-vs-all kernels have not been implemented in 4.6, and
2452      * the SIMD group kernels are also buggy in this case. Non-SIMD
2453      * group kernels are OK. See Redmine #1249. */
2454     if (fr->bAllvsAll)
2455     {
2456         fr->bAllvsAll            = FALSE;
2457         fr->use_simd_kernels     = FALSE;
2458         if (fp != NULL)
2459         {
2460             fprintf(fp,
2461                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2462                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2463                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2464                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2465                     "we are proceeding by disabling all CPU architecture-specific\n"
2466                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2467                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2468         }
2469     }
2470
2471     /* Neighbour searching stuff */
2472     fr->cutoff_scheme = ir->cutoff_scheme;
2473     fr->bGrid         = (ir->ns_type == ensGRID);
2474     fr->ePBC          = ir->ePBC;
2475
2476     if (fr->cutoff_scheme == ecutsGROUP)
2477     {
2478         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2479             "removed in a future release when 'verlet' supports all interaction forms.\n";
2480
2481         if (MASTER(cr))
2482         {
2483             fprintf(stderr, "\n%s\n", note);
2484         }
2485         if (fp != NULL)
2486         {
2487             fprintf(fp, "\n%s\n", note);
2488         }
2489     }
2490
2491     /* Determine if we will do PBC for distances in bonded interactions */
2492     if (fr->ePBC == epbcNONE)
2493     {
2494         fr->bMolPBC = FALSE;
2495     }
2496     else
2497     {
2498         if (!DOMAINDECOMP(cr))
2499         {
2500             /* The group cut-off scheme and SHAKE assume charge groups
2501              * are whole, but not using molpbc is faster in most cases.
2502              */
2503             if (fr->cutoff_scheme == ecutsGROUP ||
2504                 (ir->eConstrAlg == econtSHAKE &&
2505                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2506                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2507             {
2508                 fr->bMolPBC = ir->bPeriodicMols;
2509             }
2510             else
2511             {
2512                 fr->bMolPBC = TRUE;
2513                 if (getenv("GMX_USE_GRAPH") != NULL)
2514                 {
2515                     fr->bMolPBC = FALSE;
2516                     if (fp)
2517                     {
2518                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2519                     }
2520                 }
2521             }
2522         }
2523         else
2524         {
2525             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2526         }
2527     }
2528     fr->bGB = (ir->implicit_solvent == eisGBSA);
2529
2530     fr->rc_scaling = ir->refcoord_scaling;
2531     copy_rvec(ir->posres_com, fr->posres_com);
2532     copy_rvec(ir->posres_comB, fr->posres_comB);
2533     fr->rlist                    = cutoff_inf(ir->rlist);
2534     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2535     fr->eeltype                  = ir->coulombtype;
2536     fr->vdwtype                  = ir->vdwtype;
2537     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2538
2539     fr->coulomb_modifier = ir->coulomb_modifier;
2540     fr->vdw_modifier     = ir->vdw_modifier;
2541
2542     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2543     switch (fr->eeltype)
2544     {
2545         case eelCUT:
2546             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2547             break;
2548
2549         case eelRF:
2550         case eelGRF:
2551         case eelRF_NEC:
2552             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2553             break;
2554
2555         case eelRF_ZERO:
2556             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2557             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2558             break;
2559
2560         case eelSWITCH:
2561         case eelSHIFT:
2562         case eelUSER:
2563         case eelENCADSHIFT:
2564         case eelPMESWITCH:
2565         case eelPMEUSER:
2566         case eelPMEUSERSWITCH:
2567             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2568             break;
2569
2570         case eelPME:
2571         case eelEWALD:
2572             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2573             break;
2574
2575         default:
2576             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2577             break;
2578     }
2579
2580     /* Vdw: Translate from mdp settings to kernel format */
2581     switch (fr->vdwtype)
2582     {
2583         case evdwCUT:
2584         case evdwPME:
2585             if (fr->bBHAM)
2586             {
2587                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2588             }
2589             else
2590             {
2591                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2592             }
2593             break;
2594
2595         case evdwSWITCH:
2596         case evdwSHIFT:
2597         case evdwUSER:
2598         case evdwENCADSHIFT:
2599             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2600             break;
2601
2602         default:
2603             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2604             break;
2605     }
2606
2607     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2608     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2609     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2610
2611     fr->bTwinRange = fr->rlistlong > fr->rlist;
2612     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2613
2614     fr->reppow     = mtop->ffparams.reppow;
2615
2616     if (ir->cutoff_scheme == ecutsGROUP)
2617     {
2618         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2619                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2620         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2621         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2622                            fr->eeltype == eelEWALD ||
2623                            fr->eeltype == eelPME ||
2624                            fr->eeltype == eelRF ||
2625                            fr->eeltype == eelRF_ZERO);
2626
2627         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2628          * going to be faster to tabulate the interaction than calling the generic kernel.
2629          */
2630         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2631         {
2632             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2633             {
2634                 fr->bcoultab = TRUE;
2635             }
2636         }
2637         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2638                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2639                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2640                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2641         {
2642             if (fr->rcoulomb != fr->rvdw)
2643             {
2644                 fr->bcoultab = TRUE;
2645             }
2646         }
2647
2648         if (getenv("GMX_REQUIRE_TABLES"))
2649         {
2650             fr->bvdwtab  = TRUE;
2651             fr->bcoultab = TRUE;
2652         }
2653
2654         if (fp)
2655         {
2656             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2657             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2658         }
2659
2660         if (fr->bvdwtab == TRUE)
2661         {
2662             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2663             fr->nbkernel_vdw_modifier    = eintmodNONE;
2664         }
2665         if (fr->bcoultab == TRUE)
2666         {
2667             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2668             fr->nbkernel_elec_modifier    = eintmodNONE;
2669         }
2670     }
2671
2672     if (ir->cutoff_scheme == ecutsVERLET)
2673     {
2674         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2675         {
2676             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2677         }
2678         fr->bvdwtab  = FALSE;
2679         fr->bcoultab = FALSE;
2680     }
2681
2682     /* Tables are used for direct ewald sum */
2683     if (fr->bEwald)
2684     {
2685         if (EEL_PME(ir->coulombtype))
2686         {
2687             if (fp)
2688             {
2689                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2690             }
2691             if (ir->coulombtype == eelP3M_AD)
2692             {
2693                 please_cite(fp, "Hockney1988");
2694                 please_cite(fp, "Ballenegger2012");
2695             }
2696             else
2697             {
2698                 please_cite(fp, "Essmann95a");
2699             }
2700
2701             if (ir->ewald_geometry == eewg3DC)
2702             {
2703                 if (fp)
2704                 {
2705                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2706                 }
2707                 please_cite(fp, "In-Chul99a");
2708             }
2709         }
2710         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2711         init_ewald_tab(&(fr->ewald_table), ir, fp);
2712         if (fp)
2713         {
2714             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2715                     1/fr->ewaldcoeff_q);
2716         }
2717     }
2718
2719     if (EVDW_PME(ir->vdwtype))
2720     {
2721         if (fp)
2722         {
2723             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2724         }
2725         please_cite(fp, "Essmann95a");
2726         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2727         if (fp)
2728         {
2729             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2730                     1/fr->ewaldcoeff_lj);
2731         }
2732     }
2733
2734     /* Electrostatics */
2735     fr->epsilon_r       = ir->epsilon_r;
2736     fr->epsilon_rf      = ir->epsilon_rf;
2737     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2738     fr->rcoulomb_switch = ir->rcoulomb_switch;
2739     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2740
2741     /* Parameters for generalized RF */
2742     fr->zsquare = 0.0;
2743     fr->temp    = 0.0;
2744
2745     if (fr->eeltype == eelGRF)
2746     {
2747         init_generalized_rf(fp, mtop, ir, fr);
2748     }
2749
2750     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2751                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2752                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2753                        IR_ELEC_FIELD(*ir) ||
2754                        (fr->adress_icor != eAdressICOff)
2755                        );
2756
2757     if (fr->cutoff_scheme == ecutsGROUP &&
2758         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2759     {
2760         /* Count the total number of charge groups */
2761         fr->cg_nalloc = ncg_mtop(mtop);
2762         srenew(fr->cg_cm, fr->cg_nalloc);
2763     }
2764     if (fr->shift_vec == NULL)
2765     {
2766         snew(fr->shift_vec, SHIFTS);
2767     }
2768
2769     if (fr->fshift == NULL)
2770     {
2771         snew(fr->fshift, SHIFTS);
2772     }
2773
2774     if (fr->nbfp == NULL)
2775     {
2776         fr->ntype = mtop->ffparams.atnr;
2777         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2778     }
2779
2780     /* Copy the energy group exclusions */
2781     fr->egp_flags = ir->opts.egp_flags;
2782
2783     /* Van der Waals stuff */
2784     fr->rvdw        = cutoff_inf(ir->rvdw);
2785     fr->rvdw_switch = ir->rvdw_switch;
2786     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2787     {
2788         if (fr->rvdw_switch >= fr->rvdw)
2789         {
2790             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2791                       fr->rvdw_switch, fr->rvdw);
2792         }
2793         if (fp)
2794         {
2795             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2796                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2797                     fr->rvdw_switch, fr->rvdw);
2798         }
2799     }
2800
2801     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2802     {
2803         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2804     }
2805
2806     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2807     {
2808         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2809     }
2810
2811     if (fp)
2812     {
2813         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2814                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2815     }
2816
2817     fr->eDispCorr = ir->eDispCorr;
2818     if (ir->eDispCorr != edispcNO)
2819     {
2820         set_avcsixtwelve(fp, fr, mtop);
2821     }
2822
2823     if (fr->bBHAM)
2824     {
2825         set_bham_b_max(fp, fr, mtop);
2826     }
2827
2828     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2829
2830     /* Copy the GBSA data (radius, volume and surftens for each
2831      * atomtype) from the topology atomtype section to forcerec.
2832      */
2833     snew(fr->atype_radius, fr->ntype);
2834     snew(fr->atype_vol, fr->ntype);
2835     snew(fr->atype_surftens, fr->ntype);
2836     snew(fr->atype_gb_radius, fr->ntype);
2837     snew(fr->atype_S_hct, fr->ntype);
2838
2839     if (mtop->atomtypes.nr > 0)
2840     {
2841         for (i = 0; i < fr->ntype; i++)
2842         {
2843             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2844         }
2845         for (i = 0; i < fr->ntype; i++)
2846         {
2847             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2848         }
2849         for (i = 0; i < fr->ntype; i++)
2850         {
2851             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2852         }
2853         for (i = 0; i < fr->ntype; i++)
2854         {
2855             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2856         }
2857         for (i = 0; i < fr->ntype; i++)
2858         {
2859             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2860         }
2861     }
2862
2863     /* Generate the GB table if needed */
2864     if (fr->bGB)
2865     {
2866 #ifdef GMX_DOUBLE
2867         fr->gbtabscale = 2000;
2868 #else
2869         fr->gbtabscale = 500;
2870 #endif
2871
2872         fr->gbtabr = 100;
2873         fr->gbtab  = make_gb_table(oenv, fr);
2874
2875         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2876
2877         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2878         if (!DOMAINDECOMP(cr))
2879         {
2880             make_local_gb(cr, fr->born, ir->gb_algorithm);
2881         }
2882     }
2883
2884     /* Set the charge scaling */
2885     if (fr->epsilon_r != 0)
2886     {
2887         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2888     }
2889     else
2890     {
2891         /* eps = 0 is infinite dieletric: no coulomb interactions */
2892         fr->epsfac = 0;
2893     }
2894
2895     /* Reaction field constants */
2896     if (EEL_RF(fr->eeltype))
2897     {
2898         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2899                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2900                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2901     }
2902
2903     /*This now calculates sum for q and c6*/
2904     set_chargesum(fp, fr, mtop);
2905
2906     /* if we are using LR electrostatics, and they are tabulated,
2907      * the tables will contain modified coulomb interactions.
2908      * Since we want to use the non-shifted ones for 1-4
2909      * coulombic interactions, we must have an extra set of tables.
2910      */
2911
2912     /* Construct tables.
2913      * A little unnecessary to make both vdw and coul tables sometimes,
2914      * but what the heck... */
2915
2916     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2917         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2918
2919     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2920                              fr->bBHAM || fr->bEwald) &&
2921                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2922                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2923                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2924
2925     negp_pp   = ir->opts.ngener - ir->nwall;
2926     negptable = 0;
2927     if (!bMakeTables)
2928     {
2929         bSomeNormalNbListsAreInUse = TRUE;
2930         fr->nnblists               = 1;
2931     }
2932     else
2933     {
2934         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2935         for (egi = 0; egi < negp_pp; egi++)
2936         {
2937             for (egj = egi; egj < negp_pp; egj++)
2938             {
2939                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2940                 if (!(egp_flags & EGP_EXCL))
2941                 {
2942                     if (egp_flags & EGP_TABLE)
2943                     {
2944                         negptable++;
2945                     }
2946                     else
2947                     {
2948                         bSomeNormalNbListsAreInUse = TRUE;
2949                     }
2950                 }
2951             }
2952         }
2953         if (bSomeNormalNbListsAreInUse)
2954         {
2955             fr->nnblists = negptable + 1;
2956         }
2957         else
2958         {
2959             fr->nnblists = negptable;
2960         }
2961         if (fr->nnblists > 1)
2962         {
2963             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2964         }
2965     }
2966
2967     if (ir->adress)
2968     {
2969         fr->nnblists *= 2;
2970     }
2971
2972     snew(fr->nblists, fr->nnblists);
2973
2974     /* This code automatically gives table length tabext without cut-off's,
2975      * in that case grompp should already have checked that we do not need
2976      * normal tables and we only generate tables for 1-4 interactions.
2977      */
2978     rtab = ir->rlistlong + ir->tabext;
2979
2980     if (bMakeTables)
2981     {
2982         /* make tables for ordinary interactions */
2983         if (bSomeNormalNbListsAreInUse)
2984         {
2985             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2986             if (ir->adress)
2987             {
2988                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2989             }
2990             if (!bMakeSeparate14Table)
2991             {
2992                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2993             }
2994             m = 1;
2995         }
2996         else
2997         {
2998             m = 0;
2999         }
3000         if (negptable > 0)
3001         {
3002             /* Read the special tables for certain energy group pairs */
3003             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3004             for (egi = 0; egi < negp_pp; egi++)
3005             {
3006                 for (egj = egi; egj < negp_pp; egj++)
3007                 {
3008                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3009                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3010                     {
3011                         nbl = &(fr->nblists[m]);
3012                         if (fr->nnblists > 1)
3013                         {
3014                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3015                         }
3016                         /* Read the table file with the two energy groups names appended */
3017                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3018                                         *mtop->groups.grpname[nm_ind[egi]],
3019                                         *mtop->groups.grpname[nm_ind[egj]],
3020                                         &fr->nblists[m]);
3021                         if (ir->adress)
3022                         {
3023                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3024                                             *mtop->groups.grpname[nm_ind[egi]],
3025                                             *mtop->groups.grpname[nm_ind[egj]],
3026                                             &fr->nblists[fr->nnblists/2+m]);
3027                         }
3028                         m++;
3029                     }
3030                     else if (fr->nnblists > 1)
3031                     {
3032                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3033                     }
3034                 }
3035             }
3036         }
3037     }
3038     if (bMakeSeparate14Table)
3039     {
3040         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3041         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3042                                 GMX_MAKETABLES_14ONLY);
3043     }
3044
3045     /* Read AdResS Thermo Force table if needed */
3046     if (fr->adress_icor == eAdressICThermoForce)
3047     {
3048         /* old todo replace */
3049
3050         if (ir->adress->n_tf_grps > 0)
3051         {
3052             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3053
3054         }
3055         else
3056         {
3057             /* load the default table */
3058             snew(fr->atf_tabs, 1);
3059             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3060         }
3061     }
3062
3063     /* Wall stuff */
3064     fr->nwall = ir->nwall;
3065     if (ir->nwall && ir->wall_type == ewtTABLE)
3066     {
3067         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3068     }
3069
3070     if (fcd && tabbfn)
3071     {
3072         fcd->bondtab  = make_bonded_tables(fp,
3073                                            F_TABBONDS, F_TABBONDSNC,
3074                                            mtop, tabbfn, "b");
3075         fcd->angletab = make_bonded_tables(fp,
3076                                            F_TABANGLES, -1,
3077                                            mtop, tabbfn, "a");
3078         fcd->dihtab   = make_bonded_tables(fp,
3079                                            F_TABDIHS, -1,
3080                                            mtop, tabbfn, "d");
3081     }
3082     else
3083     {
3084         if (debug)
3085         {
3086             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3087         }
3088     }
3089
3090     /* QM/MM initialization if requested
3091      */
3092     if (ir->bQMMM)
3093     {
3094         fprintf(stderr, "QM/MM calculation requested.\n");
3095     }
3096
3097     fr->bQMMM      = ir->bQMMM;
3098     fr->qr         = mk_QMMMrec();
3099
3100     /* Set all the static charge group info */
3101     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3102                                    &bFEP_NonBonded,
3103                                    &fr->bExcl_IntraCGAll_InterCGNone);
3104     if (DOMAINDECOMP(cr))
3105     {
3106         fr->cginfo = NULL;
3107     }
3108     else
3109     {
3110         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3111     }
3112
3113     if (!DOMAINDECOMP(cr))
3114     {
3115         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3116                             mtop->natoms, mtop->natoms, mtop->natoms);
3117     }
3118
3119     fr->print_force = print_force;
3120
3121
3122     /* coarse load balancing vars */
3123     fr->t_fnbf    = 0.;
3124     fr->t_wait    = 0.;
3125     fr->timesteps = 0;
3126
3127     /* Initialize neighbor search */
3128     init_ns(fp, cr, &fr->ns, fr, mtop);
3129
3130     if (cr->duty & DUTY_PP)
3131     {
3132         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3133         /*
3134            if (ir->bAdress)
3135             {
3136                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3137             }
3138          */
3139     }
3140
3141     /* Initialize the thread working data for bonded interactions */
3142     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3143
3144     snew(fr->excl_load, fr->nthreads+1);
3145
3146     if (fr->cutoff_scheme == ecutsVERLET)
3147     {
3148         if (ir->rcoulomb != ir->rvdw)
3149         {
3150             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3151         }
3152
3153         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3154     }
3155
3156     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3157     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3158
3159     if (ir->eDispCorr != edispcNO)
3160     {
3161         calc_enervirdiff(fp, ir->eDispCorr, fr);
3162     }
3163 }
3164
3165 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3166 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3167 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3168
3169 void pr_forcerec(FILE *fp, t_forcerec *fr)
3170 {
3171     int i;
3172
3173     pr_real(fp, fr->rlist);
3174     pr_real(fp, fr->rcoulomb);
3175     pr_real(fp, fr->fudgeQQ);
3176     pr_bool(fp, fr->bGrid);
3177     pr_bool(fp, fr->bTwinRange);
3178     /*pr_int(fp,fr->cg0);
3179        pr_int(fp,fr->hcg);*/
3180     for (i = 0; i < fr->nnblists; i++)
3181     {
3182         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3183     }
3184     pr_real(fp, fr->rcoulomb_switch);
3185     pr_real(fp, fr->rcoulomb);
3186
3187     fflush(fp);
3188 }
3189
3190 void forcerec_set_excl_load(t_forcerec           *fr,
3191                             const gmx_localtop_t *top)
3192 {
3193     const int *ind, *a;
3194     int        t, i, j, ntot, n, ntarget;
3195
3196     ind = top->excls.index;
3197     a   = top->excls.a;
3198
3199     ntot = 0;
3200     for (i = 0; i < top->excls.nr; i++)
3201     {
3202         for (j = ind[i]; j < ind[i+1]; j++)
3203         {
3204             if (a[j] > i)
3205             {
3206                 ntot++;
3207             }
3208         }
3209     }
3210
3211     fr->excl_load[0] = 0;
3212     n                = 0;
3213     i                = 0;
3214     for (t = 1; t <= fr->nthreads; t++)
3215     {
3216         ntarget = (ntot*t)/fr->nthreads;
3217         while (i < top->excls.nr && n < ntarget)
3218         {
3219             for (j = ind[i]; j < ind[i+1]; j++)
3220             {
3221                 if (a[j] > i)
3222                 {
3223                     n++;
3224                 }
3225             }
3226             i++;
3227         }
3228         fr->excl_load[t] = i;
3229     }
3230 }