src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "typedefs.h"
  45 #include "types/commrec.h"
  46 #include "gromacs/math/vec.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "macros.h"
  49 #include "gromacs/utility/smalloc.h"
  50 #include "macros.h"
  51 #include "gromacs/utility/fatalerror.h"
  52 #include "gromacs/math/units.h"
  53 #include "force.h"
  54 #include "tables.h"
  55 #include "nonbonded.h"
  56 #include "invblock.h"
  57 #include "names.h"
  58 #include "network.h"
  59 #include "pbc.h"
  60 #include "ns.h"
  61 #include "txtdump.h"
  62 #include "coulomb.h"
  63 #include "md_support.h"
  64 #include "md_logging.h"
  65 #include "domdec.h"
  66 #include "qmmm.h"
  67 #include "copyrite.h"
  68 #include "mtop_util.h"
  69 #include "nbnxn_simd.h"
  70 #include "nbnxn_search.h"
  71 #include "nbnxn_atomdata.h"
  72 #include "nbnxn_consts.h"
  73 #include "gmx_omp_nthreads.h"
  74 #include "gmx_detect_hardware.h"
  75 #include "inputrec.h"
  76
  77 #include "types/nbnxn_cuda_types_ext.h"
  78 #include "gpu_utils.h"
  79 #include "nbnxn_cuda_data_mgmt.h"
  80 #include "pmalloc_cuda.h"
  81
  82 t_forcerec *mk_forcerec(void)
  83 {
  84     t_forcerec *fr;
  85
  86     snew(fr, 1);
  87
  88     return fr;
  89 }
  90
  91 #ifdef DEBUG
  92 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  93 {
  94     int i, j;
  95
  96     for (i = 0; (i < atnr); i++)
  97     {
  98         for (j = 0; (j < atnr); j++)
  99         {
 100             fprintf(fp, "%2d - %2d", i, j);
 101             if (bBHAM)
 102             {
 103                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 104                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 105             }
 106             else
 107             {
 108                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 109                         C12(nbfp, atnr, i, j)/12.0);
 110             }
 111         }
 112     }
 113 }
 114 #endif
 115
 116 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 117 {
 118     real *nbfp;
 119     int   i, j, k, atnr;
 120
 121     atnr = idef->atnr;
 122     if (bBHAM)
 123     {
 124         snew(nbfp, 3*atnr*atnr);
 125         for (i = k = 0; (i < atnr); i++)
 126         {
 127             for (j = 0; (j < atnr); j++, k++)
 128             {
 129                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 130                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 131                 /* nbfp now includes the 6.0 derivative prefactor */
 132                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 133             }
 134         }
 135     }
 136     else
 137     {
 138         snew(nbfp, 2*atnr*atnr);
 139         for (i = k = 0; (i < atnr); i++)
 140         {
 141             for (j = 0; (j < atnr); j++, k++)
 142             {
 143                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 144                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 145                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 146             }
 147         }
 148     }
 149
 150     return nbfp;
 151 }
 152
 153 static real *make_ljpme_c6grid(const gmx_ffparams_t *idef, t_forcerec *fr)
 154 {
 155     int   i, j, k, atnr;
 156     real  c6, c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 157     real *grid;
 158
 159     /* For LJ-PME simulations, we correct the energies with the reciprocal space
 160      * inside of the cut-off. To do this the non-bonded kernels needs to have
 161      * access to the C6-values used on the reciprocal grid in pme.c
 162      */
 163
 164     atnr = idef->atnr;
 165     snew(grid, 2*atnr*atnr);
 166     for (i = k = 0; (i < atnr); i++)
 167     {
 168         for (j = 0; (j < atnr); j++, k++)
 169         {
 170             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 171             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 172             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 173             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 174             c6   = sqrt(c6i * c6j);
 175             if (fr->ljpme_combination_rule == eljpmeLB
 176                 && !gmx_numzero(c6) && !gmx_numzero(c12i) && !gmx_numzero(c12j))
 177             {
 178                 sigmai = pow(c12i / c6i, 1.0/6.0);
 179                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 180                 epsi   = c6i * c6i / c12i;
 181                 epsj   = c6j * c6j / c12j;
 182                 c6     = sqrt(epsi * epsj) * pow(0.5*(sigmai+sigmaj), 6);
 183             }
 184             /* Store the elements at the same relative positions as C6 in nbfp in order
 185              * to simplify access in the kernels
 186              */
 187             grid[2*(atnr*i+j)] = c6*6.0;
 188         }
 189     }
 190     return grid;
 191 }
 192
 193 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 194 {
 195     real *nbfp;
 196     int   i, j, k, atnr;
 197     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 198     real  c6, c12;
 199
 200     atnr = idef->atnr;
 201     snew(nbfp, 2*atnr*atnr);
 202     for (i = 0; i < atnr; ++i)
 203     {
 204         for (j = 0; j < atnr; ++j)
 205         {
 206             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 207             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 208             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 209             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 210             c6   = sqrt(c6i  * c6j);
 211             c12  = sqrt(c12i * c12j);
 212             if (comb_rule == eCOMB_ARITHMETIC
 213                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 214             {
 215                 sigmai = pow(c12i / c6i, 1.0/6.0);
 216                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 217                 epsi   = c6i * c6i / c12i;
 218                 epsj   = c6j * c6j / c12j;
 219                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 220                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 221             }
 222             C6(nbfp, atnr, i, j)   = c6*6.0;
 223             C12(nbfp, atnr, i, j)  = c12*12.0;
 224         }
 225     }
 226     return nbfp;
 227 }
 228
 229 /* This routine sets fr->solvent_opt to the most common solvent in the
 230  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 231  * the fr->solvent_type array with the correct type (or esolNO).
 232  *
 233  * Charge groups that fulfill the conditions but are not identical to the
 234  * most common one will be marked as esolNO in the solvent_type array.
 235  *
 236  * TIP3p is identical to SPC for these purposes, so we call it
 237  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 238  *
 239  * NOTE: QM particle should not
 240  * become an optimized solvent. Not even if there is only one charge
 241  * group in the Qm
 242  */
 243
 244 typedef struct
 245 {
 246     int    model;
 247     int    count;
 248     int    vdwtype[4];
 249     real   charge[4];
 250 } solvent_parameters_t;
 251
 252 static void
 253 check_solvent_cg(const gmx_moltype_t    *molt,
 254                  int                     cg0,
 255                  int                     nmol,
 256                  const unsigned char    *qm_grpnr,
 257                  const t_grps           *qm_grps,
 258                  t_forcerec   *          fr,
 259                  int                    *n_solvent_parameters,
 260                  solvent_parameters_t  **solvent_parameters_p,
 261                  int                     cginfo,
 262                  int                    *cg_sp)
 263 {
 264     const t_blocka       *excl;
 265     t_atom               *atom;
 266     int                   j, k;
 267     int                   j0, j1, nj;
 268     gmx_bool              perturbed;
 269     gmx_bool              has_vdw[4];
 270     gmx_bool              match;
 271     real                  tmp_charge[4]  = { 0.0 }; /* init to zero to make gcc4.8 happy */
 272     int                   tmp_vdwtype[4] = { 0 };   /* init to zero to make gcc4.8 happy */
 273     int                   tjA;
 274     gmx_bool              qm;
 275     solvent_parameters_t *solvent_parameters;
 276
 277     /* We use a list with parameters for each solvent type.
 278      * Every time we discover a new molecule that fulfills the basic
 279      * conditions for a solvent we compare with the previous entries
 280      * in these lists. If the parameters are the same we just increment
 281      * the counter for that type, and otherwise we create a new type
 282      * based on the current molecule.
 283      *
 284      * Once we've finished going through all molecules we check which
 285      * solvent is most common, and mark all those molecules while we
 286      * clear the flag on all others.
 287      */
 288
 289     solvent_parameters = *solvent_parameters_p;
 290
 291     /* Mark the cg first as non optimized */
 292     *cg_sp = -1;
 293
 294     /* Check if this cg has no exclusions with atoms in other charge groups
 295      * and all atoms inside the charge group excluded.
 296      * We only have 3 or 4 atom solvent loops.
 297      */
 298     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 299         !GET_CGINFO_EXCL_INTRA(cginfo))
 300     {
 301         return;
 302     }
 303
 304     /* Get the indices of the first atom in this charge group */
 305     j0     = molt->cgs.index[cg0];
 306     j1     = molt->cgs.index[cg0+1];
 307
 308     /* Number of atoms in our molecule */
 309     nj     = j1 - j0;
 310
 311     if (debug)
 312     {
 313         fprintf(debug,
 314                 "Moltype '%s': there are %d atoms in this charge group\n",
 315                 *molt->name, nj);
 316     }
 317
 318     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 319      * otherwise skip it.
 320      */
 321     if (nj < 3 || nj > 4)
 322     {
 323         return;
 324     }
 325
 326     /* Check if we are doing QM on this group */
 327     qm = FALSE;
 328     if (qm_grpnr != NULL)
 329     {
 330         for (j = j0; j < j1 && !qm; j++)
 331         {
 332             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 333         }
 334     }
 335     /* Cannot use solvent optimization with QM */
 336     if (qm)
 337     {
 338         return;
 339     }
 340
 341     atom = molt->atoms.atom;
 342
 343     /* Still looks like a solvent, time to check parameters */
 344
 345     /* If it is perturbed (free energy) we can't use the solvent loops,
 346      * so then we just skip to the next molecule.
 347      */
 348     perturbed = FALSE;
 349
 350     for (j = j0; j < j1 && !perturbed; j++)
 351     {
 352         perturbed = PERTURBED(atom[j]);
 353     }
 354
 355     if (perturbed)
 356     {
 357         return;
 358     }
 359
 360     /* Now it's only a question if the VdW and charge parameters
 361      * are OK. Before doing the check we compare and see if they are
 362      * identical to a possible previous solvent type.
 363      * First we assign the current types and charges.
 364      */
 365     for (j = 0; j < nj; j++)
 366     {
 367         tmp_vdwtype[j] = atom[j0+j].type;
 368         tmp_charge[j]  = atom[j0+j].q;
 369     }
 370
 371     /* Does it match any previous solvent type? */
 372     for (k = 0; k < *n_solvent_parameters; k++)
 373     {
 374         match = TRUE;
 375
 376
 377         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 378         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 379              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 380         {
 381             match = FALSE;
 382         }
 383
 384         /* Check that types & charges match for all atoms in molecule */
 385         for (j = 0; j < nj && match == TRUE; j++)
 386         {
 387             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 388             {
 389                 match = FALSE;
 390             }
 391             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 392             {
 393                 match = FALSE;
 394             }
 395         }
 396         if (match == TRUE)
 397         {
 398             /* Congratulations! We have a matched solvent.
 399              * Flag it with this type for later processing.
 400              */
 401             *cg_sp = k;
 402             solvent_parameters[k].count += nmol;
 403
 404             /* We are done with this charge group */
 405             return;
 406         }
 407     }
 408
 409     /* If we get here, we have a tentative new solvent type.
 410      * Before we add it we must check that it fulfills the requirements
 411      * of the solvent optimized loops. First determine which atoms have
 412      * VdW interactions.
 413      */
 414     for (j = 0; j < nj; j++)
 415     {
 416         has_vdw[j] = FALSE;
 417         tjA        = tmp_vdwtype[j];
 418
 419         /* Go through all other tpes and see if any have non-zero
 420          * VdW parameters when combined with this one.
 421          */
 422         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 423         {
 424             /* We already checked that the atoms weren't perturbed,
 425              * so we only need to check state A now.
 426              */
 427             if (fr->bBHAM)
 428             {
 429                 has_vdw[j] = (has_vdw[j] ||
 430                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 431                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 432                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 433             }
 434             else
 435             {
 436                 /* Standard LJ */
 437                 has_vdw[j] = (has_vdw[j] ||
 438                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 439                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 440             }
 441         }
 442     }
 443
 444     /* Now we know all we need to make the final check and assignment. */
 445     if (nj == 3)
 446     {
 447         /* So, is it an SPC?
 448          * For this we require thatn all atoms have charge,
 449          * the charges on atom 2 & 3 should be the same, and only
 450          * atom 1 might have VdW.
 451          */
 452         if (has_vdw[1] == FALSE &&
 453             has_vdw[2] == FALSE &&
 454             tmp_charge[0]  != 0 &&
 455             tmp_charge[1]  != 0 &&
 456             tmp_charge[2]  == tmp_charge[1])
 457         {
 458             srenew(solvent_parameters, *n_solvent_parameters+1);
 459             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 460             solvent_parameters[*n_solvent_parameters].count = nmol;
 461             for (k = 0; k < 3; k++)
 462             {
 463                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 464                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 465             }
 466
 467             *cg_sp = *n_solvent_parameters;
 468             (*n_solvent_parameters)++;
 469         }
 470     }
 471     else if (nj == 4)
 472     {
 473         /* Or could it be a TIP4P?
 474          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 475          * Only atom 1 mght have VdW.
 476          */
 477         if (has_vdw[1] == FALSE &&
 478             has_vdw[2] == FALSE &&
 479             has_vdw[3] == FALSE &&
 480             tmp_charge[0]  == 0 &&
 481             tmp_charge[1]  != 0 &&
 482             tmp_charge[2]  == tmp_charge[1] &&
 483             tmp_charge[3]  != 0)
 484         {
 485             srenew(solvent_parameters, *n_solvent_parameters+1);
 486             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 487             solvent_parameters[*n_solvent_parameters].count = nmol;
 488             for (k = 0; k < 4; k++)
 489             {
 490                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 491                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 492             }
 493
 494             *cg_sp = *n_solvent_parameters;
 495             (*n_solvent_parameters)++;
 496         }
 497     }
 498
 499     *solvent_parameters_p = solvent_parameters;
 500 }
 501
 502 static void
 503 check_solvent(FILE  *                fp,
 504               const gmx_mtop_t  *    mtop,
 505               t_forcerec  *          fr,
 506               cginfo_mb_t           *cginfo_mb)
 507 {
 508     const t_block     *   cgs;
 509     const t_block     *   mols;
 510     const gmx_moltype_t  *molt;
 511     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 512     int                   n_solvent_parameters;
 513     solvent_parameters_t *solvent_parameters;
 514     int                 **cg_sp;
 515     int                   bestsp, bestsol;
 516
 517     if (debug)
 518     {
 519         fprintf(debug, "Going to determine what solvent types we have.\n");
 520     }
 521
 522     mols = &mtop->mols;
 523
 524     n_solvent_parameters = 0;
 525     solvent_parameters   = NULL;
 526     /* Allocate temporary array for solvent type */
 527     snew(cg_sp, mtop->nmolblock);
 528
 529     cg_offset = 0;
 530     at_offset = 0;
 531     for (mb = 0; mb < mtop->nmolblock; mb++)
 532     {
 533         molt = &mtop->moltype[mtop->molblock[mb].type];
 534         cgs  = &molt->cgs;
 535         /* Here we have to loop over all individual molecules
 536          * because we need to check for QMMM particles.
 537          */
 538         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 539         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 540         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 541         for (mol = 0; mol < nmol_ch; mol++)
 542         {
 543             cgm = mol*cgs->nr;
 544             am  = mol*cgs->index[cgs->nr];
 545             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 546             {
 547                 check_solvent_cg(molt, cg_mol, nmol,
 548                                  mtop->groups.grpnr[egcQMMM] ?
 549                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 550                                  &mtop->groups.grps[egcQMMM],
 551                                  fr,
 552                                  &n_solvent_parameters, &solvent_parameters,
 553                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 554                                  &cg_sp[mb][cgm+cg_mol]);
 555             }
 556         }
 557         cg_offset += cgs->nr;
 558         at_offset += cgs->index[cgs->nr];
 559     }
 560
 561     /* Puh! We finished going through all charge groups.
 562      * Now find the most common solvent model.
 563      */
 564
 565     /* Most common solvent this far */
 566     bestsp = -2;
 567     for (i = 0; i < n_solvent_parameters; i++)
 568     {
 569         if (bestsp == -2 ||
 570             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 571         {
 572             bestsp = i;
 573         }
 574     }
 575
 576     if (bestsp >= 0)
 577     {
 578         bestsol = solvent_parameters[bestsp].model;
 579     }
 580     else
 581     {
 582         bestsol = esolNO;
 583     }
 584
 585 #ifdef DISABLE_WATER_NLIST
 586     bestsol = esolNO;
 587 #endif
 588
 589     fr->nWatMol = 0;
 590     for (mb = 0; mb < mtop->nmolblock; mb++)
 591     {
 592         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 593         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 594         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 595         {
 596             if (cg_sp[mb][i] == bestsp)
 597             {
 598                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 599                 fr->nWatMol += nmol;
 600             }
 601             else
 602             {
 603                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 604             }
 605         }
 606         sfree(cg_sp[mb]);
 607     }
 608     sfree(cg_sp);
 609
 610     if (bestsol != esolNO && fp != NULL)
 611     {
 612         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 613                 esol_names[bestsol],
 614                 solvent_parameters[bestsp].count);
 615     }
 616
 617     sfree(solvent_parameters);
 618     fr->solvent_opt = bestsol;
 619 }
 620
 621 enum {
 622     acNONE = 0, acCONSTRAINT, acSETTLE
 623 };
 624
 625 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 626                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 627                                    gmx_bool *bFEP_NonBonded,
 628                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 629 {
 630     const t_block        *cgs;
 631     const t_blocka       *excl;
 632     const gmx_moltype_t  *molt;
 633     const gmx_molblock_t *molb;
 634     cginfo_mb_t          *cginfo_mb;
 635     gmx_bool             *type_VDW;
 636     int                  *cginfo;
 637     int                   cg_offset, a_offset, cgm, am;
 638     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 639     int                  *a_con;
 640     int                   ftype;
 641     int                   ia;
 642     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ, bHavePerturbedAtoms;
 643
 644     ncg_tot = ncg_mtop(mtop);
 645     snew(cginfo_mb, mtop->nmolblock);
 646
 647     snew(type_VDW, fr->ntype);
 648     for (ai = 0; ai < fr->ntype; ai++)
 649     {
 650         type_VDW[ai] = FALSE;
 651         for (j = 0; j < fr->ntype; j++)
 652         {
 653             type_VDW[ai] = type_VDW[ai] ||
 654                 fr->bBHAM ||
 655                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 656                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 657         }
 658     }
 659
 660     *bFEP_NonBonded               = FALSE;
 661     *bExcl_IntraCGAll_InterCGNone = TRUE;
 662
 663     excl_nalloc = 10;
 664     snew(bExcl, excl_nalloc);
 665     cg_offset = 0;
 666     a_offset  = 0;
 667     for (mb = 0; mb < mtop->nmolblock; mb++)
 668     {
 669         molb = &mtop->molblock[mb];
 670         molt = &mtop->moltype[molb->type];
 671         cgs  = &molt->cgs;
 672         excl = &molt->excls;
 673
 674         /* Check if the cginfo is identical for all molecules in this block.
 675          * If so, we only need an array of the size of one molecule.
 676          * Otherwise we make an array of #mol times #cgs per molecule.
 677          */
 678         bId = TRUE;
 679         am  = 0;
 680         for (m = 0; m < molb->nmol; m++)
 681         {
 682             am = m*cgs->index[cgs->nr];
 683             for (cg = 0; cg < cgs->nr; cg++)
 684             {
 685                 a0 = cgs->index[cg];
 686                 a1 = cgs->index[cg+1];
 687                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 688                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 689                 {
 690                     bId = FALSE;
 691                 }
 692                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 693                 {
 694                     for (ai = a0; ai < a1; ai++)
 695                     {
 696                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 697                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 698                         {
 699                             bId = FALSE;
 700                         }
 701                     }
 702                 }
 703             }
 704         }
 705
 706         cginfo_mb[mb].cg_start = cg_offset;
 707         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 708         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 709         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 710         cginfo = cginfo_mb[mb].cginfo;
 711
 712         /* Set constraints flags for constrained atoms */
 713         snew(a_con, molt->atoms.nr);
 714         for (ftype = 0; ftype < F_NRE; ftype++)
 715         {
 716             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 717             {
 718                 int nral;
 719
 720                 nral = NRAL(ftype);
 721                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 722                 {
 723                     int a;
 724
 725                     for (a = 0; a < nral; a++)
 726                     {
 727                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 728                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 729                     }
 730                 }
 731             }
 732         }
 733
 734         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 735         {
 736             cgm = m*cgs->nr;
 737             am  = m*cgs->index[cgs->nr];
 738             for (cg = 0; cg < cgs->nr; cg++)
 739             {
 740                 a0 = cgs->index[cg];
 741                 a1 = cgs->index[cg+1];
 742
 743                 /* Store the energy group in cginfo */
 744                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 745                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 746
 747                 /* Check the intra/inter charge group exclusions */
 748                 if (a1-a0 > excl_nalloc)
 749                 {
 750                     excl_nalloc = a1 - a0;
 751                     srenew(bExcl, excl_nalloc);
 752                 }
 753                 /* bExclIntraAll: all intra cg interactions excluded
 754                  * bExclInter:    any inter cg interactions excluded
 755                  */
 756                 bExclIntraAll       = TRUE;
 757                 bExclInter          = FALSE;
 758                 bHaveVDW            = FALSE;
 759                 bHaveQ              = FALSE;
 760                 bHavePerturbedAtoms = FALSE;
 761                 for (ai = a0; ai < a1; ai++)
 762                 {
 763                     /* Check VDW and electrostatic interactions */
 764                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 765                                             type_VDW[molt->atoms.atom[ai].typeB]);
 766                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 767                                             molt->atoms.atom[ai].qB != 0);
 768
 769                     bHavePerturbedAtoms = bHavePerturbedAtoms || (PERTURBED(molt->atoms.atom[ai]) != 0);
 770
 771                     /* Clear the exclusion list for atom ai */
 772                     for (aj = a0; aj < a1; aj++)
 773                     {
 774                         bExcl[aj-a0] = FALSE;
 775                     }
 776                     /* Loop over all the exclusions of atom ai */
 777                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 778                     {
 779                         aj = excl->a[j];
 780                         if (aj < a0 || aj >= a1)
 781                         {
 782                             bExclInter = TRUE;
 783                         }
 784                         else
 785                         {
 786                             bExcl[aj-a0] = TRUE;
 787                         }
 788                     }
 789                     /* Check if ai excludes a0 to a1 */
 790                     for (aj = a0; aj < a1; aj++)
 791                     {
 792                         if (!bExcl[aj-a0])
 793                         {
 794                             bExclIntraAll = FALSE;
 795                         }
 796                     }
 797
 798                     switch (a_con[ai])
 799                     {
 800                         case acCONSTRAINT:
 801                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 802                             break;
 803                         case acSETTLE:
 804                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 805                             break;
 806                         default:
 807                             break;
 808                     }
 809                 }
 810                 if (bExclIntraAll)
 811                 {
 812                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 813                 }
 814                 if (bExclInter)
 815                 {
 816                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 817                 }
 818                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 819                 {
 820                     /* The size in cginfo is currently only read with DD */
 821                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 822                 }
 823                 if (bHaveVDW)
 824                 {
 825                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 826                 }
 827                 if (bHaveQ)
 828                 {
 829                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 830                 }
 831                 if (bHavePerturbedAtoms && fr->efep != efepNO)
 832                 {
 833                     SET_CGINFO_FEP(cginfo[cgm+cg]);
 834                     *bFEP_NonBonded = TRUE;
 835                 }
 836                 /* Store the charge group size */
 837                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 838
 839                 if (!bExclIntraAll || bExclInter)
 840                 {
 841                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 842                 }
 843             }
 844         }
 845
 846         sfree(a_con);
 847
 848         cg_offset += molb->nmol*cgs->nr;
 849         a_offset  += molb->nmol*cgs->index[cgs->nr];
 850     }
 851     sfree(bExcl);
 852
 853     /* the solvent optimizer is called after the QM is initialized,
 854      * because we don't want to have the QM subsystemto become an
 855      * optimized solvent
 856      */
 857
 858     check_solvent(fplog, mtop, fr, cginfo_mb);
 859
 860     if (getenv("GMX_NO_SOLV_OPT"))
 861     {
 862         if (fplog)
 863         {
 864             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 865                     "Disabling all solvent optimization\n");
 866         }
 867         fr->solvent_opt = esolNO;
 868     }
 869     if (bNoSolvOpt)
 870     {
 871         fr->solvent_opt = esolNO;
 872     }
 873     if (!fr->solvent_opt)
 874     {
 875         for (mb = 0; mb < mtop->nmolblock; mb++)
 876         {
 877             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 878             {
 879                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 880             }
 881         }
 882     }
 883
 884     return cginfo_mb;
 885 }
 886
 887 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 888 {
 889     int  ncg, mb, cg;
 890     int *cginfo;
 891
 892     ncg = cgi_mb[nmb-1].cg_end;
 893     snew(cginfo, ncg);
 894     mb = 0;
 895     for (cg = 0; cg < ncg; cg++)
 896     {
 897         while (cg >= cgi_mb[mb].cg_end)
 898         {
 899             mb++;
 900         }
 901         cginfo[cg] =
 902             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 903     }
 904
 905     return cginfo;
 906 }
 907
 908 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 909 {
 910     /*This now calculates sum for q and c6*/
 911     double         qsum, q2sum, q, c6sum, c6;
 912     int            mb, nmol, i;
 913     const t_atoms *atoms;
 914
 915     qsum   = 0;
 916     q2sum  = 0;
 917     c6sum  = 0;
 918     for (mb = 0; mb < mtop->nmolblock; mb++)
 919     {
 920         nmol  = mtop->molblock[mb].nmol;
 921         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 922         for (i = 0; i < atoms->nr; i++)
 923         {
 924             q       = atoms->atom[i].q;
 925             qsum   += nmol*q;
 926             q2sum  += nmol*q*q;
 927             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 928             c6sum  += nmol*c6;
 929         }
 930     }
 931     fr->qsum[0]   = qsum;
 932     fr->q2sum[0]  = q2sum;
 933     fr->c6sum[0]  = c6sum;
 934
 935     if (fr->efep != efepNO)
 936     {
 937         qsum   = 0;
 938         q2sum  = 0;
 939         c6sum  = 0;
 940         for (mb = 0; mb < mtop->nmolblock; mb++)
 941         {
 942             nmol  = mtop->molblock[mb].nmol;
 943             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 944             for (i = 0; i < atoms->nr; i++)
 945             {
 946                 q       = atoms->atom[i].qB;
 947                 qsum   += nmol*q;
 948                 q2sum  += nmol*q*q;
 949                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 950                 c6sum  += nmol*c6;
 951             }
 952             fr->qsum[1]   = qsum;
 953             fr->q2sum[1]  = q2sum;
 954             fr->c6sum[1]  = c6sum;
 955         }
 956     }
 957     else
 958     {
 959         fr->qsum[1]   = fr->qsum[0];
 960         fr->q2sum[1]  = fr->q2sum[0];
 961         fr->c6sum[1]  = fr->c6sum[0];
 962     }
 963     if (log)
 964     {
 965         if (fr->efep == efepNO)
 966         {
 967             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 968         }
 969         else
 970         {
 971             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 972                     fr->qsum[0], fr->qsum[1]);
 973         }
 974     }
 975 }
 976
 977 void update_forcerec(t_forcerec *fr, matrix box)
 978 {
 979     if (fr->eeltype == eelGRF)
 980     {
 981         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 982                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 983                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 984     }
 985 }
 986
 987 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 988 {
 989     const t_atoms  *atoms, *atoms_tpi;
 990     const t_blocka *excl;
 991     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 992     gmx_int64_t     npair, npair_ij, tmpi, tmpj;
 993     double          csix, ctwelve;
 994     int             ntp, *typecount;
 995     gmx_bool        bBHAM;
 996     real           *nbfp;
 997     real           *nbfp_comb = NULL;
 998
 999     ntp   = fr->ntype;
1000     bBHAM = fr->bBHAM;
1001     nbfp  = fr->nbfp;
1002
1003     /* For LJ-PME, we want to correct for the difference between the
1004      * actual C6 values and the C6 values used by the LJ-PME based on
1005      * combination rules. */
1006
1007     if (EVDW_PME(fr->vdwtype))
1008     {
1009         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
1010                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
1011         for (tpi = 0; tpi < ntp; ++tpi)
1012         {
1013             for (tpj = 0; tpj < ntp; ++tpj)
1014             {
1015                 C6(nbfp_comb, ntp, tpi, tpj) =
1016                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
1017                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
1018             }
1019         }
1020         nbfp = nbfp_comb;
1021     }
1022     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
1023     {
1024         csix    = 0;
1025         ctwelve = 0;
1026         npair   = 0;
1027         nexcl   = 0;
1028         if (!fr->n_tpi)
1029         {
1030             /* Count the types so we avoid natoms^2 operations */
1031             snew(typecount, ntp);
1032             gmx_mtop_count_atomtypes(mtop, q, typecount);
1033
1034             for (tpi = 0; tpi < ntp; tpi++)
1035             {
1036                 for (tpj = tpi; tpj < ntp; tpj++)
1037                 {
1038                     tmpi = typecount[tpi];
1039                     tmpj = typecount[tpj];
1040                     if (tpi != tpj)
1041                     {
1042                         npair_ij = tmpi*tmpj;
1043                     }
1044                     else
1045                     {
1046                         npair_ij = tmpi*(tmpi - 1)/2;
1047                     }
1048                     if (bBHAM)
1049                     {
1050                         /* nbfp now includes the 6.0 derivative prefactor */
1051                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1052                     }
1053                     else
1054                     {
1055                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1056                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1057                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1058                     }
1059                     npair += npair_ij;
1060                 }
1061             }
1062             sfree(typecount);
1063             /* Subtract the excluded pairs.
1064              * The main reason for substracting exclusions is that in some cases
1065              * some combinations might never occur and the parameters could have
1066              * any value. These unused values should not influence the dispersion
1067              * correction.
1068              */
1069             for (mb = 0; mb < mtop->nmolblock; mb++)
1070             {
1071                 nmol  = mtop->molblock[mb].nmol;
1072                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1073                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1074                 for (i = 0; (i < atoms->nr); i++)
1075                 {
1076                     if (q == 0)
1077                     {
1078                         tpi = atoms->atom[i].type;
1079                     }
1080                     else
1081                     {
1082                         tpi = atoms->atom[i].typeB;
1083                     }
1084                     j1  = excl->index[i];
1085                     j2  = excl->index[i+1];
1086                     for (j = j1; j < j2; j++)
1087                     {
1088                         k = excl->a[j];
1089                         if (k > i)
1090                         {
1091                             if (q == 0)
1092                             {
1093                                 tpj = atoms->atom[k].type;
1094                             }
1095                             else
1096                             {
1097                                 tpj = atoms->atom[k].typeB;
1098                             }
1099                             if (bBHAM)
1100                             {
1101                                 /* nbfp now includes the 6.0 derivative prefactor */
1102                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1103                             }
1104                             else
1105                             {
1106                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1107                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1108                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1109                             }
1110                             nexcl += nmol;
1111                         }
1112                     }
1113                 }
1114             }
1115         }
1116         else
1117         {
1118             /* Only correct for the interaction of the test particle
1119              * with the rest of the system.
1120              */
1121             atoms_tpi =
1122                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1123
1124             npair = 0;
1125             for (mb = 0; mb < mtop->nmolblock; mb++)
1126             {
1127                 nmol  = mtop->molblock[mb].nmol;
1128                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1129                 for (j = 0; j < atoms->nr; j++)
1130                 {
1131                     nmolc = nmol;
1132                     /* Remove the interaction of the test charge group
1133                      * with itself.
1134                      */
1135                     if (mb == mtop->nmolblock-1)
1136                     {
1137                         nmolc--;
1138
1139                         if (mb == 0 && nmol == 1)
1140                         {
1141                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1142                         }
1143                     }
1144                     if (q == 0)
1145                     {
1146                         tpj = atoms->atom[j].type;
1147                     }
1148                     else
1149                     {
1150                         tpj = atoms->atom[j].typeB;
1151                     }
1152                     for (i = 0; i < fr->n_tpi; i++)
1153                     {
1154                         if (q == 0)
1155                         {
1156                             tpi = atoms_tpi->atom[i].type;
1157                         }
1158                         else
1159                         {
1160                             tpi = atoms_tpi->atom[i].typeB;
1161                         }
1162                         if (bBHAM)
1163                         {
1164                             /* nbfp now includes the 6.0 derivative prefactor */
1165                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1166                         }
1167                         else
1168                         {
1169                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1170                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1171                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1172                         }
1173                         npair += nmolc;
1174                     }
1175                 }
1176             }
1177         }
1178         if (npair - nexcl <= 0 && fplog)
1179         {
1180             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1181             csix     = 0;
1182             ctwelve  = 0;
1183         }
1184         else
1185         {
1186             csix    /= npair - nexcl;
1187             ctwelve /= npair - nexcl;
1188         }
1189         if (debug)
1190         {
1191             fprintf(debug, "Counted %d exclusions\n", nexcl);
1192             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1193             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1194         }
1195         fr->avcsix[q]    = csix;
1196         fr->avctwelve[q] = ctwelve;
1197     }
1198
1199     if (EVDW_PME(fr->vdwtype))
1200     {
1201         sfree(nbfp_comb);
1202     }
1203
1204     if (fplog != NULL)
1205     {
1206         if (fr->eDispCorr == edispcAllEner ||
1207             fr->eDispCorr == edispcAllEnerPres)
1208         {
1209             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1210                     fr->avcsix[0], fr->avctwelve[0]);
1211         }
1212         else
1213         {
1214             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1215         }
1216     }
1217 }
1218
1219
1220 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1221                            const gmx_mtop_t *mtop)
1222 {
1223     const t_atoms *at1, *at2;
1224     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1225     real           b, bmin;
1226     real          *nbfp;
1227
1228     if (fplog)
1229     {
1230         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1231     }
1232     nbfp   = fr->nbfp;
1233     ntypes = fr->ntype;
1234
1235     bmin           = -1;
1236     fr->bham_b_max = 0;
1237     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1238     {
1239         at1 = &mtop->moltype[mt1].atoms;
1240         for (i = 0; (i < at1->nr); i++)
1241         {
1242             tpi = at1->atom[i].type;
1243             if (tpi >= ntypes)
1244             {
1245                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1246             }
1247
1248             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1249             {
1250                 at2 = &mtop->moltype[mt2].atoms;
1251                 for (j = 0; (j < at2->nr); j++)
1252                 {
1253                     tpj = at2->atom[j].type;
1254                     if (tpj >= ntypes)
1255                     {
1256                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1257                     }
1258                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1259                     if (b > fr->bham_b_max)
1260                     {
1261                         fr->bham_b_max = b;
1262                     }
1263                     if ((b < bmin) || (bmin == -1))
1264                     {
1265                         bmin = b;
1266                     }
1267                 }
1268             }
1269         }
1270     }
1271     if (fplog)
1272     {
1273         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1274                 bmin, fr->bham_b_max);
1275     }
1276 }
1277
1278 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1279                             t_forcerec *fr, real rtab,
1280                             const t_commrec *cr,
1281                             const char *tabfn, char *eg1, char *eg2,
1282                             t_nblists *nbl)
1283 {
1284     char buf[STRLEN];
1285     int  i, j;
1286
1287     if (tabfn == NULL)
1288     {
1289         if (debug)
1290         {
1291             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1292         }
1293         return;
1294     }
1295
1296     sprintf(buf, "%s", tabfn);
1297     if (eg1 && eg2)
1298     {
1299         /* Append the two energy group names */
1300         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1301                 eg1, eg2, ftp2ext(efXVG));
1302     }
1303     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1304     /* Copy the contents of the table to separate coulomb and LJ tables too,
1305      * to improve cache performance.
1306      */
1307     /* For performance reasons we want
1308      * the table data to be aligned to 16-byte. The pointers could be freed
1309      * but currently aren't.
1310      */
1311     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1312     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1313     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1314     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1315     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1316     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1317     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1318     nbl->table_elec.ninteractions = 1;
1319     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1320     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1321
1322     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1323     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1324     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1325     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1326     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1327     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1328     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1329     nbl->table_vdw.ninteractions = 2;
1330     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1331     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1332
1333     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1334     {
1335         for (j = 0; j < 4; j++)
1336         {
1337             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1338         }
1339         for (j = 0; j < 8; j++)
1340         {
1341             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1342         }
1343     }
1344 }
1345
1346 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1347                          int *ncount, int **count)
1348 {
1349     const gmx_moltype_t *molt;
1350     const t_ilist       *il;
1351     int                  mt, ftype, stride, i, j, tabnr;
1352
1353     for (mt = 0; mt < mtop->nmoltype; mt++)
1354     {
1355         molt = &mtop->moltype[mt];
1356         for (ftype = 0; ftype < F_NRE; ftype++)
1357         {
1358             if (ftype == ftype1 || ftype == ftype2)
1359             {
1360                 il     = &molt->ilist[ftype];
1361                 stride = 1 + NRAL(ftype);
1362                 for (i = 0; i < il->nr; i += stride)
1363                 {
1364                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1365                     if (tabnr < 0)
1366                     {
1367                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1368                     }
1369                     if (tabnr >= *ncount)
1370                     {
1371                         srenew(*count, tabnr+1);
1372                         for (j = *ncount; j < tabnr+1; j++)
1373                         {
1374                             (*count)[j] = 0;
1375                         }
1376                         *ncount = tabnr+1;
1377                     }
1378                     (*count)[tabnr]++;
1379                 }
1380             }
1381         }
1382     }
1383 }
1384
1385 static bondedtable_t *make_bonded_tables(FILE *fplog,
1386                                          int ftype1, int ftype2,
1387                                          const gmx_mtop_t *mtop,
1388                                          const char *basefn, const char *tabext)
1389 {
1390     int            i, ncount, *count;
1391     char           tabfn[STRLEN];
1392     bondedtable_t *tab;
1393
1394     tab = NULL;
1395
1396     ncount = 0;
1397     count  = NULL;
1398     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1399
1400     if (ncount > 0)
1401     {
1402         snew(tab, ncount);
1403         for (i = 0; i < ncount; i++)
1404         {
1405             if (count[i] > 0)
1406             {
1407                 sprintf(tabfn, "%s", basefn);
1408                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1409                         tabext, i, ftp2ext(efXVG));
1410                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1411             }
1412         }
1413         sfree(count);
1414     }
1415
1416     return tab;
1417 }
1418
1419 void forcerec_set_ranges(t_forcerec *fr,
1420                          int ncg_home, int ncg_force,
1421                          int natoms_force,
1422                          int natoms_force_constr, int natoms_f_novirsum)
1423 {
1424     fr->cg0 = 0;
1425     fr->hcg = ncg_home;
1426
1427     /* fr->ncg_force is unused in the standard code,
1428      * but it can be useful for modified code dealing with charge groups.
1429      */
1430     fr->ncg_force           = ncg_force;
1431     fr->natoms_force        = natoms_force;
1432     fr->natoms_force_constr = natoms_force_constr;
1433
1434     if (fr->natoms_force_constr > fr->nalloc_force)
1435     {
1436         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1437
1438         if (fr->bTwinRange)
1439         {
1440             srenew(fr->f_twin, fr->nalloc_force);
1441         }
1442     }
1443
1444     if (fr->bF_NoVirSum)
1445     {
1446         fr->f_novirsum_n = natoms_f_novirsum;
1447         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1448         {
1449             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1450             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1451         }
1452     }
1453     else
1454     {
1455         fr->f_novirsum_n = 0;
1456     }
1457 }
1458
1459 static real cutoff_inf(real cutoff)
1460 {
1461     if (cutoff == 0)
1462     {
1463         cutoff = GMX_CUTOFF_INF;
1464     }
1465
1466     return cutoff;
1467 }
1468
1469 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1470                                   t_forcerec *fr, const t_inputrec *ir,
1471                                   const char *tabfn, const gmx_mtop_t *mtop,
1472                                   matrix     box)
1473 {
1474     char buf[STRLEN];
1475     int  i, j;
1476
1477     if (tabfn == NULL)
1478     {
1479         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1480         return;
1481     }
1482
1483     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1484
1485     sprintf(buf, "%s", tabfn);
1486     for (i = 0; i < ir->adress->n_tf_grps; i++)
1487     {
1488         j = ir->adress->tf_table_index[i]; /* get energy group index */
1489         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1490                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1491         if (fp)
1492         {
1493             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1494         }
1495         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1496     }
1497
1498 }
1499
1500 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1501 {
1502     gmx_bool bAllvsAll;
1503
1504     bAllvsAll =
1505         (
1506             ir->rlist == 0            &&
1507             ir->rcoulomb == 0         &&
1508             ir->rvdw == 0             &&
1509             ir->ePBC == epbcNONE      &&
1510             ir->vdwtype == evdwCUT    &&
1511             ir->coulombtype == eelCUT &&
1512             ir->efep == efepNO        &&
1513             (ir->implicit_solvent == eisNO ||
1514              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1515                                                   ir->gb_algorithm == egbHCT   ||
1516                                                   ir->gb_algorithm == egbOBC))) &&
1517             getenv("GMX_NO_ALLVSALL") == NULL
1518         );
1519
1520     if (bAllvsAll && ir->opts.ngener > 1)
1521     {
1522         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1523
1524         if (bPrintNote)
1525         {
1526             if (MASTER(cr))
1527             {
1528                 fprintf(stderr, "\n%s\n", note);
1529             }
1530             if (fp != NULL)
1531             {
1532                 fprintf(fp, "\n%s\n", note);
1533             }
1534         }
1535         bAllvsAll = FALSE;
1536     }
1537
1538     if (bAllvsAll && fp && MASTER(cr))
1539     {
1540         fprintf(fp, "\nUsing SIMD all-vs-all kernels.\n\n");
1541     }
1542
1543     return bAllvsAll;
1544 }
1545
1546
1547 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1548 {
1549     int t, i;
1550
1551     /* These thread local data structures are used for bondeds only */
1552     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1553
1554     if (fr->nthreads > 1)
1555     {
1556         snew(fr->f_t, fr->nthreads);
1557         /* Thread 0 uses the global force and energy arrays */
1558         for (t = 1; t < fr->nthreads; t++)
1559         {
1560             fr->f_t[t].f        = NULL;
1561             fr->f_t[t].f_nalloc = 0;
1562             snew(fr->f_t[t].fshift, SHIFTS);
1563             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1564             for (i = 0; i < egNR; i++)
1565             {
1566                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1567             }
1568         }
1569     }
1570 }
1571
1572
1573 gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
1574                                       const t_commrec  *cr,
1575                                       const t_inputrec *ir,
1576                                       gmx_bool          bGPU)
1577 {
1578     if (!bGPU && (ir->vdwtype == evdwPME && ir->ljpme_combination_rule == eljpmeLB))
1579     {
1580         md_print_warn(cr, fplog, "LJ-PME with Lorentz-Berthelot is not supported with %s, falling back to %s\n",
1581                       bGPU ? "GPUs" : "SIMD kernels",
1582                       bGPU ? "CPU only" : "plain-C kernels");
1583         return FALSE;
1584     }
1585
1586     return TRUE;
1587 }
1588
1589
1590 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1591                                   int                         *kernel_type,
1592                                   int                         *ewald_excl)
1593 {
1594     *kernel_type = nbnxnk4x4_PlainC;
1595     *ewald_excl  = ewaldexclTable;
1596
1597 #ifdef GMX_NBNXN_SIMD
1598     {
1599 #ifdef GMX_NBNXN_SIMD_4XN
1600         *kernel_type = nbnxnk4xN_SIMD_4xN;
1601 #endif
1602 #ifdef GMX_NBNXN_SIMD_2XNN
1603         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1604 #endif
1605
1606 #if defined GMX_NBNXN_SIMD_2XNN && defined GMX_NBNXN_SIMD_4XN
1607         /* We need to choose if we want 2x(N+N) or 4xN kernels.
1608          * Currently this is based on the SIMD acceleration choice,
1609          * but it might be better to decide this at runtime based on CPU.
1610          *
1611          * 4xN calculates more (zero) interactions, but has less pair-search
1612          * work and much better kernel instruction scheduling.
1613          *
1614          * Up till now we have only seen that on Intel Sandy/Ivy Bridge,
1615          * which doesn't have FMA, both the analytical and tabulated Ewald
1616          * kernels have similar pair rates for 4x8 and 2x(4+4), so we choose
1617          * 2x(4+4) because it results in significantly fewer pairs.
1618          * For RF, the raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1619          * 10% with HT, 50% without HT. As we currently don't detect the actual
1620          * use of HT, use 4x8 to avoid a potential performance hit.
1621          * On Intel Haswell 4x8 is always faster.
1622          */
1623         *kernel_type = nbnxnk4xN_SIMD_4xN;
1624
1625 #ifndef GMX_SIMD_HAVE_FMA
1626         if (EEL_PME_EWALD(ir->coulombtype) ||
1627             EVDW_PME(ir->vdwtype))
1628         {
1629             /* We have Ewald kernels without FMA (Intel Sandy/Ivy Bridge).
1630              * There are enough instructions to make 2x(4+4) efficient.
1631              */
1632             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1633         }
1634 #endif
1635 #endif  /* GMX_NBNXN_SIMD_2XNN && GMX_NBNXN_SIMD_4XN */
1636
1637
1638         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1639         {
1640 #ifdef GMX_NBNXN_SIMD_4XN
1641             *kernel_type = nbnxnk4xN_SIMD_4xN;
1642 #else
1643             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1644 #endif
1645         }
1646         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1647         {
1648 #ifdef GMX_NBNXN_SIMD_2XNN
1649             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1650 #else
1651             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1652 #endif
1653         }
1654
1655         /* Analytical Ewald exclusion correction is only an option in
1656          * the SIMD kernel.
1657          * Since table lookup's don't parallelize with SIMD, analytical
1658          * will probably always be faster for a SIMD width of 8 or more.
1659          * With FMA analytical is sometimes faster for a width if 4 as well.
1660          * On BlueGene/Q, this is faster regardless of precision.
1661          * In single precision, this is faster on Bulldozer.
1662          */
1663 #if GMX_SIMD_REAL_WIDTH >= 8 || \
1664         (GMX_SIMD_REAL_WIDTH >= 4 && defined GMX_SIMD_HAVE_FMA && !defined GMX_DOUBLE) || \
1665         defined GMX_SIMD_IBM_QPX
1666         *ewald_excl = ewaldexclAnalytical;
1667 #endif
1668         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1669         {
1670             *ewald_excl = ewaldexclTable;
1671         }
1672         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1673         {
1674             *ewald_excl = ewaldexclAnalytical;
1675         }
1676
1677     }
1678 #endif /* GMX_NBNXN_SIMD */
1679 }
1680
1681
1682 const char *lookup_nbnxn_kernel_name(int kernel_type)
1683 {
1684     const char *returnvalue = NULL;
1685     switch (kernel_type)
1686     {
1687         case nbnxnkNotSet:
1688             returnvalue = "not set";
1689             break;
1690         case nbnxnk4x4_PlainC:
1691             returnvalue = "plain C";
1692             break;
1693         case nbnxnk4xN_SIMD_4xN:
1694         case nbnxnk4xN_SIMD_2xNN:
1695 #ifdef GMX_NBNXN_SIMD
1696 #if defined GMX_SIMD_X86_SSE2
1697             returnvalue = "SSE2";
1698 #elif defined GMX_SIMD_X86_SSE4_1
1699             returnvalue = "SSE4.1";
1700 #elif defined GMX_SIMD_X86_AVX_128_FMA
1701             returnvalue = "AVX_128_FMA";
1702 #elif defined GMX_SIMD_X86_AVX_256
1703             returnvalue = "AVX_256";
1704 #elif defined GMX_SIMD_X86_AVX2_256
1705             returnvalue = "AVX2_256";
1706 #else
1707             returnvalue = "SIMD";
1708 #endif
1709 #else  /* GMX_NBNXN_SIMD */
1710             returnvalue = "not available";
1711 #endif /* GMX_NBNXN_SIMD */
1712             break;
1713         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1714         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1715
1716         case nbnxnkNR:
1717         default:
1718             gmx_fatal(FARGS, "Illegal kernel type selected");
1719             returnvalue = NULL;
1720             break;
1721     }
1722     return returnvalue;
1723 };
1724
1725 static void pick_nbnxn_kernel(FILE                *fp,
1726                               const t_commrec     *cr,
1727                               gmx_bool             use_simd_kernels,
1728                               gmx_bool             bUseGPU,
1729                               gmx_bool             bEmulateGPU,
1730                               const t_inputrec    *ir,
1731                               int                 *kernel_type,
1732                               int                 *ewald_excl,
1733                               gmx_bool             bDoNonbonded)
1734 {
1735     assert(kernel_type);
1736
1737     *kernel_type = nbnxnkNotSet;
1738     *ewald_excl  = ewaldexclTable;
1739
1740     if (bEmulateGPU)
1741     {
1742         *kernel_type = nbnxnk8x8x8_PlainC;
1743
1744         if (bDoNonbonded)
1745         {
1746             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1747         }
1748     }
1749     else if (bUseGPU)
1750     {
1751         *kernel_type = nbnxnk8x8x8_CUDA;
1752     }
1753
1754     if (*kernel_type == nbnxnkNotSet)
1755     {
1756         /* LJ PME with LB combination rule does 7 mesh operations.
1757          * This so slow that we don't compile SIMD non-bonded kernels for that.
1758          */
1759         if (use_simd_kernels &&
1760             nbnxn_acceleration_supported(fp, cr, ir, FALSE))
1761         {
1762             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1763         }
1764         else
1765         {
1766             *kernel_type = nbnxnk4x4_PlainC;
1767         }
1768     }
1769
1770     if (bDoNonbonded && fp != NULL)
1771     {
1772         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1773                 lookup_nbnxn_kernel_name(*kernel_type),
1774                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1775                 nbnxn_kernel_to_cj_size(*kernel_type));
1776     }
1777 }
1778
1779 static void pick_nbnxn_resources(const t_commrec     *cr,
1780                                  const gmx_hw_info_t *hwinfo,
1781                                  gmx_bool             bDoNonbonded,
1782                                  gmx_bool            *bUseGPU,
1783                                  gmx_bool            *bEmulateGPU,
1784                                  const gmx_gpu_opt_t *gpu_opt)
1785 {
1786     gmx_bool bEmulateGPUEnvVarSet;
1787     char     gpu_err_str[STRLEN];
1788
1789     *bUseGPU = FALSE;
1790
1791     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1792
1793     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1794      * GPUs (currently) only handle non-bonded calculations, we will
1795      * automatically switch to emulation if non-bonded calculations are
1796      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1797      * way to turn off GPU initialization, data movement, and cleanup.
1798      *
1799      * GPU emulation can be useful to assess the performance one can expect by
1800      * adding GPU(s) to the machine. The conditional below allows this even
1801      * if mdrun is compiled without GPU acceleration support.
1802      * Note that you should freezing the system as otherwise it will explode.
1803      */
1804     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1805                     (!bDoNonbonded &&
1806                      gpu_opt->ncuda_dev_use > 0));
1807
1808     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1809      */
1810     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1811     {
1812         /* Each PP node will use the intra-node id-th device from the
1813          * list of detected/selected GPUs. */
1814         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1815                       &hwinfo->gpu_info, gpu_opt))
1816         {
1817             /* At this point the init should never fail as we made sure that
1818              * we have all the GPUs we need. If it still does, we'll bail. */
1819             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1820                       cr->nodeid,
1821                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1822                                         cr->rank_pp_intranode),
1823                       gpu_err_str);
1824         }
1825
1826         /* Here we actually turn on hardware GPU acceleration */
1827         *bUseGPU = TRUE;
1828     }
1829 }
1830
1831 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1832                             nonbonded_verlet_t *nbv,
1833                             int                 group)
1834 {
1835     gmx_bool bUsesSimpleTables = TRUE;
1836     int      grp_index;
1837
1838     switch (cutoff_scheme)
1839     {
1840         case ecutsGROUP:
1841             bUsesSimpleTables = TRUE;
1842             break;
1843         case ecutsVERLET:
1844             assert(NULL != nbv && NULL != nbv->grp);
1845             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1846             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1847             break;
1848         default:
1849             gmx_incons("unimplemented");
1850     }
1851     return bUsesSimpleTables;
1852 }
1853
1854 static void init_ewald_f_table(interaction_const_t *ic,
1855                                gmx_bool             bUsesSimpleTables,
1856                                real                 rtab)
1857 {
1858     real maxr;
1859
1860     if (bUsesSimpleTables)
1861     {
1862         /* With a spacing of 0.0005 we are at the force summation accuracy
1863          * for the SSE kernels for "normal" atomistic simulations.
1864          */
1865         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1866                                                    ic->rcoulomb);
1867
1868         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1869         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1870     }
1871     else
1872     {
1873         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1874         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1875         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1876     }
1877
1878     sfree_aligned(ic->tabq_coul_FDV0);
1879     sfree_aligned(ic->tabq_coul_F);
1880     sfree_aligned(ic->tabq_coul_V);
1881
1882     sfree_aligned(ic->tabq_vdw_FDV0);
1883     sfree_aligned(ic->tabq_vdw_F);
1884     sfree_aligned(ic->tabq_vdw_V);
1885
1886     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1887     {
1888         /* Create the original table data in FDV0 */
1889         snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1890         snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1891         snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1892         table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1893                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q, v_q_ewald_lr);
1894     }
1895
1896     if (EVDW_PME(ic->vdwtype))
1897     {
1898         snew_aligned(ic->tabq_vdw_FDV0, ic->tabq_size*4, 32);
1899         snew_aligned(ic->tabq_vdw_F, ic->tabq_size, 32);
1900         snew_aligned(ic->tabq_vdw_V, ic->tabq_size, 32);
1901         table_spline3_fill_ewald_lr(ic->tabq_vdw_F, ic->tabq_vdw_V, ic->tabq_vdw_FDV0,
1902                                     ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_lj, v_lj_ewald_lr);
1903     }
1904 }
1905
1906 void init_interaction_const_tables(FILE                *fp,
1907                                    interaction_const_t *ic,
1908                                    gmx_bool             bUsesSimpleTables,
1909                                    real                 rtab)
1910 {
1911     real spacing;
1912
1913     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype) || EVDW_PME(ic->vdwtype))
1914     {
1915         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1916
1917         if (fp != NULL)
1918         {
1919             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1920                     1/ic->tabq_scale, ic->tabq_size);
1921         }
1922     }
1923 }
1924
1925 static void clear_force_switch_constants(shift_consts_t *sc)
1926 {
1927     sc->c2   = 0;
1928     sc->c3   = 0;
1929     sc->cpot = 0;
1930 }
1931
1932 static void force_switch_constants(real p,
1933                                    real rsw, real rc,
1934                                    shift_consts_t *sc)
1935 {
1936     /* Here we determine the coefficient for shifting the force to zero
1937      * between distance rsw and the cut-off rc.
1938      * For a potential of r^-p, we have force p*r^-(p+1).
1939      * But to save flops we absorb p in the coefficient.
1940      * Thus we get:
1941      * force/p   = r^-(p+1) + c2*r^2 + c3*r^3
1942      * potential = r^-p + c2/3*r^3 + c3/4*r^4 + cpot
1943      */
1944     sc->c2   =  ((p + 1)*rsw - (p + 4)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 2));
1945     sc->c3   = -((p + 1)*rsw - (p + 3)*rc)/(pow(rc, p + 2)*pow(rc - rsw, 3));
1946     sc->cpot = -pow(rc, -p) + p*sc->c2/3*pow(rc - rsw, 3) + p*sc->c3/4*pow(rc - rsw, 4);
1947 }
1948
1949 static void potential_switch_constants(real rsw, real rc,
1950                                        switch_consts_t *sc)
1951 {
1952     /* The switch function is 1 at rsw and 0 at rc.
1953      * The derivative and second derivate are zero at both ends.
1954      * rsw        = max(r - r_switch, 0)
1955      * sw         = 1 + c3*rsw^3 + c4*rsw^4 + c5*rsw^5
1956      * dsw        = 3*c3*rsw^2 + 4*c4*rsw^3 + 5*c5*rsw^4
1957      * force      = force*dsw - potential*sw
1958      * potential *= sw
1959      */
1960     sc->c3 = -10*pow(rc - rsw, -3);
1961     sc->c4 =  15*pow(rc - rsw, -4);
1962     sc->c5 =  -6*pow(rc - rsw, -5);
1963 }
1964
1965 static void
1966 init_interaction_const(FILE                       *fp,
1967                        const t_commrec gmx_unused *cr,
1968                        interaction_const_t       **interaction_const,
1969                        const t_forcerec           *fr,
1970                        real                        rtab)
1971 {
1972     interaction_const_t *ic;
1973     gmx_bool             bUsesSimpleTables = TRUE;
1974
1975     snew(ic, 1);
1976
1977     /* Just allocate something so we can free it */
1978     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1979     snew_aligned(ic->tabq_coul_F, 16, 32);
1980     snew_aligned(ic->tabq_coul_V, 16, 32);
1981
1982     ic->rlist           = fr->rlist;
1983     ic->rlistlong       = fr->rlistlong;
1984
1985     /* Lennard-Jones */
1986     ic->vdwtype         = fr->vdwtype;
1987     ic->vdw_modifier    = fr->vdw_modifier;
1988     ic->rvdw            = fr->rvdw;
1989     ic->rvdw_switch     = fr->rvdw_switch;
1990     ic->ewaldcoeff_lj   = fr->ewaldcoeff_lj;
1991     ic->ljpme_comb_rule = fr->ljpme_combination_rule;
1992     ic->sh_lj_ewald     = 0;
1993     clear_force_switch_constants(&ic->dispersion_shift);
1994     clear_force_switch_constants(&ic->repulsion_shift);
1995
1996     switch (ic->vdw_modifier)
1997     {
1998         case eintmodPOTSHIFT:
1999             /* Only shift the potential, don't touch the force */
2000             ic->dispersion_shift.cpot = -pow(ic->rvdw, -6.0);
2001             ic->repulsion_shift.cpot  = -pow(ic->rvdw, -12.0);
2002             if (EVDW_PME(ic->vdwtype))
2003             {
2004                 real crc2;
2005
2006                 crc2            = sqr(ic->ewaldcoeff_lj*ic->rvdw);
2007                 ic->sh_lj_ewald = (exp(-crc2)*(1 + crc2 + 0.5*crc2*crc2) - 1)*pow(ic->rvdw, -6.0);
2008             }
2009             break;
2010         case eintmodFORCESWITCH:
2011             /* Switch the force, switch and shift the potential */
2012             force_switch_constants(6.0, ic->rvdw_switch, ic->rvdw,
2013                                    &ic->dispersion_shift);
2014             force_switch_constants(12.0, ic->rvdw_switch, ic->rvdw,
2015                                    &ic->repulsion_shift);
2016             break;
2017         case eintmodPOTSWITCH:
2018             /* Switch the potential and force */
2019             potential_switch_constants(ic->rvdw_switch, ic->rvdw,
2020                                        &ic->vdw_switch);
2021             break;
2022         case eintmodNONE:
2023         case eintmodEXACTCUTOFF:
2024             /* Nothing to do here */
2025             break;
2026         default:
2027             gmx_incons("unimplemented potential modifier");
2028     }
2029
2030     ic->sh_invrc6 = -ic->dispersion_shift.cpot;
2031
2032     /* Electrostatics */
2033     ic->eeltype          = fr->eeltype;
2034     ic->coulomb_modifier = fr->coulomb_modifier;
2035     ic->rcoulomb         = fr->rcoulomb;
2036     ic->epsilon_r        = fr->epsilon_r;
2037     ic->epsfac           = fr->epsfac;
2038     ic->ewaldcoeff_q     = fr->ewaldcoeff_q;
2039
2040     if (fr->coulomb_modifier == eintmodPOTSHIFT)
2041     {
2042         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
2043     }
2044     else
2045     {
2046         ic->sh_ewald = 0;
2047     }
2048
2049     /* Reaction-field */
2050     if (EEL_RF(ic->eeltype))
2051     {
2052         ic->epsilon_rf = fr->epsilon_rf;
2053         ic->k_rf       = fr->k_rf;
2054         ic->c_rf       = fr->c_rf;
2055     }
2056     else
2057     {
2058         /* For plain cut-off we might use the reaction-field kernels */
2059         ic->epsilon_rf = ic->epsilon_r;
2060         ic->k_rf       = 0;
2061         if (fr->coulomb_modifier == eintmodPOTSHIFT)
2062         {
2063             ic->c_rf   = 1/ic->rcoulomb;
2064         }
2065         else
2066         {
2067             ic->c_rf   = 0;
2068         }
2069     }
2070
2071     if (fp != NULL)
2072     {
2073         real dispersion_shift;
2074
2075         dispersion_shift = ic->dispersion_shift.cpot;
2076         if (EVDW_PME(ic->vdwtype))
2077         {
2078             dispersion_shift -= ic->sh_lj_ewald;
2079         }
2080         fprintf(fp, "Potential shift: LJ r^-12: %.3e r^-6: %.3e",
2081                 ic->repulsion_shift.cpot, dispersion_shift);
2082
2083         if (ic->eeltype == eelCUT)
2084         {
2085             fprintf(fp, ", Coulomb %.e", -ic->c_rf);
2086         }
2087         else if (EEL_PME(ic->eeltype))
2088         {
2089             fprintf(fp, ", Ewald %.3e", -ic->sh_ewald);
2090         }
2091         fprintf(fp, "\n");
2092     }
2093
2094     *interaction_const = ic;
2095
2096     if (fr->nbv != NULL && fr->nbv->bUseGPU)
2097     {
2098         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
2099
2100         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
2101          * also sharing texture references. To keep the code simple, we don't
2102          * treat texture references as shared resources, but this means that
2103          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
2104          * Hence, to ensure that the non-bonded kernels don't start before all
2105          * texture binding operations are finished, we need to wait for all ranks
2106          * to arrive here before continuing.
2107          *
2108          * Note that we could omit this barrier if GPUs are not shared (or
2109          * texture objects are used), but as this is initialization code, there
2110          * is not point in complicating things.
2111          */
2112 #ifdef GMX_THREAD_MPI
2113         if (PAR(cr))
2114         {
2115             gmx_barrier(cr);
2116         }
2117 #endif  /* GMX_THREAD_MPI */
2118     }
2119
2120     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
2121     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
2122 }
2123
2124 static void init_nb_verlet(FILE                *fp,
2125                            nonbonded_verlet_t **nb_verlet,
2126                            gmx_bool             bFEP_NonBonded,
2127                            const t_inputrec    *ir,
2128                            const t_forcerec    *fr,
2129                            const t_commrec     *cr,
2130                            const char          *nbpu_opt)
2131 {
2132     nonbonded_verlet_t *nbv;
2133     int                 i;
2134     char               *env;
2135     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
2136
2137     nbnxn_alloc_t      *nb_alloc;
2138     nbnxn_free_t       *nb_free;
2139
2140     snew(nbv, 1);
2141
2142     pick_nbnxn_resources(cr, fr->hwinfo,
2143                          fr->bNonbonded,
2144                          &nbv->bUseGPU,
2145                          &bEmulateGPU,
2146                          fr->gpu_opt);
2147
2148     nbv->nbs = NULL;
2149
2150     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
2151     for (i = 0; i < nbv->ngrp; i++)
2152     {
2153         nbv->grp[i].nbl_lists.nnbl = 0;
2154         nbv->grp[i].nbat           = NULL;
2155         nbv->grp[i].kernel_type    = nbnxnkNotSet;
2156
2157         if (i == 0) /* local */
2158         {
2159             pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2160                               nbv->bUseGPU, bEmulateGPU, ir,
2161                               &nbv->grp[i].kernel_type,
2162                               &nbv->grp[i].ewald_excl,
2163                               fr->bNonbonded);
2164         }
2165         else /* non-local */
2166         {
2167             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2168             {
2169                 /* Use GPU for local, select a CPU kernel for non-local */
2170                 pick_nbnxn_kernel(fp, cr, fr->use_simd_kernels,
2171                                   FALSE, FALSE, ir,
2172                                   &nbv->grp[i].kernel_type,
2173                                   &nbv->grp[i].ewald_excl,
2174                                   fr->bNonbonded);
2175
2176                 bHybridGPURun = TRUE;
2177             }
2178             else
2179             {
2180                 /* Use the same kernel for local and non-local interactions */
2181                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2182                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2183             }
2184         }
2185     }
2186
2187     if (nbv->bUseGPU)
2188     {
2189         /* init the NxN GPU data; the last argument tells whether we'll have
2190          * both local and non-local NB calculation on GPU */
2191         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2192                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2193                         cr->rank_pp_intranode,
2194                         (nbv->ngrp > 1) && !bHybridGPURun);
2195
2196         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2197         {
2198             char *end;
2199
2200             nbv->min_ci_balanced = strtol(env, &end, 10);
2201             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2202             {
2203                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2204             }
2205
2206             if (debug)
2207             {
2208                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2209                         nbv->min_ci_balanced);
2210             }
2211         }
2212         else
2213         {
2214             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2215             if (debug)
2216             {
2217                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2218                         nbv->min_ci_balanced);
2219             }
2220         }
2221     }
2222     else
2223     {
2224         nbv->min_ci_balanced = 0;
2225     }
2226
2227     *nb_verlet = nbv;
2228
2229     nbnxn_init_search(&nbv->nbs,
2230                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2231                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2232                       bFEP_NonBonded,
2233                       gmx_omp_nthreads_get(emntNonbonded));
2234
2235     for (i = 0; i < nbv->ngrp; i++)
2236     {
2237         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2238         {
2239             nb_alloc = &pmalloc;
2240             nb_free  = &pfree;
2241         }
2242         else
2243         {
2244             nb_alloc = NULL;
2245             nb_free  = NULL;
2246         }
2247
2248         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2249                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2250                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2251                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2252                                 nb_alloc, nb_free);
2253
2254         if (i == 0 ||
2255             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2256         {
2257             gmx_bool bSimpleList;
2258             int      enbnxninitcombrule;
2259
2260             bSimpleList = nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type);
2261
2262             if (bSimpleList && (fr->vdwtype == evdwCUT && (fr->vdw_modifier == eintmodNONE || fr->vdw_modifier == eintmodPOTSHIFT)))
2263             {
2264                 /* Plain LJ cut-off: we can optimize with combination rules */
2265                 enbnxninitcombrule = enbnxninitcombruleDETECT;
2266             }
2267             else if (fr->vdwtype == evdwPME)
2268             {
2269                 /* LJ-PME: we need to use a combination rule for the grid */
2270                 if (fr->ljpme_combination_rule == eljpmeGEOM)
2271                 {
2272                     enbnxninitcombrule = enbnxninitcombruleGEOM;
2273                 }
2274                 else
2275                 {
2276                     enbnxninitcombrule = enbnxninitcombruleLB;
2277                 }
2278             }
2279             else
2280             {
2281                 /* We use a full combination matrix: no rule required */
2282                 enbnxninitcombrule = enbnxninitcombruleNONE;
2283             }
2284
2285
2286             snew(nbv->grp[i].nbat, 1);
2287             nbnxn_atomdata_init(fp,
2288                                 nbv->grp[i].nbat,
2289                                 nbv->grp[i].kernel_type,
2290                                 enbnxninitcombrule,
2291                                 fr->ntype, fr->nbfp,
2292                                 ir->opts.ngener,
2293                                 bSimpleList ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2294                                 nb_alloc, nb_free);
2295         }
2296         else
2297         {
2298             nbv->grp[i].nbat = nbv->grp[0].nbat;
2299         }
2300     }
2301 }
2302
2303 void init_forcerec(FILE              *fp,
2304                    const output_env_t oenv,
2305                    t_forcerec        *fr,
2306                    t_fcdata          *fcd,
2307                    const t_inputrec  *ir,
2308                    const gmx_mtop_t  *mtop,
2309                    const t_commrec   *cr,
2310                    matrix             box,
2311                    const char        *tabfn,
2312                    const char        *tabafn,
2313                    const char        *tabpfn,
2314                    const char        *tabbfn,
2315                    const char        *nbpu_opt,
2316                    gmx_bool           bNoSolvOpt,
2317                    real               print_force)
2318 {
2319     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2320     real           rtab;
2321     char          *env;
2322     double         dbl;
2323     const t_block *cgs;
2324     gmx_bool       bGenericKernelOnly;
2325     gmx_bool       bMakeTables, bMakeSeparate14Table, bSomeNormalNbListsAreInUse;
2326     gmx_bool       bFEP_NonBonded;
2327     t_nblists     *nbl;
2328     int           *nm_ind, egp_flags;
2329
2330     if (fr->hwinfo == NULL)
2331     {
2332         /* Detect hardware, gather information.
2333          * In mdrun, hwinfo has already been set before calling init_forcerec.
2334          * Here we ignore GPUs, as tools will not use them anyhow.
2335          */
2336         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2337     }
2338
2339     /* By default we turn SIMD kernels on, but it might be turned off further down... */
2340     fr->use_simd_kernels = TRUE;
2341
2342     fr->bDomDec = DOMAINDECOMP(cr);
2343
2344     natoms = mtop->natoms;
2345
2346     if (check_box(ir->ePBC, box))
2347     {
2348         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2349     }
2350
2351     /* Test particle insertion ? */
2352     if (EI_TPI(ir->eI))
2353     {
2354         /* Set to the size of the molecule to be inserted (the last one) */
2355         /* Because of old style topologies, we have to use the last cg
2356          * instead of the last molecule type.
2357          */
2358         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2359         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2360         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2361         {
2362             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2363         }
2364     }
2365     else
2366     {
2367         fr->n_tpi = 0;
2368     }
2369
2370     /* Copy AdResS parameters */
2371     if (ir->bAdress)
2372     {
2373         fr->adress_type           = ir->adress->type;
2374         fr->adress_const_wf       = ir->adress->const_wf;
2375         fr->adress_ex_width       = ir->adress->ex_width;
2376         fr->adress_hy_width       = ir->adress->hy_width;
2377         fr->adress_icor           = ir->adress->icor;
2378         fr->adress_site           = ir->adress->site;
2379         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2380         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2381
2382
2383         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2384         for (i = 0; i < ir->adress->n_energy_grps; i++)
2385         {
2386             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2387         }
2388
2389         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2390         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2391         for (i = 0; i < fr->n_adress_tf_grps; i++)
2392         {
2393             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2394         }
2395         copy_rvec(ir->adress->refs, fr->adress_refs);
2396     }
2397     else
2398     {
2399         fr->adress_type           = eAdressOff;
2400         fr->adress_do_hybridpairs = FALSE;
2401     }
2402
2403     /* Copy the user determined parameters */
2404     fr->userint1  = ir->userint1;
2405     fr->userint2  = ir->userint2;
2406     fr->userint3  = ir->userint3;
2407     fr->userint4  = ir->userint4;
2408     fr->userreal1 = ir->userreal1;
2409     fr->userreal2 = ir->userreal2;
2410     fr->userreal3 = ir->userreal3;
2411     fr->userreal4 = ir->userreal4;
2412
2413     /* Shell stuff */
2414     fr->fc_stepsize = ir->fc_stepsize;
2415
2416     /* Free energy */
2417     fr->efep        = ir->efep;
2418     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2419     if (ir->fepvals->bScCoul)
2420     {
2421         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2422         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2423     }
2424     else
2425     {
2426         fr->sc_alphacoul  = 0;
2427         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2428     }
2429     fr->sc_power      = ir->fepvals->sc_power;
2430     fr->sc_r_power    = ir->fepvals->sc_r_power;
2431     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2432
2433     env = getenv("GMX_SCSIGMA_MIN");
2434     if (env != NULL)
2435     {
2436         dbl = 0;
2437         sscanf(env, "%lf", &dbl);
2438         fr->sc_sigma6_min = pow(dbl, 6);
2439         if (fp)
2440         {
2441             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2442         }
2443     }
2444
2445     fr->bNonbonded = TRUE;
2446     if (getenv("GMX_NO_NONBONDED") != NULL)
2447     {
2448         /* turn off non-bonded calculations */
2449         fr->bNonbonded = FALSE;
2450         md_print_warn(cr, fp,
2451                       "Found environment variable GMX_NO_NONBONDED.\n"
2452                       "Disabling nonbonded calculations.\n");
2453     }
2454
2455     bGenericKernelOnly = FALSE;
2456
2457     /* We now check in the NS code whether a particular combination of interactions
2458      * can be used with water optimization, and disable it if that is not the case.
2459      */
2460
2461     if (getenv("GMX_NB_GENERIC") != NULL)
2462     {
2463         if (fp != NULL)
2464         {
2465             fprintf(fp,
2466                     "Found environment variable GMX_NB_GENERIC.\n"
2467                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2468                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2469         }
2470         bGenericKernelOnly = TRUE;
2471     }
2472
2473     if (bGenericKernelOnly == TRUE)
2474     {
2475         bNoSolvOpt         = TRUE;
2476     }
2477
2478     if ( (getenv("GMX_DISABLE_SIMD_KERNELS") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2479     {
2480         fr->use_simd_kernels = FALSE;
2481         if (fp != NULL)
2482         {
2483             fprintf(fp,
2484                     "\nFound environment variable GMX_DISABLE_SIMD_KERNELS.\n"
2485                     "Disabling the usage of any SIMD-specific kernel routines (e.g. SSE2/SSE4.1/AVX).\n\n");
2486         }
2487     }
2488
2489     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2490
2491     /* Check if we can/should do all-vs-all kernels */
2492     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2493     fr->AllvsAll_work   = NULL;
2494     fr->AllvsAll_workgb = NULL;
2495
2496     /* All-vs-all kernels have not been implemented in 4.6, and
2497      * the SIMD group kernels are also buggy in this case. Non-SIMD
2498      * group kernels are OK. See Redmine #1249. */
2499     if (fr->bAllvsAll)
2500     {
2501         fr->bAllvsAll            = FALSE;
2502         fr->use_simd_kernels     = FALSE;
2503         if (fp != NULL)
2504         {
2505             fprintf(fp,
2506                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2507                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2508                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2509                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2510                     "we are proceeding by disabling all CPU architecture-specific\n"
2511                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2512                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2513         }
2514     }
2515
2516     /* Neighbour searching stuff */
2517     fr->cutoff_scheme = ir->cutoff_scheme;
2518     fr->bGrid         = (ir->ns_type == ensGRID);
2519     fr->ePBC          = ir->ePBC;
2520
2521     if (fr->cutoff_scheme == ecutsGROUP)
2522     {
2523         const char *note = "NOTE: This file uses the deprecated 'group' cutoff_scheme. This will be\n"
2524             "removed in a future release when 'verlet' supports all interaction forms.\n";
2525
2526         if (MASTER(cr))
2527         {
2528             fprintf(stderr, "\n%s\n", note);
2529         }
2530         if (fp != NULL)
2531         {
2532             fprintf(fp, "\n%s\n", note);
2533         }
2534     }
2535
2536     /* Determine if we will do PBC for distances in bonded interactions */
2537     if (fr->ePBC == epbcNONE)
2538     {
2539         fr->bMolPBC = FALSE;
2540     }
2541     else
2542     {
2543         if (!DOMAINDECOMP(cr))
2544         {
2545             /* The group cut-off scheme and SHAKE assume charge groups
2546              * are whole, but not using molpbc is faster in most cases.
2547              */
2548             if (fr->cutoff_scheme == ecutsGROUP ||
2549                 (ir->eConstrAlg == econtSHAKE &&
2550                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2551                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2552             {
2553                 fr->bMolPBC = ir->bPeriodicMols;
2554             }
2555             else
2556             {
2557                 fr->bMolPBC = TRUE;
2558                 if (getenv("GMX_USE_GRAPH") != NULL)
2559                 {
2560                     fr->bMolPBC = FALSE;
2561                     if (fp)
2562                     {
2563                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2564                     }
2565                 }
2566             }
2567         }
2568         else
2569         {
2570             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2571         }
2572     }
2573     fr->bGB = (ir->implicit_solvent == eisGBSA);
2574
2575     fr->rc_scaling = ir->refcoord_scaling;
2576     copy_rvec(ir->posres_com, fr->posres_com);
2577     copy_rvec(ir->posres_comB, fr->posres_comB);
2578     fr->rlist                    = cutoff_inf(ir->rlist);
2579     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2580     fr->eeltype                  = ir->coulombtype;
2581     fr->vdwtype                  = ir->vdwtype;
2582     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2583
2584     fr->coulomb_modifier = ir->coulomb_modifier;
2585     fr->vdw_modifier     = ir->vdw_modifier;
2586
2587     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2588     switch (fr->eeltype)
2589     {
2590         case eelCUT:
2591             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2592             break;
2593
2594         case eelRF:
2595         case eelGRF:
2596         case eelRF_NEC:
2597             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2598             break;
2599
2600         case eelRF_ZERO:
2601             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2602             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2603             break;
2604
2605         case eelSWITCH:
2606         case eelSHIFT:
2607         case eelUSER:
2608         case eelENCADSHIFT:
2609         case eelPMESWITCH:
2610         case eelPMEUSER:
2611         case eelPMEUSERSWITCH:
2612             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2613             break;
2614
2615         case eelPME:
2616         case eelEWALD:
2617             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2618             break;
2619
2620         default:
2621             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2622             break;
2623     }
2624
2625     /* Vdw: Translate from mdp settings to kernel format */
2626     switch (fr->vdwtype)
2627     {
2628         case evdwCUT:
2629             if (fr->bBHAM)
2630             {
2631                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2632             }
2633             else
2634             {
2635                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2636             }
2637             break;
2638         case evdwPME:
2639             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LJEWALD;
2640             break;
2641
2642         case evdwSWITCH:
2643         case evdwSHIFT:
2644         case evdwUSER:
2645         case evdwENCADSHIFT:
2646             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2647             break;
2648
2649         default:
2650             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2651             break;
2652     }
2653
2654     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2655     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2656     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2657
2658     fr->bTwinRange = fr->rlistlong > fr->rlist;
2659     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2660
2661     fr->reppow     = mtop->ffparams.reppow;
2662
2663     if (ir->cutoff_scheme == ecutsGROUP)
2664     {
2665         fr->bvdwtab    = ((fr->vdwtype != evdwCUT || !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2666                           && !EVDW_PME(fr->vdwtype));
2667         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2668         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2669                            fr->eeltype == eelEWALD ||
2670                            fr->eeltype == eelPME ||
2671                            fr->eeltype == eelRF ||
2672                            fr->eeltype == eelRF_ZERO);
2673
2674         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2675          * going to be faster to tabulate the interaction than calling the generic kernel.
2676          */
2677         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2678         {
2679             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2680             {
2681                 fr->bcoultab = TRUE;
2682             }
2683         }
2684         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2685                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2686                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2687                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2688         {
2689             if (fr->rcoulomb != fr->rvdw)
2690             {
2691                 fr->bcoultab = TRUE;
2692             }
2693         }
2694
2695         if (getenv("GMX_REQUIRE_TABLES"))
2696         {
2697             fr->bvdwtab  = TRUE;
2698             fr->bcoultab = TRUE;
2699         }
2700
2701         if (fp)
2702         {
2703             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2704             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2705         }
2706
2707         if (fr->bvdwtab == TRUE)
2708         {
2709             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2710             fr->nbkernel_vdw_modifier    = eintmodNONE;
2711         }
2712         if (fr->bcoultab == TRUE)
2713         {
2714             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2715             fr->nbkernel_elec_modifier    = eintmodNONE;
2716         }
2717     }
2718
2719     if (ir->cutoff_scheme == ecutsVERLET)
2720     {
2721         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2722         {
2723             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2724         }
2725         fr->bvdwtab  = FALSE;
2726         fr->bcoultab = FALSE;
2727     }
2728
2729     /* Tables are used for direct ewald sum */
2730     if (fr->bEwald)
2731     {
2732         if (EEL_PME(ir->coulombtype))
2733         {
2734             if (fp)
2735             {
2736                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2737             }
2738             if (ir->coulombtype == eelP3M_AD)
2739             {
2740                 please_cite(fp, "Hockney1988");
2741                 please_cite(fp, "Ballenegger2012");
2742             }
2743             else
2744             {
2745                 please_cite(fp, "Essmann95a");
2746             }
2747
2748             if (ir->ewald_geometry == eewg3DC)
2749             {
2750                 if (fp)
2751                 {
2752                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2753                 }
2754                 please_cite(fp, "In-Chul99a");
2755             }
2756         }
2757         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2758         init_ewald_tab(&(fr->ewald_table), ir, fp);
2759         if (fp)
2760         {
2761             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2762                     1/fr->ewaldcoeff_q);
2763         }
2764     }
2765
2766     if (EVDW_PME(ir->vdwtype))
2767     {
2768         if (fp)
2769         {
2770             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2771         }
2772         please_cite(fp, "Essmann95a");
2773         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2774         if (fp)
2775         {
2776             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2777                     1/fr->ewaldcoeff_lj);
2778         }
2779     }
2780
2781     /* Electrostatics */
2782     fr->epsilon_r       = ir->epsilon_r;
2783     fr->epsilon_rf      = ir->epsilon_rf;
2784     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2785     fr->rcoulomb_switch = ir->rcoulomb_switch;
2786     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2787
2788     /* Parameters for generalized RF */
2789     fr->zsquare = 0.0;
2790     fr->temp    = 0.0;
2791
2792     if (fr->eeltype == eelGRF)
2793     {
2794         init_generalized_rf(fp, mtop, ir, fr);
2795     }
2796
2797     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2798                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2799                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2800                        IR_ELEC_FIELD(*ir) ||
2801                        (fr->adress_icor != eAdressICOff)
2802                        );
2803
2804     if (fr->cutoff_scheme == ecutsGROUP &&
2805         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2806     {
2807         /* Count the total number of charge groups */
2808         fr->cg_nalloc = ncg_mtop(mtop);
2809         srenew(fr->cg_cm, fr->cg_nalloc);
2810     }
2811     if (fr->shift_vec == NULL)
2812     {
2813         snew(fr->shift_vec, SHIFTS);
2814     }
2815
2816     if (fr->fshift == NULL)
2817     {
2818         snew(fr->fshift, SHIFTS);
2819     }
2820
2821     if (fr->nbfp == NULL)
2822     {
2823         fr->ntype = mtop->ffparams.atnr;
2824         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2825         if (EVDW_PME(fr->vdwtype))
2826         {
2827             fr->ljpme_c6grid  = make_ljpme_c6grid(&mtop->ffparams, fr);
2828         }
2829     }
2830
2831     /* Copy the energy group exclusions */
2832     fr->egp_flags = ir->opts.egp_flags;
2833
2834     /* Van der Waals stuff */
2835     fr->rvdw        = cutoff_inf(ir->rvdw);
2836     fr->rvdw_switch = ir->rvdw_switch;
2837     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2838     {
2839         if (fr->rvdw_switch >= fr->rvdw)
2840         {
2841             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2842                       fr->rvdw_switch, fr->rvdw);
2843         }
2844         if (fp)
2845         {
2846             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2847                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2848                     fr->rvdw_switch, fr->rvdw);
2849         }
2850     }
2851
2852     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2853     {
2854         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2855     }
2856
2857     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2858     {
2859         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2860     }
2861
2862     if (fp)
2863     {
2864         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2865                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2866     }
2867
2868     fr->eDispCorr = ir->eDispCorr;
2869     if (ir->eDispCorr != edispcNO)
2870     {
2871         set_avcsixtwelve(fp, fr, mtop);
2872     }
2873
2874     if (fr->bBHAM)
2875     {
2876         set_bham_b_max(fp, fr, mtop);
2877     }
2878
2879     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2880
2881     /* Copy the GBSA data (radius, volume and surftens for each
2882      * atomtype) from the topology atomtype section to forcerec.
2883      */
2884     snew(fr->atype_radius, fr->ntype);
2885     snew(fr->atype_vol, fr->ntype);
2886     snew(fr->atype_surftens, fr->ntype);
2887     snew(fr->atype_gb_radius, fr->ntype);
2888     snew(fr->atype_S_hct, fr->ntype);
2889
2890     if (mtop->atomtypes.nr > 0)
2891     {
2892         for (i = 0; i < fr->ntype; i++)
2893         {
2894             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2895         }
2896         for (i = 0; i < fr->ntype; i++)
2897         {
2898             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2899         }
2900         for (i = 0; i < fr->ntype; i++)
2901         {
2902             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2903         }
2904         for (i = 0; i < fr->ntype; i++)
2905         {
2906             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2907         }
2908         for (i = 0; i < fr->ntype; i++)
2909         {
2910             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2911         }
2912     }
2913
2914     /* Generate the GB table if needed */
2915     if (fr->bGB)
2916     {
2917 #ifdef GMX_DOUBLE
2918         fr->gbtabscale = 2000;
2919 #else
2920         fr->gbtabscale = 500;
2921 #endif
2922
2923         fr->gbtabr = 100;
2924         fr->gbtab  = make_gb_table(oenv, fr);
2925
2926         init_gb(&fr->born, fr, ir, mtop, ir->gb_algorithm);
2927
2928         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2929         if (!DOMAINDECOMP(cr))
2930         {
2931             make_local_gb(cr, fr->born, ir->gb_algorithm);
2932         }
2933     }
2934
2935     /* Set the charge scaling */
2936     if (fr->epsilon_r != 0)
2937     {
2938         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2939     }
2940     else
2941     {
2942         /* eps = 0 is infinite dieletric: no coulomb interactions */
2943         fr->epsfac = 0;
2944     }
2945
2946     /* Reaction field constants */
2947     if (EEL_RF(fr->eeltype))
2948     {
2949         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2950                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2951                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2952     }
2953
2954     /*This now calculates sum for q and c6*/
2955     set_chargesum(fp, fr, mtop);
2956
2957     /* if we are using LR electrostatics, and they are tabulated,
2958      * the tables will contain modified coulomb interactions.
2959      * Since we want to use the non-shifted ones for 1-4
2960      * coulombic interactions, we must have an extra set of tables.
2961      */
2962
2963     /* Construct tables.
2964      * A little unnecessary to make both vdw and coul tables sometimes,
2965      * but what the heck... */
2966
2967     bMakeTables = fr->bcoultab || fr->bvdwtab || fr->bEwald ||
2968         (ir->eDispCorr != edispcNO && ir_vdw_switched(ir));
2969
2970     bMakeSeparate14Table = ((!bMakeTables || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2971                              fr->bBHAM || fr->bEwald) &&
2972                             (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2973                              gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2974                              gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2975
2976     negp_pp   = ir->opts.ngener - ir->nwall;
2977     negptable = 0;
2978     if (!bMakeTables)
2979     {
2980         bSomeNormalNbListsAreInUse = TRUE;
2981         fr->nnblists               = 1;
2982     }
2983     else
2984     {
2985         bSomeNormalNbListsAreInUse = (ir->eDispCorr != edispcNO);
2986         for (egi = 0; egi < negp_pp; egi++)
2987         {
2988             for (egj = egi; egj < negp_pp; egj++)
2989             {
2990                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2991                 if (!(egp_flags & EGP_EXCL))
2992                 {
2993                     if (egp_flags & EGP_TABLE)
2994                     {
2995                         negptable++;
2996                     }
2997                     else
2998                     {
2999                         bSomeNormalNbListsAreInUse = TRUE;
3000                     }
3001                 }
3002             }
3003         }
3004         if (bSomeNormalNbListsAreInUse)
3005         {
3006             fr->nnblists = negptable + 1;
3007         }
3008         else
3009         {
3010             fr->nnblists = negptable;
3011         }
3012         if (fr->nnblists > 1)
3013         {
3014             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
3015         }
3016     }
3017
3018     if (ir->adress)
3019     {
3020         fr->nnblists *= 2;
3021     }
3022
3023     snew(fr->nblists, fr->nnblists);
3024
3025     /* This code automatically gives table length tabext without cut-off's,
3026      * in that case grompp should already have checked that we do not need
3027      * normal tables and we only generate tables for 1-4 interactions.
3028      */
3029     rtab = ir->rlistlong + ir->tabext;
3030
3031     if (bMakeTables)
3032     {
3033         /* make tables for ordinary interactions */
3034         if (bSomeNormalNbListsAreInUse)
3035         {
3036             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
3037             if (ir->adress)
3038             {
3039                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
3040             }
3041             if (!bMakeSeparate14Table)
3042             {
3043                 fr->tab14 = fr->nblists[0].table_elec_vdw;
3044             }
3045             m = 1;
3046         }
3047         else
3048         {
3049             m = 0;
3050         }
3051         if (negptable > 0)
3052         {
3053             /* Read the special tables for certain energy group pairs */
3054             nm_ind = mtop->groups.grps[egcENER].nm_ind;
3055             for (egi = 0; egi < negp_pp; egi++)
3056             {
3057                 for (egj = egi; egj < negp_pp; egj++)
3058                 {
3059                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
3060                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
3061                     {
3062                         nbl = &(fr->nblists[m]);
3063                         if (fr->nnblists > 1)
3064                         {
3065                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
3066                         }
3067                         /* Read the table file with the two energy groups names appended */
3068                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3069                                         *mtop->groups.grpname[nm_ind[egi]],
3070                                         *mtop->groups.grpname[nm_ind[egj]],
3071                                         &fr->nblists[m]);
3072                         if (ir->adress)
3073                         {
3074                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
3075                                             *mtop->groups.grpname[nm_ind[egi]],
3076                                             *mtop->groups.grpname[nm_ind[egj]],
3077                                             &fr->nblists[fr->nnblists/2+m]);
3078                         }
3079                         m++;
3080                     }
3081                     else if (fr->nnblists > 1)
3082                     {
3083                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
3084                     }
3085                 }
3086             }
3087         }
3088     }
3089     if (bMakeSeparate14Table)
3090     {
3091         /* generate extra tables with plain Coulomb for 1-4 interactions only */
3092         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
3093                                 GMX_MAKETABLES_14ONLY);
3094     }
3095
3096     /* Read AdResS Thermo Force table if needed */
3097     if (fr->adress_icor == eAdressICThermoForce)
3098     {
3099         /* old todo replace */
3100
3101         if (ir->adress->n_tf_grps > 0)
3102         {
3103             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
3104
3105         }
3106         else
3107         {
3108             /* load the default table */
3109             snew(fr->atf_tabs, 1);
3110             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
3111         }
3112     }
3113
3114     /* Wall stuff */
3115     fr->nwall = ir->nwall;
3116     if (ir->nwall && ir->wall_type == ewtTABLE)
3117     {
3118         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
3119     }
3120
3121     if (fcd && tabbfn)
3122     {
3123         fcd->bondtab  = make_bonded_tables(fp,
3124                                            F_TABBONDS, F_TABBONDSNC,
3125                                            mtop, tabbfn, "b");
3126         fcd->angletab = make_bonded_tables(fp,
3127                                            F_TABANGLES, -1,
3128                                            mtop, tabbfn, "a");
3129         fcd->dihtab   = make_bonded_tables(fp,
3130                                            F_TABDIHS, -1,
3131                                            mtop, tabbfn, "d");
3132     }
3133     else
3134     {
3135         if (debug)
3136         {
3137             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
3138         }
3139     }
3140
3141     /* QM/MM initialization if requested
3142      */
3143     if (ir->bQMMM)
3144     {
3145         fprintf(stderr, "QM/MM calculation requested.\n");
3146     }
3147
3148     fr->bQMMM      = ir->bQMMM;
3149     fr->qr         = mk_QMMMrec();
3150
3151     /* Set all the static charge group info */
3152     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
3153                                    &bFEP_NonBonded,
3154                                    &fr->bExcl_IntraCGAll_InterCGNone);
3155     if (DOMAINDECOMP(cr))
3156     {
3157         fr->cginfo = NULL;
3158     }
3159     else
3160     {
3161         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
3162     }
3163
3164     if (!DOMAINDECOMP(cr))
3165     {
3166         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
3167                             mtop->natoms, mtop->natoms, mtop->natoms);
3168     }
3169
3170     fr->print_force = print_force;
3171
3172
3173     /* coarse load balancing vars */
3174     fr->t_fnbf    = 0.;
3175     fr->t_wait    = 0.;
3176     fr->timesteps = 0;
3177
3178     /* Initialize neighbor search */
3179     init_ns(fp, cr, &fr->ns, fr, mtop);
3180
3181     if (cr->duty & DUTY_PP)
3182     {
3183         gmx_nonbonded_setup(fr, bGenericKernelOnly);
3184         /*
3185            if (ir->bAdress)
3186             {
3187                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
3188             }
3189          */
3190     }
3191
3192     /* Initialize the thread working data for bonded interactions */
3193     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
3194
3195     snew(fr->excl_load, fr->nthreads+1);
3196
3197     if (fr->cutoff_scheme == ecutsVERLET)
3198     {
3199         if (ir->rcoulomb != ir->rvdw)
3200         {
3201             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
3202         }
3203
3204         init_nb_verlet(fp, &fr->nbv, bFEP_NonBonded, ir, fr, cr, nbpu_opt);
3205     }
3206
3207     /* fr->ic is used both by verlet and group kernels (to some extent) now */
3208     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
3209
3210     if (ir->eDispCorr != edispcNO)
3211     {
3212         calc_enervirdiff(fp, ir->eDispCorr, fr);
3213     }
3214 }
3215
3216 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
3217 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
3218 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3219
3220 void pr_forcerec(FILE *fp, t_forcerec *fr)
3221 {
3222     int i;
3223
3224     pr_real(fp, fr->rlist);
3225     pr_real(fp, fr->rcoulomb);
3226     pr_real(fp, fr->fudgeQQ);
3227     pr_bool(fp, fr->bGrid);
3228     pr_bool(fp, fr->bTwinRange);
3229     /*pr_int(fp,fr->cg0);
3230        pr_int(fp,fr->hcg);*/
3231     for (i = 0; i < fr->nnblists; i++)
3232     {
3233         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3234     }
3235     pr_real(fp, fr->rcoulomb_switch);
3236     pr_real(fp, fr->rcoulomb);
3237
3238     fflush(fp);
3239 }
3240
3241 void forcerec_set_excl_load(t_forcerec           *fr,
3242                             const gmx_localtop_t *top)
3243 {
3244     const int *ind, *a;
3245     int        t, i, j, ntot, n, ntarget;
3246
3247     ind = top->excls.index;
3248     a   = top->excls.a;
3249
3250     ntot = 0;
3251     for (i = 0; i < top->excls.nr; i++)
3252     {
3253         for (j = ind[i]; j < ind[i+1]; j++)
3254         {
3255             if (a[j] > i)
3256             {
3257                 ntot++;
3258             }
3259         }
3260     }
3261
3262     fr->excl_load[0] = 0;
3263     n                = 0;
3264     i                = 0;
3265     for (t = 1; t <= fr->nthreads; t++)
3266     {
3267         ntarget = (ntot*t)/fr->nthreads;
3268         while (i < top->excls.nr && n < ntarget)
3269         {
3270             for (j = ind[i]; j < ind[i+1]; j++)
3271             {
3272                 if (a[j] > i)
3273                 {
3274                     n++;
3275                 }
3276             }
3277             i++;
3278         }
3279         fr->excl_load[t] = i;
3280     }
3281 }