src/gromacs/mdlib/forcerec.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "typedefs.h"
  46 #include "vec.h"
  47 #include "gromacs/math/utilities.h"
  48 #include "macros.h"
  49 #include "smalloc.h"
  50 #include "macros.h"
  51 #include "gmx_fatal.h"
  52 #include "gmx_fatal_collective.h"
  53 #include "physics.h"
  54 #include "force.h"
  55 #include "tables.h"
  56 #include "nonbonded.h"
  57 #include "invblock.h"
  58 #include "names.h"
  59 #include "network.h"
  60 #include "pbc.h"
  61 #include "ns.h"
  62 #include "mshift.h"
  63 #include "txtdump.h"
  64 #include "coulomb.h"
  65 #include "md_support.h"
  66 #include "md_logging.h"
  67 #include "domdec.h"
  68 #include "partdec.h"
  69 #include "qmmm.h"
  70 #include "copyrite.h"
  71 #include "mtop_util.h"
  72 #include "nbnxn_search.h"
  73 #include "nbnxn_atomdata.h"
  74 #include "nbnxn_consts.h"
  75 #include "gmx_omp_nthreads.h"
  76 #include "gmx_detect_hardware.h"
  77
  78 #ifdef _MSC_VER
  79 /* MSVC definition for __cpuid() */
  80 #include <intrin.h>
  81 #endif
  82
  83 #include "types/nbnxn_cuda_types_ext.h"
  84 #include "gpu_utils.h"
  85 #include "nbnxn_cuda_data_mgmt.h"
  86 #include "pmalloc_cuda.h"
  87
  88 t_forcerec *mk_forcerec(void)
  89 {
  90     t_forcerec *fr;
  91
  92     snew(fr, 1);
  93
  94     return fr;
  95 }
  96
  97 #ifdef DEBUG
  98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  99 {
 100     int i, j;
 101
 102     for (i = 0; (i < atnr); i++)
 103     {
 104         for (j = 0; (j < atnr); j++)
 105         {
 106             fprintf(fp, "%2d - %2d", i, j);
 107             if (bBHAM)
 108             {
 109                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 110                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 111             }
 112             else
 113             {
 114                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 115                         C12(nbfp, atnr, i, j)/12.0);
 116             }
 117         }
 118     }
 119 }
 120 #endif
 121
 122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 123 {
 124     real *nbfp;
 125     int   i, j, k, atnr;
 126
 127     atnr = idef->atnr;
 128     if (bBHAM)
 129     {
 130         snew(nbfp, 3*atnr*atnr);
 131         for (i = k = 0; (i < atnr); i++)
 132         {
 133             for (j = 0; (j < atnr); j++, k++)
 134             {
 135                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 136                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 137                 /* nbfp now includes the 6.0 derivative prefactor */
 138                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 139             }
 140         }
 141     }
 142     else
 143     {
 144         snew(nbfp, 2*atnr*atnr);
 145         for (i = k = 0; (i < atnr); i++)
 146         {
 147             for (j = 0; (j < atnr); j++, k++)
 148             {
 149                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 150                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 151                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 152             }
 153         }
 154     }
 155
 156     return nbfp;
 157 }
 158
 159 static real *mk_nbfp_combination_rule(const gmx_ffparams_t *idef, int comb_rule)
 160 {
 161     real *nbfp;
 162     int   i, j, k, atnr;
 163     real  c6i, c6j, c12i, c12j, epsi, epsj, sigmai, sigmaj;
 164     real  c6, c12;
 165
 166     atnr = idef->atnr;
 167     snew(nbfp, 2*atnr*atnr);
 168     for (i = 0; i < atnr; ++i)
 169     {
 170         for (j = 0; j < atnr; ++j)
 171         {
 172             c6i  = idef->iparams[i*(atnr+1)].lj.c6;
 173             c12i = idef->iparams[i*(atnr+1)].lj.c12;
 174             c6j  = idef->iparams[j*(atnr+1)].lj.c6;
 175             c12j = idef->iparams[j*(atnr+1)].lj.c12;
 176             c6   = sqrt(c6i  * c6j);
 177             c12  = sqrt(c12i * c12j);
 178             if (comb_rule == eCOMB_ARITHMETIC
 179                 && !gmx_numzero(c6) && !gmx_numzero(c12))
 180             {
 181                 sigmai = pow(c12i / c6i, 1.0/6.0);
 182                 sigmaj = pow(c12j / c6j, 1.0/6.0);
 183                 epsi   = c6i * c6i / c12i;
 184                 epsj   = c6j * c6j / c12j;
 185                 c6     = epsi * epsj * pow(0.5*(sigmai+sigmaj), 6);
 186                 c12    = epsi * epsj * pow(0.5*(sigmai+sigmaj), 12);
 187             }
 188             C6(nbfp, atnr, i, j)   = c6*6.0;
 189             C12(nbfp, atnr, i, j)  = c12*12.0;
 190         }
 191     }
 192     return nbfp;
 193 }
 194
 195 /* This routine sets fr->solvent_opt to the most common solvent in the
 196  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 197  * the fr->solvent_type array with the correct type (or esolNO).
 198  *
 199  * Charge groups that fulfill the conditions but are not identical to the
 200  * most common one will be marked as esolNO in the solvent_type array.
 201  *
 202  * TIP3p is identical to SPC for these purposes, so we call it
 203  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 204  *
 205  * NOTE: QM particle should not
 206  * become an optimized solvent. Not even if there is only one charge
 207  * group in the Qm
 208  */
 209
 210 typedef struct
 211 {
 212     int    model;
 213     int    count;
 214     int    vdwtype[4];
 215     real   charge[4];
 216 } solvent_parameters_t;
 217
 218 static void
 219 check_solvent_cg(const gmx_moltype_t    *molt,
 220                  int                     cg0,
 221                  int                     nmol,
 222                  const unsigned char    *qm_grpnr,
 223                  const t_grps           *qm_grps,
 224                  t_forcerec   *          fr,
 225                  int                    *n_solvent_parameters,
 226                  solvent_parameters_t  **solvent_parameters_p,
 227                  int                     cginfo,
 228                  int                    *cg_sp)
 229 {
 230     const t_blocka     *  excl;
 231     t_atom               *atom;
 232     int                   j, k;
 233     int                   j0, j1, nj;
 234     gmx_bool              perturbed;
 235     gmx_bool              has_vdw[4];
 236     gmx_bool              match;
 237     real                  tmp_charge[4];
 238     int                   tmp_vdwtype[4];
 239     int                   tjA;
 240     gmx_bool              qm;
 241     solvent_parameters_t *solvent_parameters;
 242
 243     /* We use a list with parameters for each solvent type.
 244      * Every time we discover a new molecule that fulfills the basic
 245      * conditions for a solvent we compare with the previous entries
 246      * in these lists. If the parameters are the same we just increment
 247      * the counter for that type, and otherwise we create a new type
 248      * based on the current molecule.
 249      *
 250      * Once we've finished going through all molecules we check which
 251      * solvent is most common, and mark all those molecules while we
 252      * clear the flag on all others.
 253      */
 254
 255     solvent_parameters = *solvent_parameters_p;
 256
 257     /* Mark the cg first as non optimized */
 258     *cg_sp = -1;
 259
 260     /* Check if this cg has no exclusions with atoms in other charge groups
 261      * and all atoms inside the charge group excluded.
 262      * We only have 3 or 4 atom solvent loops.
 263      */
 264     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 265         !GET_CGINFO_EXCL_INTRA(cginfo))
 266     {
 267         return;
 268     }
 269
 270     /* Get the indices of the first atom in this charge group */
 271     j0     = molt->cgs.index[cg0];
 272     j1     = molt->cgs.index[cg0+1];
 273
 274     /* Number of atoms in our molecule */
 275     nj     = j1 - j0;
 276
 277     if (debug)
 278     {
 279         fprintf(debug,
 280                 "Moltype '%s': there are %d atoms in this charge group\n",
 281                 *molt->name, nj);
 282     }
 283
 284     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 285      * otherwise skip it.
 286      */
 287     if (nj < 3 || nj > 4)
 288     {
 289         return;
 290     }
 291
 292     /* Check if we are doing QM on this group */
 293     qm = FALSE;
 294     if (qm_grpnr != NULL)
 295     {
 296         for (j = j0; j < j1 && !qm; j++)
 297         {
 298             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 299         }
 300     }
 301     /* Cannot use solvent optimization with QM */
 302     if (qm)
 303     {
 304         return;
 305     }
 306
 307     atom = molt->atoms.atom;
 308
 309     /* Still looks like a solvent, time to check parameters */
 310
 311     /* If it is perturbed (free energy) we can't use the solvent loops,
 312      * so then we just skip to the next molecule.
 313      */
 314     perturbed = FALSE;
 315
 316     for (j = j0; j < j1 && !perturbed; j++)
 317     {
 318         perturbed = PERTURBED(atom[j]);
 319     }
 320
 321     if (perturbed)
 322     {
 323         return;
 324     }
 325
 326     /* Now it's only a question if the VdW and charge parameters
 327      * are OK. Before doing the check we compare and see if they are
 328      * identical to a possible previous solvent type.
 329      * First we assign the current types and charges.
 330      */
 331     for (j = 0; j < nj; j++)
 332     {
 333         tmp_vdwtype[j] = atom[j0+j].type;
 334         tmp_charge[j]  = atom[j0+j].q;
 335     }
 336
 337     /* Does it match any previous solvent type? */
 338     for (k = 0; k < *n_solvent_parameters; k++)
 339     {
 340         match = TRUE;
 341
 342
 343         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 344         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 345              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 346         {
 347             match = FALSE;
 348         }
 349
 350         /* Check that types & charges match for all atoms in molecule */
 351         for (j = 0; j < nj && match == TRUE; j++)
 352         {
 353             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 354             {
 355                 match = FALSE;
 356             }
 357             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 358             {
 359                 match = FALSE;
 360             }
 361         }
 362         if (match == TRUE)
 363         {
 364             /* Congratulations! We have a matched solvent.
 365              * Flag it with this type for later processing.
 366              */
 367             *cg_sp = k;
 368             solvent_parameters[k].count += nmol;
 369
 370             /* We are done with this charge group */
 371             return;
 372         }
 373     }
 374
 375     /* If we get here, we have a tentative new solvent type.
 376      * Before we add it we must check that it fulfills the requirements
 377      * of the solvent optimized loops. First determine which atoms have
 378      * VdW interactions.
 379      */
 380     for (j = 0; j < nj; j++)
 381     {
 382         has_vdw[j] = FALSE;
 383         tjA        = tmp_vdwtype[j];
 384
 385         /* Go through all other tpes and see if any have non-zero
 386          * VdW parameters when combined with this one.
 387          */
 388         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 389         {
 390             /* We already checked that the atoms weren't perturbed,
 391              * so we only need to check state A now.
 392              */
 393             if (fr->bBHAM)
 394             {
 395                 has_vdw[j] = (has_vdw[j] ||
 396                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 397                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 398                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 399             }
 400             else
 401             {
 402                 /* Standard LJ */
 403                 has_vdw[j] = (has_vdw[j] ||
 404                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 405                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 406             }
 407         }
 408     }
 409
 410     /* Now we know all we need to make the final check and assignment. */
 411     if (nj == 3)
 412     {
 413         /* So, is it an SPC?
 414          * For this we require thatn all atoms have charge,
 415          * the charges on atom 2 & 3 should be the same, and only
 416          * atom 1 might have VdW.
 417          */
 418         if (has_vdw[1] == FALSE &&
 419             has_vdw[2] == FALSE &&
 420             tmp_charge[0]  != 0 &&
 421             tmp_charge[1]  != 0 &&
 422             tmp_charge[2]  == tmp_charge[1])
 423         {
 424             srenew(solvent_parameters, *n_solvent_parameters+1);
 425             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 426             solvent_parameters[*n_solvent_parameters].count = nmol;
 427             for (k = 0; k < 3; k++)
 428             {
 429                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 430                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 431             }
 432
 433             *cg_sp = *n_solvent_parameters;
 434             (*n_solvent_parameters)++;
 435         }
 436     }
 437     else if (nj == 4)
 438     {
 439         /* Or could it be a TIP4P?
 440          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 441          * Only atom 1 mght have VdW.
 442          */
 443         if (has_vdw[1] == FALSE &&
 444             has_vdw[2] == FALSE &&
 445             has_vdw[3] == FALSE &&
 446             tmp_charge[0]  == 0 &&
 447             tmp_charge[1]  != 0 &&
 448             tmp_charge[2]  == tmp_charge[1] &&
 449             tmp_charge[3]  != 0)
 450         {
 451             srenew(solvent_parameters, *n_solvent_parameters+1);
 452             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 453             solvent_parameters[*n_solvent_parameters].count = nmol;
 454             for (k = 0; k < 4; k++)
 455             {
 456                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 457                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 458             }
 459
 460             *cg_sp = *n_solvent_parameters;
 461             (*n_solvent_parameters)++;
 462         }
 463     }
 464
 465     *solvent_parameters_p = solvent_parameters;
 466 }
 467
 468 static void
 469 check_solvent(FILE  *                fp,
 470               const gmx_mtop_t  *    mtop,
 471               t_forcerec  *          fr,
 472               cginfo_mb_t           *cginfo_mb)
 473 {
 474     const t_block     *   cgs;
 475     const t_block     *   mols;
 476     const gmx_moltype_t  *molt;
 477     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 478     int                   n_solvent_parameters;
 479     solvent_parameters_t *solvent_parameters;
 480     int                 **cg_sp;
 481     int                   bestsp, bestsol;
 482
 483     if (debug)
 484     {
 485         fprintf(debug, "Going to determine what solvent types we have.\n");
 486     }
 487
 488     mols = &mtop->mols;
 489
 490     n_solvent_parameters = 0;
 491     solvent_parameters   = NULL;
 492     /* Allocate temporary array for solvent type */
 493     snew(cg_sp, mtop->nmolblock);
 494
 495     cg_offset = 0;
 496     at_offset = 0;
 497     for (mb = 0; mb < mtop->nmolblock; mb++)
 498     {
 499         molt = &mtop->moltype[mtop->molblock[mb].type];
 500         cgs  = &molt->cgs;
 501         /* Here we have to loop over all individual molecules
 502          * because we need to check for QMMM particles.
 503          */
 504         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 505         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 506         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 507         for (mol = 0; mol < nmol_ch; mol++)
 508         {
 509             cgm = mol*cgs->nr;
 510             am  = mol*cgs->index[cgs->nr];
 511             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 512             {
 513                 check_solvent_cg(molt, cg_mol, nmol,
 514                                  mtop->groups.grpnr[egcQMMM] ?
 515                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 516                                  &mtop->groups.grps[egcQMMM],
 517                                  fr,
 518                                  &n_solvent_parameters, &solvent_parameters,
 519                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 520                                  &cg_sp[mb][cgm+cg_mol]);
 521             }
 522         }
 523         cg_offset += cgs->nr;
 524         at_offset += cgs->index[cgs->nr];
 525     }
 526
 527     /* Puh! We finished going through all charge groups.
 528      * Now find the most common solvent model.
 529      */
 530
 531     /* Most common solvent this far */
 532     bestsp = -2;
 533     for (i = 0; i < n_solvent_parameters; i++)
 534     {
 535         if (bestsp == -2 ||
 536             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 537         {
 538             bestsp = i;
 539         }
 540     }
 541
 542     if (bestsp >= 0)
 543     {
 544         bestsol = solvent_parameters[bestsp].model;
 545     }
 546     else
 547     {
 548         bestsol = esolNO;
 549     }
 550
 551 #ifdef DISABLE_WATER_NLIST
 552     bestsol = esolNO;
 553 #endif
 554
 555     fr->nWatMol = 0;
 556     for (mb = 0; mb < mtop->nmolblock; mb++)
 557     {
 558         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 559         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 560         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 561         {
 562             if (cg_sp[mb][i] == bestsp)
 563             {
 564                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 565                 fr->nWatMol += nmol;
 566             }
 567             else
 568             {
 569                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 570             }
 571         }
 572         sfree(cg_sp[mb]);
 573     }
 574     sfree(cg_sp);
 575
 576     if (bestsol != esolNO && fp != NULL)
 577     {
 578         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 579                 esol_names[bestsol],
 580                 solvent_parameters[bestsp].count);
 581     }
 582
 583     sfree(solvent_parameters);
 584     fr->solvent_opt = bestsol;
 585 }
 586
 587 enum {
 588     acNONE = 0, acCONSTRAINT, acSETTLE
 589 };
 590
 591 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 592                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 593                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 594 {
 595     const t_block        *cgs;
 596     const t_blocka       *excl;
 597     const gmx_moltype_t  *molt;
 598     const gmx_molblock_t *molb;
 599     cginfo_mb_t          *cginfo_mb;
 600     gmx_bool             *type_VDW;
 601     int                  *cginfo;
 602     int                   cg_offset, a_offset, cgm, am;
 603     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 604     int                  *a_con;
 605     int                   ftype;
 606     int                   ia;
 607     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 608
 609     ncg_tot = ncg_mtop(mtop);
 610     snew(cginfo_mb, mtop->nmolblock);
 611
 612     snew(type_VDW, fr->ntype);
 613     for (ai = 0; ai < fr->ntype; ai++)
 614     {
 615         type_VDW[ai] = FALSE;
 616         for (j = 0; j < fr->ntype; j++)
 617         {
 618             type_VDW[ai] = type_VDW[ai] ||
 619                 fr->bBHAM ||
 620                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 621                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 622         }
 623     }
 624
 625     *bExcl_IntraCGAll_InterCGNone = TRUE;
 626
 627     excl_nalloc = 10;
 628     snew(bExcl, excl_nalloc);
 629     cg_offset = 0;
 630     a_offset  = 0;
 631     for (mb = 0; mb < mtop->nmolblock; mb++)
 632     {
 633         molb = &mtop->molblock[mb];
 634         molt = &mtop->moltype[molb->type];
 635         cgs  = &molt->cgs;
 636         excl = &molt->excls;
 637
 638         /* Check if the cginfo is identical for all molecules in this block.
 639          * If so, we only need an array of the size of one molecule.
 640          * Otherwise we make an array of #mol times #cgs per molecule.
 641          */
 642         bId = TRUE;
 643         am  = 0;
 644         for (m = 0; m < molb->nmol; m++)
 645         {
 646             am = m*cgs->index[cgs->nr];
 647             for (cg = 0; cg < cgs->nr; cg++)
 648             {
 649                 a0 = cgs->index[cg];
 650                 a1 = cgs->index[cg+1];
 651                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 652                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 653                 {
 654                     bId = FALSE;
 655                 }
 656                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 657                 {
 658                     for (ai = a0; ai < a1; ai++)
 659                     {
 660                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 661                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 662                         {
 663                             bId = FALSE;
 664                         }
 665                     }
 666                 }
 667             }
 668         }
 669
 670         cginfo_mb[mb].cg_start = cg_offset;
 671         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 672         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 673         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 674         cginfo = cginfo_mb[mb].cginfo;
 675
 676         /* Set constraints flags for constrained atoms */
 677         snew(a_con, molt->atoms.nr);
 678         for (ftype = 0; ftype < F_NRE; ftype++)
 679         {
 680             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 681             {
 682                 int nral;
 683
 684                 nral = NRAL(ftype);
 685                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 686                 {
 687                     int a;
 688
 689                     for (a = 0; a < nral; a++)
 690                     {
 691                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 692                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 693                     }
 694                 }
 695             }
 696         }
 697
 698         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 699         {
 700             cgm = m*cgs->nr;
 701             am  = m*cgs->index[cgs->nr];
 702             for (cg = 0; cg < cgs->nr; cg++)
 703             {
 704                 a0 = cgs->index[cg];
 705                 a1 = cgs->index[cg+1];
 706
 707                 /* Store the energy group in cginfo */
 708                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 709                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 710
 711                 /* Check the intra/inter charge group exclusions */
 712                 if (a1-a0 > excl_nalloc)
 713                 {
 714                     excl_nalloc = a1 - a0;
 715                     srenew(bExcl, excl_nalloc);
 716                 }
 717                 /* bExclIntraAll: all intra cg interactions excluded
 718                  * bExclInter:    any inter cg interactions excluded
 719                  */
 720                 bExclIntraAll = TRUE;
 721                 bExclInter    = FALSE;
 722                 bHaveVDW      = FALSE;
 723                 bHaveQ        = FALSE;
 724                 for (ai = a0; ai < a1; ai++)
 725                 {
 726                     /* Check VDW and electrostatic interactions */
 727                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 728                                             type_VDW[molt->atoms.atom[ai].typeB]);
 729                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 730                                             molt->atoms.atom[ai].qB != 0);
 731
 732                     /* Clear the exclusion list for atom ai */
 733                     for (aj = a0; aj < a1; aj++)
 734                     {
 735                         bExcl[aj-a0] = FALSE;
 736                     }
 737                     /* Loop over all the exclusions of atom ai */
 738                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 739                     {
 740                         aj = excl->a[j];
 741                         if (aj < a0 || aj >= a1)
 742                         {
 743                             bExclInter = TRUE;
 744                         }
 745                         else
 746                         {
 747                             bExcl[aj-a0] = TRUE;
 748                         }
 749                     }
 750                     /* Check if ai excludes a0 to a1 */
 751                     for (aj = a0; aj < a1; aj++)
 752                     {
 753                         if (!bExcl[aj-a0])
 754                         {
 755                             bExclIntraAll = FALSE;
 756                         }
 757                     }
 758
 759                     switch (a_con[ai])
 760                     {
 761                         case acCONSTRAINT:
 762                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 763                             break;
 764                         case acSETTLE:
 765                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 766                             break;
 767                         default:
 768                             break;
 769                     }
 770                 }
 771                 if (bExclIntraAll)
 772                 {
 773                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 774                 }
 775                 if (bExclInter)
 776                 {
 777                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 778                 }
 779                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 780                 {
 781                     /* The size in cginfo is currently only read with DD */
 782                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 783                 }
 784                 if (bHaveVDW)
 785                 {
 786                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 787                 }
 788                 if (bHaveQ)
 789                 {
 790                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 791                 }
 792                 /* Store the charge group size */
 793                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 794
 795                 if (!bExclIntraAll || bExclInter)
 796                 {
 797                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 798                 }
 799             }
 800         }
 801
 802         sfree(a_con);
 803
 804         cg_offset += molb->nmol*cgs->nr;
 805         a_offset  += molb->nmol*cgs->index[cgs->nr];
 806     }
 807     sfree(bExcl);
 808
 809     /* the solvent optimizer is called after the QM is initialized,
 810      * because we don't want to have the QM subsystemto become an
 811      * optimized solvent
 812      */
 813
 814     check_solvent(fplog, mtop, fr, cginfo_mb);
 815
 816     if (getenv("GMX_NO_SOLV_OPT"))
 817     {
 818         if (fplog)
 819         {
 820             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 821                     "Disabling all solvent optimization\n");
 822         }
 823         fr->solvent_opt = esolNO;
 824     }
 825     if (bNoSolvOpt)
 826     {
 827         fr->solvent_opt = esolNO;
 828     }
 829     if (!fr->solvent_opt)
 830     {
 831         for (mb = 0; mb < mtop->nmolblock; mb++)
 832         {
 833             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 834             {
 835                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 836             }
 837         }
 838     }
 839
 840     return cginfo_mb;
 841 }
 842
 843 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 844 {
 845     int  ncg, mb, cg;
 846     int *cginfo;
 847
 848     ncg = cgi_mb[nmb-1].cg_end;
 849     snew(cginfo, ncg);
 850     mb = 0;
 851     for (cg = 0; cg < ncg; cg++)
 852     {
 853         while (cg >= cgi_mb[mb].cg_end)
 854         {
 855             mb++;
 856         }
 857         cginfo[cg] =
 858             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 859     }
 860
 861     return cginfo;
 862 }
 863
 864 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 865 {
 866     /*This now calculates sum for q and c6*/
 867     double         qsum, q2sum, q, c6sum, c6, sqrc6;
 868     int            mb, nmol, i;
 869     const t_atoms *atoms;
 870
 871     qsum   = 0;
 872     q2sum  = 0;
 873     c6sum  = 0;
 874     for (mb = 0; mb < mtop->nmolblock; mb++)
 875     {
 876         nmol  = mtop->molblock[mb].nmol;
 877         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 878         for (i = 0; i < atoms->nr; i++)
 879         {
 880             q       = atoms->atom[i].q;
 881             qsum   += nmol*q;
 882             q2sum  += nmol*q*q;
 883             c6      = mtop->ffparams.iparams[atoms->atom[i].type*(mtop->ffparams.atnr+1)].lj.c6;
 884             sqrc6   = sqrt(c6);
 885             c6sum  += nmol*sqrc6*sqrc6;
 886         }
 887     }
 888     fr->qsum[0]   = qsum;
 889     fr->q2sum[0]  = q2sum;
 890     fr->c6sum[0]  = c6sum/12;
 891
 892     if (fr->efep != efepNO)
 893     {
 894         qsum   = 0;
 895         q2sum  = 0;
 896         c6sum  = 0;
 897         for (mb = 0; mb < mtop->nmolblock; mb++)
 898         {
 899             nmol  = mtop->molblock[mb].nmol;
 900             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 901             for (i = 0; i < atoms->nr; i++)
 902             {
 903                 q       = atoms->atom[i].qB;
 904                 qsum   += nmol*q;
 905                 q2sum  += nmol*q*q;
 906                 c6      = mtop->ffparams.iparams[atoms->atom[i].typeB*(mtop->ffparams.atnr+1)].lj.c6;
 907                 sqrc6   = sqrt(c6);
 908                 c6sum  += nmol*sqrc6*sqrc6;
 909             }
 910             fr->qsum[1]   = qsum;
 911             fr->q2sum[1]  = q2sum;
 912             fr->c6sum[1]  = c6sum/12;
 913         }
 914     }
 915     else
 916     {
 917         fr->qsum[1]   = fr->qsum[0];
 918         fr->q2sum[1]  = fr->q2sum[0];
 919         fr->c6sum[1]  = fr->c6sum[0];
 920     }
 921     if (log)
 922     {
 923         if (fr->efep == efepNO)
 924         {
 925             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 926         }
 927         else
 928         {
 929             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 930                     fr->qsum[0], fr->qsum[1]);
 931         }
 932     }
 933 }
 934
 935 void update_forcerec(t_forcerec *fr, matrix box)
 936 {
 937     if (fr->eeltype == eelGRF)
 938     {
 939         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 940                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 941                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 942     }
 943 }
 944
 945 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 946 {
 947     const t_atoms  *atoms, *atoms_tpi;
 948     const t_blocka *excl;
 949     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 950 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 951     long long int   npair, npair_ij, tmpi, tmpj;
 952 #else
 953     double          npair, npair_ij, tmpi, tmpj;
 954 #endif
 955     double          csix, ctwelve;
 956     int             ntp, *typecount;
 957     gmx_bool        bBHAM;
 958     real           *nbfp;
 959     real           *nbfp_comb = NULL;
 960
 961     ntp   = fr->ntype;
 962     bBHAM = fr->bBHAM;
 963     nbfp  = fr->nbfp;
 964
 965     /* For LJ-PME, we want to correct for the difference between the
 966      * actual C6 values and the C6 values used by the LJ-PME based on
 967      * combination rules. */
 968
 969     if (EVDW_PME(fr->vdwtype))
 970     {
 971         nbfp_comb = mk_nbfp_combination_rule(&mtop->ffparams,
 972                                              (fr->ljpme_combination_rule == eljpmeLB) ? eCOMB_ARITHMETIC : eCOMB_GEOMETRIC);
 973         for (tpi = 0; tpi < ntp; ++tpi)
 974         {
 975             for (tpj = 0; tpj < ntp; ++tpj)
 976             {
 977                 C6(nbfp_comb, ntp, tpi, tpj) =
 978                     C6(nbfp, ntp, tpi, tpj) - C6(nbfp_comb, ntp, tpi, tpj);
 979                 C12(nbfp_comb, ntp, tpi, tpj) = C12(nbfp, ntp, tpi, tpj);
 980             }
 981         }
 982         nbfp = nbfp_comb;
 983     }
 984     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 985     {
 986         csix    = 0;
 987         ctwelve = 0;
 988         npair   = 0;
 989         nexcl   = 0;
 990         if (!fr->n_tpi)
 991         {
 992             /* Count the types so we avoid natoms^2 operations */
 993             snew(typecount, ntp);
 994             gmx_mtop_count_atomtypes(mtop, q, typecount);
 995
 996             for (tpi = 0; tpi < ntp; tpi++)
 997             {
 998                 for (tpj = tpi; tpj < ntp; tpj++)
 999                 {
1000                     tmpi = typecount[tpi];
1001                     tmpj = typecount[tpj];
1002                     if (tpi != tpj)
1003                     {
1004                         npair_ij = tmpi*tmpj;
1005                     }
1006                     else
1007                     {
1008                         npair_ij = tmpi*(tmpi - 1)/2;
1009                     }
1010                     if (bBHAM)
1011                     {
1012                         /* nbfp now includes the 6.0 derivative prefactor */
1013                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1014                     }
1015                     else
1016                     {
1017                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
1018                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
1019                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
1020                     }
1021                     npair += npair_ij;
1022                 }
1023             }
1024             sfree(typecount);
1025             /* Subtract the excluded pairs.
1026              * The main reason for substracting exclusions is that in some cases
1027              * some combinations might never occur and the parameters could have
1028              * any value. These unused values should not influence the dispersion
1029              * correction.
1030              */
1031             for (mb = 0; mb < mtop->nmolblock; mb++)
1032             {
1033                 nmol  = mtop->molblock[mb].nmol;
1034                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1035                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
1036                 for (i = 0; (i < atoms->nr); i++)
1037                 {
1038                     if (q == 0)
1039                     {
1040                         tpi = atoms->atom[i].type;
1041                     }
1042                     else
1043                     {
1044                         tpi = atoms->atom[i].typeB;
1045                     }
1046                     j1  = excl->index[i];
1047                     j2  = excl->index[i+1];
1048                     for (j = j1; j < j2; j++)
1049                     {
1050                         k = excl->a[j];
1051                         if (k > i)
1052                         {
1053                             if (q == 0)
1054                             {
1055                                 tpj = atoms->atom[k].type;
1056                             }
1057                             else
1058                             {
1059                                 tpj = atoms->atom[k].typeB;
1060                             }
1061                             if (bBHAM)
1062                             {
1063                                 /* nbfp now includes the 6.0 derivative prefactor */
1064                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1065                             }
1066                             else
1067                             {
1068                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1069                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1070                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1071                             }
1072                             nexcl += nmol;
1073                         }
1074                     }
1075                 }
1076             }
1077         }
1078         else
1079         {
1080             /* Only correct for the interaction of the test particle
1081              * with the rest of the system.
1082              */
1083             atoms_tpi =
1084                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1085
1086             npair = 0;
1087             for (mb = 0; mb < mtop->nmolblock; mb++)
1088             {
1089                 nmol  = mtop->molblock[mb].nmol;
1090                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1091                 for (j = 0; j < atoms->nr; j++)
1092                 {
1093                     nmolc = nmol;
1094                     /* Remove the interaction of the test charge group
1095                      * with itself.
1096                      */
1097                     if (mb == mtop->nmolblock-1)
1098                     {
1099                         nmolc--;
1100
1101                         if (mb == 0 && nmol == 1)
1102                         {
1103                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1104                         }
1105                     }
1106                     if (q == 0)
1107                     {
1108                         tpj = atoms->atom[j].type;
1109                     }
1110                     else
1111                     {
1112                         tpj = atoms->atom[j].typeB;
1113                     }
1114                     for (i = 0; i < fr->n_tpi; i++)
1115                     {
1116                         if (q == 0)
1117                         {
1118                             tpi = atoms_tpi->atom[i].type;
1119                         }
1120                         else
1121                         {
1122                             tpi = atoms_tpi->atom[i].typeB;
1123                         }
1124                         if (bBHAM)
1125                         {
1126                             /* nbfp now includes the 6.0 derivative prefactor */
1127                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1128                         }
1129                         else
1130                         {
1131                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1132                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1133                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1134                         }
1135                         npair += nmolc;
1136                     }
1137                 }
1138             }
1139         }
1140         if (npair - nexcl <= 0 && fplog)
1141         {
1142             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1143             csix     = 0;
1144             ctwelve  = 0;
1145         }
1146         else
1147         {
1148             csix    /= npair - nexcl;
1149             ctwelve /= npair - nexcl;
1150         }
1151         if (debug)
1152         {
1153             fprintf(debug, "Counted %d exclusions\n", nexcl);
1154             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1155             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1156         }
1157         fr->avcsix[q]    = csix;
1158         fr->avctwelve[q] = ctwelve;
1159     }
1160
1161     if (EVDW_PME(fr->vdwtype))
1162     {
1163         sfree(nbfp_comb);
1164     }
1165
1166     if (fplog != NULL)
1167     {
1168         if (fr->eDispCorr == edispcAllEner ||
1169             fr->eDispCorr == edispcAllEnerPres)
1170         {
1171             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1172                     fr->avcsix[0], fr->avctwelve[0]);
1173         }
1174         else
1175         {
1176             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1177         }
1178     }
1179 }
1180
1181
1182 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1183                            const gmx_mtop_t *mtop)
1184 {
1185     const t_atoms *at1, *at2;
1186     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1187     real           b, bmin;
1188     real          *nbfp;
1189
1190     if (fplog)
1191     {
1192         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1193     }
1194     nbfp   = fr->nbfp;
1195     ntypes = fr->ntype;
1196
1197     bmin           = -1;
1198     fr->bham_b_max = 0;
1199     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1200     {
1201         at1 = &mtop->moltype[mt1].atoms;
1202         for (i = 0; (i < at1->nr); i++)
1203         {
1204             tpi = at1->atom[i].type;
1205             if (tpi >= ntypes)
1206             {
1207                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1208             }
1209
1210             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1211             {
1212                 at2 = &mtop->moltype[mt2].atoms;
1213                 for (j = 0; (j < at2->nr); j++)
1214                 {
1215                     tpj = at2->atom[j].type;
1216                     if (tpj >= ntypes)
1217                     {
1218                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1219                     }
1220                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1221                     if (b > fr->bham_b_max)
1222                     {
1223                         fr->bham_b_max = b;
1224                     }
1225                     if ((b < bmin) || (bmin == -1))
1226                     {
1227                         bmin = b;
1228                     }
1229                 }
1230             }
1231         }
1232     }
1233     if (fplog)
1234     {
1235         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1236                 bmin, fr->bham_b_max);
1237     }
1238 }
1239
1240 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1241                             t_forcerec *fr, real rtab,
1242                             const t_commrec *cr,
1243                             const char *tabfn, char *eg1, char *eg2,
1244                             t_nblists *nbl)
1245 {
1246     char buf[STRLEN];
1247     int  i, j;
1248
1249     if (tabfn == NULL)
1250     {
1251         if (debug)
1252         {
1253             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1254         }
1255         return;
1256     }
1257
1258     sprintf(buf, "%s", tabfn);
1259     if (eg1 && eg2)
1260     {
1261         /* Append the two energy group names */
1262         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1263                 eg1, eg2, ftp2ext(efXVG));
1264     }
1265     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1266     /* Copy the contents of the table to separate coulomb and LJ tables too,
1267      * to improve cache performance.
1268      */
1269     /* For performance reasons we want
1270      * the table data to be aligned to 16-byte. The pointers could be freed
1271      * but currently aren't.
1272      */
1273     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1274     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1275     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1276     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1277     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1278     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1279     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1280     nbl->table_elec.ninteractions = 1;
1281     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1282     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1283
1284     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1285     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1286     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1287     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1288     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1289     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1290     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1291     nbl->table_vdw.ninteractions = 2;
1292     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1293     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1294
1295     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1296     {
1297         for (j = 0; j < 4; j++)
1298         {
1299             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1300         }
1301         for (j = 0; j < 8; j++)
1302         {
1303             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1304         }
1305     }
1306 }
1307
1308 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1309                          int *ncount, int **count)
1310 {
1311     const gmx_moltype_t *molt;
1312     const t_ilist       *il;
1313     int                  mt, ftype, stride, i, j, tabnr;
1314
1315     for (mt = 0; mt < mtop->nmoltype; mt++)
1316     {
1317         molt = &mtop->moltype[mt];
1318         for (ftype = 0; ftype < F_NRE; ftype++)
1319         {
1320             if (ftype == ftype1 || ftype == ftype2)
1321             {
1322                 il     = &molt->ilist[ftype];
1323                 stride = 1 + NRAL(ftype);
1324                 for (i = 0; i < il->nr; i += stride)
1325                 {
1326                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1327                     if (tabnr < 0)
1328                     {
1329                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1330                     }
1331                     if (tabnr >= *ncount)
1332                     {
1333                         srenew(*count, tabnr+1);
1334                         for (j = *ncount; j < tabnr+1; j++)
1335                         {
1336                             (*count)[j] = 0;
1337                         }
1338                         *ncount = tabnr+1;
1339                     }
1340                     (*count)[tabnr]++;
1341                 }
1342             }
1343         }
1344     }
1345 }
1346
1347 static bondedtable_t *make_bonded_tables(FILE *fplog,
1348                                          int ftype1, int ftype2,
1349                                          const gmx_mtop_t *mtop,
1350                                          const char *basefn, const char *tabext)
1351 {
1352     int            i, ncount, *count;
1353     char           tabfn[STRLEN];
1354     bondedtable_t *tab;
1355
1356     tab = NULL;
1357
1358     ncount = 0;
1359     count  = NULL;
1360     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1361
1362     if (ncount > 0)
1363     {
1364         snew(tab, ncount);
1365         for (i = 0; i < ncount; i++)
1366         {
1367             if (count[i] > 0)
1368             {
1369                 sprintf(tabfn, "%s", basefn);
1370                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1371                         tabext, i, ftp2ext(efXVG));
1372                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1373             }
1374         }
1375         sfree(count);
1376     }
1377
1378     return tab;
1379 }
1380
1381 void forcerec_set_ranges(t_forcerec *fr,
1382                          int ncg_home, int ncg_force,
1383                          int natoms_force,
1384                          int natoms_force_constr, int natoms_f_novirsum)
1385 {
1386     fr->cg0 = 0;
1387     fr->hcg = ncg_home;
1388
1389     /* fr->ncg_force is unused in the standard code,
1390      * but it can be useful for modified code dealing with charge groups.
1391      */
1392     fr->ncg_force           = ncg_force;
1393     fr->natoms_force        = natoms_force;
1394     fr->natoms_force_constr = natoms_force_constr;
1395
1396     if (fr->natoms_force_constr > fr->nalloc_force)
1397     {
1398         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1399
1400         if (fr->bTwinRange)
1401         {
1402             srenew(fr->f_twin, fr->nalloc_force);
1403         }
1404     }
1405
1406     if (fr->bF_NoVirSum)
1407     {
1408         fr->f_novirsum_n = natoms_f_novirsum;
1409         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1410         {
1411             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1412             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1413         }
1414     }
1415     else
1416     {
1417         fr->f_novirsum_n = 0;
1418     }
1419 }
1420
1421 static real cutoff_inf(real cutoff)
1422 {
1423     if (cutoff == 0)
1424     {
1425         cutoff = GMX_CUTOFF_INF;
1426     }
1427
1428     return cutoff;
1429 }
1430
1431 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1432                                   t_forcerec *fr, const t_inputrec *ir,
1433                                   const char *tabfn, const gmx_mtop_t *mtop,
1434                                   matrix     box)
1435 {
1436     char buf[STRLEN];
1437     int  i, j;
1438
1439     if (tabfn == NULL)
1440     {
1441         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1442         return;
1443     }
1444
1445     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1446
1447     sprintf(buf, "%s", tabfn);
1448     for (i = 0; i < ir->adress->n_tf_grps; i++)
1449     {
1450         j = ir->adress->tf_table_index[i]; /* get energy group index */
1451         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1452                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1453         if (fp)
1454         {
1455             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1456         }
1457         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1458     }
1459
1460 }
1461
1462 gmx_bool can_use_allvsall(const t_inputrec *ir, gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1463 {
1464     gmx_bool bAllvsAll;
1465
1466     bAllvsAll =
1467         (
1468             ir->rlist == 0            &&
1469             ir->rcoulomb == 0         &&
1470             ir->rvdw == 0             &&
1471             ir->ePBC == epbcNONE      &&
1472             ir->vdwtype == evdwCUT    &&
1473             ir->coulombtype == eelCUT &&
1474             ir->efep == efepNO        &&
1475             (ir->implicit_solvent == eisNO ||
1476              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1477                                                   ir->gb_algorithm == egbHCT   ||
1478                                                   ir->gb_algorithm == egbOBC))) &&
1479             getenv("GMX_NO_ALLVSALL") == NULL
1480         );
1481
1482     if (bAllvsAll && ir->opts.ngener > 1)
1483     {
1484         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1485
1486         if (bPrintNote)
1487         {
1488             if (MASTER(cr))
1489             {
1490                 fprintf(stderr, "\n%s\n", note);
1491             }
1492             if (fp != NULL)
1493             {
1494                 fprintf(fp, "\n%s\n", note);
1495             }
1496         }
1497         bAllvsAll = FALSE;
1498     }
1499
1500     if (bAllvsAll && fp && MASTER(cr))
1501     {
1502         fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1503     }
1504
1505     return bAllvsAll;
1506 }
1507
1508
1509 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1510 {
1511     int t, i;
1512
1513     /* These thread local data structures are used for bondeds only */
1514     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1515
1516     if (fr->nthreads > 1)
1517     {
1518         snew(fr->f_t, fr->nthreads);
1519         /* Thread 0 uses the global force and energy arrays */
1520         for (t = 1; t < fr->nthreads; t++)
1521         {
1522             fr->f_t[t].f        = NULL;
1523             fr->f_t[t].f_nalloc = 0;
1524             snew(fr->f_t[t].fshift, SHIFTS);
1525             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1526             for (i = 0; i < egNR; i++)
1527             {
1528                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1529             }
1530         }
1531     }
1532 }
1533
1534
1535 static void pick_nbnxn_kernel_cpu(const t_inputrec gmx_unused *ir,
1536                                   int                         *kernel_type,
1537                                   int                         *ewald_excl)
1538 {
1539     *kernel_type = nbnxnk4x4_PlainC;
1540     *ewald_excl  = ewaldexclTable;
1541
1542 #ifdef GMX_NBNXN_SIMD
1543     {
1544 #ifdef GMX_NBNXN_SIMD_4XN
1545         *kernel_type = nbnxnk4xN_SIMD_4xN;
1546 #endif
1547 #ifdef GMX_NBNXN_SIMD_2XNN
1548         /* We expect the 2xNN kernels to be faster in most cases */
1549         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1550 #endif
1551
1552 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1553         if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1554         {
1555             /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1556              * 10% with HT, 50% without HT, but extra zeros interactions
1557              * can compensate. As we currently don't detect the actual use
1558              * of HT, switch to 4x8 to avoid a potential performance hit.
1559              */
1560             *kernel_type = nbnxnk4xN_SIMD_4xN;
1561         }
1562 #endif
1563         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1564         {
1565 #ifdef GMX_NBNXN_SIMD_4XN
1566             *kernel_type = nbnxnk4xN_SIMD_4xN;
1567 #else
1568             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1569 #endif
1570         }
1571         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1572         {
1573 #ifdef GMX_NBNXN_SIMD_2XNN
1574             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1575 #else
1576             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1577 #endif
1578         }
1579
1580         /* Analytical Ewald exclusion correction is only an option in
1581          * the SIMD kernel. On BlueGene/Q, this is faster regardless
1582          * of precision. In single precision, this is faster on
1583          * Bulldozer, and slightly faster on Sandy Bridge.
1584          */
1585 #if ((defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE) || (defined GMX_CPU_ACCELERATION_IBM_QPX)
1586         *ewald_excl = ewaldexclAnalytical;
1587 #endif
1588         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1589         {
1590             *ewald_excl = ewaldexclTable;
1591         }
1592         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1593         {
1594             *ewald_excl = ewaldexclAnalytical;
1595         }
1596
1597     }
1598 #endif /* GMX_NBNXN_SIMD */
1599 }
1600
1601
1602 const char *lookup_nbnxn_kernel_name(int kernel_type)
1603 {
1604     const char *returnvalue = NULL;
1605     switch (kernel_type)
1606     {
1607         case nbnxnkNotSet:
1608             returnvalue = "not set";
1609             break;
1610         case nbnxnk4x4_PlainC:
1611             returnvalue = "plain C";
1612             break;
1613         case nbnxnk4xN_SIMD_4xN:
1614         case nbnxnk4xN_SIMD_2xNN:
1615 #ifdef GMX_NBNXN_SIMD
1616 #ifdef GMX_X86_SSE2
1617             /* We have x86 SSE2 compatible SIMD */
1618 #ifdef GMX_X86_AVX_128_FMA
1619             returnvalue = "AVX-128-FMA";
1620 #else
1621 #if defined GMX_X86_AVX_256 || defined __AVX__
1622             /* x86 SIMD intrinsics can be converted to SSE or AVX depending
1623              * on compiler flags. As we use nearly identical intrinsics,
1624              * compiling for AVX without an AVX macros effectively results
1625              * in AVX kernels.
1626              * For gcc we check for __AVX__
1627              * At least a check for icc should be added (if there is a macro)
1628              */
1629 #if defined GMX_X86_AVX_256 && !defined GMX_NBNXN_HALF_WIDTH_SIMD
1630             returnvalue = "AVX-256";
1631 #else
1632             returnvalue = "AVX-128";
1633 #endif
1634 #else
1635 #ifdef GMX_X86_SSE4_1
1636             returnvalue  = "SSE4.1";
1637 #else
1638             returnvalue  = "SSE2";
1639 #endif
1640 #endif
1641 #endif
1642 #else   /* GMX_X86_SSE2 */
1643             /* not GMX_X86_SSE2, but other SIMD */
1644             returnvalue  = "SIMD";
1645 #endif /* GMX_X86_SSE2 */
1646 #else  /* GMX_NBNXN_SIMD */
1647             returnvalue = "not available";
1648 #endif /* GMX_NBNXN_SIMD */
1649             break;
1650         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1651         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1652
1653         case nbnxnkNR:
1654         default:
1655             gmx_fatal(FARGS, "Illegal kernel type selected");
1656             returnvalue = NULL;
1657             break;
1658     }
1659     return returnvalue;
1660 };
1661
1662 static void pick_nbnxn_kernel(FILE                *fp,
1663                               const t_commrec     *cr,
1664                               gmx_bool             use_cpu_acceleration,
1665                               gmx_bool             bUseGPU,
1666                               gmx_bool             bEmulateGPU,
1667                               const t_inputrec    *ir,
1668                               int                 *kernel_type,
1669                               int                 *ewald_excl,
1670                               gmx_bool             bDoNonbonded)
1671 {
1672     assert(kernel_type);
1673
1674     *kernel_type = nbnxnkNotSet;
1675     *ewald_excl  = ewaldexclTable;
1676
1677     if (bEmulateGPU)
1678     {
1679         *kernel_type = nbnxnk8x8x8_PlainC;
1680
1681         if (bDoNonbonded)
1682         {
1683             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1684         }
1685     }
1686     else if (bUseGPU)
1687     {
1688         *kernel_type = nbnxnk8x8x8_CUDA;
1689     }
1690
1691     if (*kernel_type == nbnxnkNotSet)
1692     {
1693         if (use_cpu_acceleration)
1694         {
1695             pick_nbnxn_kernel_cpu(ir, kernel_type, ewald_excl);
1696         }
1697         else
1698         {
1699             *kernel_type = nbnxnk4x4_PlainC;
1700         }
1701     }
1702
1703     if (bDoNonbonded && fp != NULL)
1704     {
1705         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1706                 lookup_nbnxn_kernel_name(*kernel_type),
1707                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1708                 nbnxn_kernel_to_cj_size(*kernel_type));
1709     }
1710 }
1711
1712 static void pick_nbnxn_resources(const t_commrec     *cr,
1713                                  const gmx_hw_info_t *hwinfo,
1714                                  gmx_bool             bDoNonbonded,
1715                                  gmx_bool            *bUseGPU,
1716                                  gmx_bool            *bEmulateGPU,
1717                                  const gmx_gpu_opt_t *gpu_opt)
1718 {
1719     gmx_bool bEmulateGPUEnvVarSet;
1720     char     gpu_err_str[STRLEN];
1721
1722     *bUseGPU = FALSE;
1723
1724     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1725
1726     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1727      * GPUs (currently) only handle non-bonded calculations, we will
1728      * automatically switch to emulation if non-bonded calculations are
1729      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1730      * way to turn off GPU initialization, data movement, and cleanup.
1731      *
1732      * GPU emulation can be useful to assess the performance one can expect by
1733      * adding GPU(s) to the machine. The conditional below allows this even
1734      * if mdrun is compiled without GPU acceleration support.
1735      * Note that you should freezing the system as otherwise it will explode.
1736      */
1737     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1738                     (!bDoNonbonded &&
1739                      gpu_opt->ncuda_dev_use > 0));
1740
1741     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1742      */
1743     if (gpu_opt->ncuda_dev_use > 0 && !(*bEmulateGPU))
1744     {
1745         /* Each PP node will use the intra-node id-th device from the
1746          * list of detected/selected GPUs. */
1747         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str,
1748                       &hwinfo->gpu_info, gpu_opt))
1749         {
1750             /* At this point the init should never fail as we made sure that
1751              * we have all the GPUs we need. If it still does, we'll bail. */
1752             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1753                       cr->nodeid,
1754                       get_gpu_device_id(&hwinfo->gpu_info, gpu_opt,
1755                                         cr->rank_pp_intranode),
1756                       gpu_err_str);
1757         }
1758
1759         /* Here we actually turn on hardware GPU acceleration */
1760         *bUseGPU = TRUE;
1761     }
1762 }
1763
1764 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1765                             nonbonded_verlet_t *nbv,
1766                             int                 group)
1767 {
1768     gmx_bool bUsesSimpleTables = TRUE;
1769     int      grp_index;
1770
1771     switch (cutoff_scheme)
1772     {
1773         case ecutsGROUP:
1774             bUsesSimpleTables = TRUE;
1775             break;
1776         case ecutsVERLET:
1777             assert(NULL != nbv && NULL != nbv->grp);
1778             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1779             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1780             break;
1781         default:
1782             gmx_incons("unimplemented");
1783     }
1784     return bUsesSimpleTables;
1785 }
1786
1787 static void init_ewald_f_table(interaction_const_t *ic,
1788                                gmx_bool             bUsesSimpleTables,
1789                                real                 rtab)
1790 {
1791     real maxr;
1792
1793     if (bUsesSimpleTables)
1794     {
1795         /* With a spacing of 0.0005 we are at the force summation accuracy
1796          * for the SSE kernels for "normal" atomistic simulations.
1797          */
1798         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff_q,
1799                                                    ic->rcoulomb);
1800
1801         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1802         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1803     }
1804     else
1805     {
1806         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1807         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1808         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1809     }
1810
1811     sfree_aligned(ic->tabq_coul_FDV0);
1812     sfree_aligned(ic->tabq_coul_F);
1813     sfree_aligned(ic->tabq_coul_V);
1814
1815     /* Create the original table data in FDV0 */
1816     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1817     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1818     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1819     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1820                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff_q);
1821 }
1822
1823 void init_interaction_const_tables(FILE                *fp,
1824                                    interaction_const_t *ic,
1825                                    gmx_bool             bUsesSimpleTables,
1826                                    real                 rtab)
1827 {
1828     real spacing;
1829
1830     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1831     {
1832         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1833
1834         if (fp != NULL)
1835         {
1836             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1837                     1/ic->tabq_scale, ic->tabq_size);
1838         }
1839     }
1840 }
1841
1842 static void init_interaction_const(FILE                       *fp,
1843                                    const t_commrec gmx_unused *cr,
1844                                    interaction_const_t       **interaction_const,
1845                                    const t_forcerec           *fr,
1846                                    real                        rtab)
1847 {
1848     interaction_const_t *ic;
1849     gmx_bool             bUsesSimpleTables = TRUE;
1850
1851     snew(ic, 1);
1852
1853     /* Just allocate something so we can free it */
1854     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1855     snew_aligned(ic->tabq_coul_F, 16, 32);
1856     snew_aligned(ic->tabq_coul_V, 16, 32);
1857
1858     ic->rlist       = fr->rlist;
1859     ic->rlistlong   = fr->rlistlong;
1860
1861     /* Lennard-Jones */
1862     ic->rvdw        = fr->rvdw;
1863     if (fr->vdw_modifier == eintmodPOTSHIFT)
1864     {
1865         ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1866     }
1867     else
1868     {
1869         ic->sh_invrc6 = 0;
1870     }
1871
1872     /* Electrostatics */
1873     ic->eeltype     = fr->eeltype;
1874     ic->rcoulomb    = fr->rcoulomb;
1875     ic->epsilon_r   = fr->epsilon_r;
1876     ic->epsfac      = fr->epsfac;
1877
1878     /* Ewald */
1879     ic->ewaldcoeff_q   = fr->ewaldcoeff_q;
1880     ic->ewaldcoeff_lj  = fr->ewaldcoeff_lj;
1881
1882     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1883     {
1884         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff_q*ic->rcoulomb);
1885     }
1886     else
1887     {
1888         ic->sh_ewald = 0;
1889     }
1890
1891     /* Reaction-field */
1892     if (EEL_RF(ic->eeltype))
1893     {
1894         ic->epsilon_rf = fr->epsilon_rf;
1895         ic->k_rf       = fr->k_rf;
1896         ic->c_rf       = fr->c_rf;
1897     }
1898     else
1899     {
1900         /* For plain cut-off we might use the reaction-field kernels */
1901         ic->epsilon_rf = ic->epsilon_r;
1902         ic->k_rf       = 0;
1903         if (fr->coulomb_modifier == eintmodPOTSHIFT)
1904         {
1905             ic->c_rf   = 1/ic->rcoulomb;
1906         }
1907         else
1908         {
1909             ic->c_rf   = 0;
1910         }
1911     }
1912
1913     if (fp != NULL)
1914     {
1915         fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1916                 sqr(ic->sh_invrc6), ic->sh_invrc6);
1917         if (ic->eeltype == eelCUT)
1918         {
1919             fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1920         }
1921         else if (EEL_PME(ic->eeltype))
1922         {
1923             fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1924         }
1925         fprintf(fp, "\n");
1926     }
1927
1928     *interaction_const = ic;
1929
1930     if (fr->nbv != NULL && fr->nbv->bUseGPU)
1931     {
1932         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1933
1934         /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
1935          * also sharing texture references. To keep the code simple, we don't
1936          * treat texture references as shared resources, but this means that
1937          * the coulomb_tab and nbfp texture refs will get updated by multiple threads.
1938          * Hence, to ensure that the non-bonded kernels don't start before all
1939          * texture binding operations are finished, we need to wait for all ranks
1940          * to arrive here before continuing.
1941          *
1942          * Note that we could omit this barrier if GPUs are not shared (or
1943          * texture objects are used), but as this is initialization code, there
1944          * is not point in complicating things.
1945          */
1946 #ifdef GMX_THREAD_MPI
1947         if (PAR(cr))
1948         {
1949             gmx_barrier(cr);
1950         }
1951 #endif  /* GMX_THREAD_MPI */
1952     }
1953
1954     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1955     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1956 }
1957
1958 static void init_nb_verlet(FILE                *fp,
1959                            nonbonded_verlet_t **nb_verlet,
1960                            const t_inputrec    *ir,
1961                            const t_forcerec    *fr,
1962                            const t_commrec     *cr,
1963                            const char          *nbpu_opt)
1964 {
1965     nonbonded_verlet_t *nbv;
1966     int                 i;
1967     char               *env;
1968     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
1969
1970     nbnxn_alloc_t      *nb_alloc;
1971     nbnxn_free_t       *nb_free;
1972
1973     snew(nbv, 1);
1974
1975     pick_nbnxn_resources(cr, fr->hwinfo,
1976                          fr->bNonbonded,
1977                          &nbv->bUseGPU,
1978                          &bEmulateGPU,
1979                          fr->gpu_opt);
1980
1981     nbv->nbs = NULL;
1982
1983     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1984     for (i = 0; i < nbv->ngrp; i++)
1985     {
1986         nbv->grp[i].nbl_lists.nnbl = 0;
1987         nbv->grp[i].nbat           = NULL;
1988         nbv->grp[i].kernel_type    = nbnxnkNotSet;
1989
1990         if (i == 0) /* local */
1991         {
1992             pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
1993                               nbv->bUseGPU, bEmulateGPU, ir,
1994                               &nbv->grp[i].kernel_type,
1995                               &nbv->grp[i].ewald_excl,
1996                               fr->bNonbonded);
1997         }
1998         else /* non-local */
1999         {
2000             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
2001             {
2002                 /* Use GPU for local, select a CPU kernel for non-local */
2003                 pick_nbnxn_kernel(fp, cr, fr->use_cpu_acceleration,
2004                                   FALSE, FALSE, ir,
2005                                   &nbv->grp[i].kernel_type,
2006                                   &nbv->grp[i].ewald_excl,
2007                                   fr->bNonbonded);
2008
2009                 bHybridGPURun = TRUE;
2010             }
2011             else
2012             {
2013                 /* Use the same kernel for local and non-local interactions */
2014                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
2015                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
2016             }
2017         }
2018     }
2019
2020     if (nbv->bUseGPU)
2021     {
2022         /* init the NxN GPU data; the last argument tells whether we'll have
2023          * both local and non-local NB calculation on GPU */
2024         nbnxn_cuda_init(fp, &nbv->cu_nbv,
2025                         &fr->hwinfo->gpu_info, fr->gpu_opt,
2026                         cr->rank_pp_intranode,
2027                         (nbv->ngrp > 1) && !bHybridGPURun);
2028
2029         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
2030         {
2031             char *end;
2032
2033             nbv->min_ci_balanced = strtol(env, &end, 10);
2034             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
2035             {
2036                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
2037             }
2038
2039             if (debug)
2040             {
2041                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
2042                         nbv->min_ci_balanced);
2043             }
2044         }
2045         else
2046         {
2047             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
2048             if (debug)
2049             {
2050                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
2051                         nbv->min_ci_balanced);
2052             }
2053         }
2054     }
2055     else
2056     {
2057         nbv->min_ci_balanced = 0;
2058     }
2059
2060     *nb_verlet = nbv;
2061
2062     nbnxn_init_search(&nbv->nbs,
2063                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
2064                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
2065                       gmx_omp_nthreads_get(emntNonbonded));
2066
2067     for (i = 0; i < nbv->ngrp; i++)
2068     {
2069         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
2070         {
2071             nb_alloc = &pmalloc;
2072             nb_free  = &pfree;
2073         }
2074         else
2075         {
2076             nb_alloc = NULL;
2077             nb_free  = NULL;
2078         }
2079
2080         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
2081                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2082                                 /* 8x8x8 "non-simple" lists are ATM always combined */
2083                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
2084                                 nb_alloc, nb_free);
2085
2086         if (i == 0 ||
2087             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2088         {
2089             snew(nbv->grp[i].nbat, 1);
2090             nbnxn_atomdata_init(fp,
2091                                 nbv->grp[i].nbat,
2092                                 nbv->grp[i].kernel_type,
2093                                 fr->ntype, fr->nbfp,
2094                                 ir->opts.ngener,
2095                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2096                                 nb_alloc, nb_free);
2097         }
2098         else
2099         {
2100             nbv->grp[i].nbat = nbv->grp[0].nbat;
2101         }
2102     }
2103 }
2104
2105 void init_forcerec(FILE              *fp,
2106                    const output_env_t oenv,
2107                    t_forcerec        *fr,
2108                    t_fcdata          *fcd,
2109                    const t_inputrec  *ir,
2110                    const gmx_mtop_t  *mtop,
2111                    const t_commrec   *cr,
2112                    matrix             box,
2113                    const char        *tabfn,
2114                    const char        *tabafn,
2115                    const char        *tabpfn,
2116                    const char        *tabbfn,
2117                    const char        *nbpu_opt,
2118                    gmx_bool           bNoSolvOpt,
2119                    real               print_force)
2120 {
2121     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2122     real           rtab;
2123     char          *env;
2124     double         dbl;
2125     const t_block *cgs;
2126     gmx_bool       bGenericKernelOnly;
2127     gmx_bool       bTab, bSep14tab, bNormalnblists;
2128     t_nblists     *nbl;
2129     int           *nm_ind, egp_flags;
2130
2131     if (fr->hwinfo == NULL)
2132     {
2133         /* Detect hardware, gather information.
2134          * In mdrun, hwinfo has already been set before calling init_forcerec.
2135          * Here we ignore GPUs, as tools will not use them anyhow.
2136          */
2137         fr->hwinfo = gmx_detect_hardware(fp, cr, FALSE);
2138     }
2139
2140     /* By default we turn acceleration on, but it might be turned off further down... */
2141     fr->use_cpu_acceleration = TRUE;
2142
2143     fr->bDomDec = DOMAINDECOMP(cr);
2144
2145     natoms = mtop->natoms;
2146
2147     if (check_box(ir->ePBC, box))
2148     {
2149         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2150     }
2151
2152     /* Test particle insertion ? */
2153     if (EI_TPI(ir->eI))
2154     {
2155         /* Set to the size of the molecule to be inserted (the last one) */
2156         /* Because of old style topologies, we have to use the last cg
2157          * instead of the last molecule type.
2158          */
2159         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2160         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2161         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2162         {
2163             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2164         }
2165     }
2166     else
2167     {
2168         fr->n_tpi = 0;
2169     }
2170
2171     /* Copy AdResS parameters */
2172     if (ir->bAdress)
2173     {
2174         fr->adress_type           = ir->adress->type;
2175         fr->adress_const_wf       = ir->adress->const_wf;
2176         fr->adress_ex_width       = ir->adress->ex_width;
2177         fr->adress_hy_width       = ir->adress->hy_width;
2178         fr->adress_icor           = ir->adress->icor;
2179         fr->adress_site           = ir->adress->site;
2180         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2181         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2182
2183
2184         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2185         for (i = 0; i < ir->adress->n_energy_grps; i++)
2186         {
2187             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2188         }
2189
2190         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2191         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2192         for (i = 0; i < fr->n_adress_tf_grps; i++)
2193         {
2194             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2195         }
2196         copy_rvec(ir->adress->refs, fr->adress_refs);
2197     }
2198     else
2199     {
2200         fr->adress_type           = eAdressOff;
2201         fr->adress_do_hybridpairs = FALSE;
2202     }
2203
2204     /* Copy the user determined parameters */
2205     fr->userint1  = ir->userint1;
2206     fr->userint2  = ir->userint2;
2207     fr->userint3  = ir->userint3;
2208     fr->userint4  = ir->userint4;
2209     fr->userreal1 = ir->userreal1;
2210     fr->userreal2 = ir->userreal2;
2211     fr->userreal3 = ir->userreal3;
2212     fr->userreal4 = ir->userreal4;
2213
2214     /* Shell stuff */
2215     fr->fc_stepsize = ir->fc_stepsize;
2216
2217     /* Free energy */
2218     fr->efep        = ir->efep;
2219     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2220     if (ir->fepvals->bScCoul)
2221     {
2222         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2223         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2224     }
2225     else
2226     {
2227         fr->sc_alphacoul  = 0;
2228         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2229     }
2230     fr->sc_power      = ir->fepvals->sc_power;
2231     fr->sc_r_power    = ir->fepvals->sc_r_power;
2232     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2233
2234     env = getenv("GMX_SCSIGMA_MIN");
2235     if (env != NULL)
2236     {
2237         dbl = 0;
2238         sscanf(env, "%lf", &dbl);
2239         fr->sc_sigma6_min = pow(dbl, 6);
2240         if (fp)
2241         {
2242             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2243         }
2244     }
2245
2246     fr->bNonbonded = TRUE;
2247     if (getenv("GMX_NO_NONBONDED") != NULL)
2248     {
2249         /* turn off non-bonded calculations */
2250         fr->bNonbonded = FALSE;
2251         md_print_warn(cr, fp,
2252                       "Found environment variable GMX_NO_NONBONDED.\n"
2253                       "Disabling nonbonded calculations.\n");
2254     }
2255
2256     bGenericKernelOnly = FALSE;
2257
2258     /* We now check in the NS code whether a particular combination of interactions
2259      * can be used with water optimization, and disable it if that is not the case.
2260      */
2261
2262     if (getenv("GMX_NB_GENERIC") != NULL)
2263     {
2264         if (fp != NULL)
2265         {
2266             fprintf(fp,
2267                     "Found environment variable GMX_NB_GENERIC.\n"
2268                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2269                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2270         }
2271         bGenericKernelOnly = TRUE;
2272     }
2273
2274     if (bGenericKernelOnly == TRUE)
2275     {
2276         bNoSolvOpt         = TRUE;
2277     }
2278
2279     if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2280     {
2281         fr->use_cpu_acceleration = FALSE;
2282         if (fp != NULL)
2283         {
2284             fprintf(fp,
2285                     "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2286                     "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2287         }
2288     }
2289
2290     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2291
2292     /* Check if we can/should do all-vs-all kernels */
2293     fr->bAllvsAll       = can_use_allvsall(ir, FALSE, NULL, NULL);
2294     fr->AllvsAll_work   = NULL;
2295     fr->AllvsAll_workgb = NULL;
2296
2297     /* All-vs-all kernels have not been implemented in 4.6, and
2298      * the SIMD group kernels are also buggy in this case. Non-accelerated
2299      * group kernels are OK. See Redmine #1249. */
2300     if (fr->bAllvsAll)
2301     {
2302         fr->bAllvsAll            = FALSE;
2303         fr->use_cpu_acceleration = FALSE;
2304         if (fp != NULL)
2305         {
2306             fprintf(fp,
2307                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2308                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2309                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2310                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2311                     "we are proceeding by disabling all CPU architecture-specific\n"
2312                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2313                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2314         }
2315     }
2316
2317     /* Neighbour searching stuff */
2318     fr->cutoff_scheme = ir->cutoff_scheme;
2319     fr->bGrid         = (ir->ns_type == ensGRID);
2320     fr->ePBC          = ir->ePBC;
2321
2322     /* Determine if we will do PBC for distances in bonded interactions */
2323     if (fr->ePBC == epbcNONE)
2324     {
2325         fr->bMolPBC = FALSE;
2326     }
2327     else
2328     {
2329         if (!DOMAINDECOMP(cr))
2330         {
2331             /* The group cut-off scheme and SHAKE assume charge groups
2332              * are whole, but not using molpbc is faster in most cases.
2333              */
2334             if (fr->cutoff_scheme == ecutsGROUP ||
2335                 (ir->eConstrAlg == econtSHAKE &&
2336                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2337                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2338             {
2339                 fr->bMolPBC = ir->bPeriodicMols;
2340             }
2341             else
2342             {
2343                 fr->bMolPBC = TRUE;
2344                 if (getenv("GMX_USE_GRAPH") != NULL)
2345                 {
2346                     fr->bMolPBC = FALSE;
2347                     if (fp)
2348                     {
2349                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2350                     }
2351                 }
2352             }
2353         }
2354         else
2355         {
2356             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2357         }
2358     }
2359     fr->bGB = (ir->implicit_solvent == eisGBSA);
2360
2361     fr->rc_scaling = ir->refcoord_scaling;
2362     copy_rvec(ir->posres_com, fr->posres_com);
2363     copy_rvec(ir->posres_comB, fr->posres_comB);
2364     fr->rlist                    = cutoff_inf(ir->rlist);
2365     fr->rlistlong                = cutoff_inf(ir->rlistlong);
2366     fr->eeltype                  = ir->coulombtype;
2367     fr->vdwtype                  = ir->vdwtype;
2368     fr->ljpme_combination_rule   = ir->ljpme_combination_rule;
2369
2370     fr->coulomb_modifier = ir->coulomb_modifier;
2371     fr->vdw_modifier     = ir->vdw_modifier;
2372
2373     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2374     switch (fr->eeltype)
2375     {
2376         case eelCUT:
2377             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2378             break;
2379
2380         case eelRF:
2381         case eelGRF:
2382         case eelRF_NEC:
2383             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2384             break;
2385
2386         case eelRF_ZERO:
2387             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2388             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2389             break;
2390
2391         case eelSWITCH:
2392         case eelSHIFT:
2393         case eelUSER:
2394         case eelENCADSHIFT:
2395         case eelPMESWITCH:
2396         case eelPMEUSER:
2397         case eelPMEUSERSWITCH:
2398             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2399             break;
2400
2401         case eelPME:
2402         case eelEWALD:
2403             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2404             break;
2405
2406         default:
2407             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2408             break;
2409     }
2410
2411     /* Vdw: Translate from mdp settings to kernel format */
2412     switch (fr->vdwtype)
2413     {
2414         case evdwCUT:
2415         case evdwPME:
2416             if (fr->bBHAM)
2417             {
2418                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2419             }
2420             else
2421             {
2422                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2423             }
2424             break;
2425
2426         case evdwSWITCH:
2427         case evdwSHIFT:
2428         case evdwUSER:
2429         case evdwENCADSHIFT:
2430             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2431             break;
2432
2433         default:
2434             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2435             break;
2436     }
2437
2438     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2439     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2440     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2441
2442     fr->bTwinRange = fr->rlistlong > fr->rlist;
2443     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2444
2445     fr->reppow     = mtop->ffparams.reppow;
2446
2447     if (ir->cutoff_scheme == ecutsGROUP)
2448     {
2449         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2450                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2451         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2452         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2453                            fr->eeltype == eelEWALD ||
2454                            fr->eeltype == eelPME ||
2455                            fr->eeltype == eelRF ||
2456                            fr->eeltype == eelRF_ZERO);
2457
2458         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2459          * going to be faster to tabulate the interaction than calling the generic kernel.
2460          */
2461         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2462         {
2463             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2464             {
2465                 fr->bcoultab = TRUE;
2466             }
2467         }
2468         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2469                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2470                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2471                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2472         {
2473             if (fr->rcoulomb != fr->rvdw)
2474             {
2475                 fr->bcoultab = TRUE;
2476             }
2477         }
2478
2479         if (getenv("GMX_REQUIRE_TABLES"))
2480         {
2481             fr->bvdwtab  = TRUE;
2482             fr->bcoultab = TRUE;
2483         }
2484
2485         if (fp)
2486         {
2487             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2488             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2489         }
2490
2491         if (fr->bvdwtab == TRUE)
2492         {
2493             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2494             fr->nbkernel_vdw_modifier    = eintmodNONE;
2495         }
2496         if (fr->bcoultab == TRUE)
2497         {
2498             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2499             fr->nbkernel_elec_modifier    = eintmodNONE;
2500         }
2501     }
2502
2503     if (ir->cutoff_scheme == ecutsVERLET)
2504     {
2505         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2506         {
2507             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2508         }
2509         fr->bvdwtab  = FALSE;
2510         fr->bcoultab = FALSE;
2511     }
2512
2513     /* Tables are used for direct ewald sum */
2514     if (fr->bEwald)
2515     {
2516         if (EEL_PME(ir->coulombtype))
2517         {
2518             if (fp)
2519             {
2520                 fprintf(fp, "Will do PME sum in reciprocal space for electrostatic interactions.\n");
2521             }
2522             if (ir->coulombtype == eelP3M_AD)
2523             {
2524                 please_cite(fp, "Hockney1988");
2525                 please_cite(fp, "Ballenegger2012");
2526             }
2527             else
2528             {
2529                 please_cite(fp, "Essmann95a");
2530             }
2531
2532             if (ir->ewald_geometry == eewg3DC)
2533             {
2534                 if (fp)
2535                 {
2536                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2537                 }
2538                 please_cite(fp, "In-Chul99a");
2539             }
2540         }
2541         fr->ewaldcoeff_q = calc_ewaldcoeff_q(ir->rcoulomb, ir->ewald_rtol);
2542         init_ewald_tab(&(fr->ewald_table), ir, fp);
2543         if (fp)
2544         {
2545             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2546                     1/fr->ewaldcoeff_q);
2547         }
2548     }
2549
2550     if (EVDW_PME(ir->vdwtype))
2551     {
2552         if (fp)
2553         {
2554             fprintf(fp, "Will do PME sum in reciprocal space for LJ dispersion interactions.\n");
2555         }
2556         please_cite(fp, "Essman95a");
2557         fr->ewaldcoeff_lj = calc_ewaldcoeff_lj(ir->rvdw, ir->ewald_rtol_lj);
2558         if (fp)
2559         {
2560             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for LJ Ewald\n",
2561                     1/fr->ewaldcoeff_lj);
2562         }
2563     }
2564
2565     /* Electrostatics */
2566     fr->epsilon_r       = ir->epsilon_r;
2567     fr->epsilon_rf      = ir->epsilon_rf;
2568     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2569     fr->rcoulomb_switch = ir->rcoulomb_switch;
2570     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2571
2572     /* Parameters for generalized RF */
2573     fr->zsquare = 0.0;
2574     fr->temp    = 0.0;
2575
2576     if (fr->eeltype == eelGRF)
2577     {
2578         init_generalized_rf(fp, mtop, ir, fr);
2579     }
2580
2581     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) || EVDW_PME(fr->vdwtype) ||
2582                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2583                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2584                        IR_ELEC_FIELD(*ir) ||
2585                        (fr->adress_icor != eAdressICOff)
2586                        );
2587
2588     if (fr->cutoff_scheme == ecutsGROUP &&
2589         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2590     {
2591         /* Count the total number of charge groups */
2592         fr->cg_nalloc = ncg_mtop(mtop);
2593         srenew(fr->cg_cm, fr->cg_nalloc);
2594     }
2595     if (fr->shift_vec == NULL)
2596     {
2597         snew(fr->shift_vec, SHIFTS);
2598     }
2599
2600     if (fr->fshift == NULL)
2601     {
2602         snew(fr->fshift, SHIFTS);
2603     }
2604
2605     if (fr->nbfp == NULL)
2606     {
2607         fr->ntype = mtop->ffparams.atnr;
2608         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2609     }
2610
2611     /* Copy the energy group exclusions */
2612     fr->egp_flags = ir->opts.egp_flags;
2613
2614     /* Van der Waals stuff */
2615     fr->rvdw        = cutoff_inf(ir->rvdw);
2616     fr->rvdw_switch = ir->rvdw_switch;
2617     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2618     {
2619         if (fr->rvdw_switch >= fr->rvdw)
2620         {
2621             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2622                       fr->rvdw_switch, fr->rvdw);
2623         }
2624         if (fp)
2625         {
2626             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2627                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2628                     fr->rvdw_switch, fr->rvdw);
2629         }
2630     }
2631
2632     if (fr->bBHAM && EVDW_PME(fr->vdwtype))
2633     {
2634         gmx_fatal(FARGS, "LJ PME not supported with Buckingham");
2635     }
2636
2637     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2638     {
2639         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2640     }
2641
2642     if (fp)
2643     {
2644         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2645                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2646     }
2647
2648     fr->eDispCorr = ir->eDispCorr;
2649     if (ir->eDispCorr != edispcNO)
2650     {
2651         set_avcsixtwelve(fp, fr, mtop);
2652     }
2653
2654     if (fr->bBHAM)
2655     {
2656         set_bham_b_max(fp, fr, mtop);
2657     }
2658
2659     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2660
2661     /* Copy the GBSA data (radius, volume and surftens for each
2662      * atomtype) from the topology atomtype section to forcerec.
2663      */
2664     snew(fr->atype_radius, fr->ntype);
2665     snew(fr->atype_vol, fr->ntype);
2666     snew(fr->atype_surftens, fr->ntype);
2667     snew(fr->atype_gb_radius, fr->ntype);
2668     snew(fr->atype_S_hct, fr->ntype);
2669
2670     if (mtop->atomtypes.nr > 0)
2671     {
2672         for (i = 0; i < fr->ntype; i++)
2673         {
2674             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2675         }
2676         for (i = 0; i < fr->ntype; i++)
2677         {
2678             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2679         }
2680         for (i = 0; i < fr->ntype; i++)
2681         {
2682             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2683         }
2684         for (i = 0; i < fr->ntype; i++)
2685         {
2686             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2687         }
2688         for (i = 0; i < fr->ntype; i++)
2689         {
2690             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2691         }
2692     }
2693
2694     /* Generate the GB table if needed */
2695     if (fr->bGB)
2696     {
2697 #ifdef GMX_DOUBLE
2698         fr->gbtabscale = 2000;
2699 #else
2700         fr->gbtabscale = 500;
2701 #endif
2702
2703         fr->gbtabr = 100;
2704         fr->gbtab  = make_gb_table(oenv, fr);
2705
2706         init_gb(&fr->born, cr, fr, ir, mtop, ir->gb_algorithm);
2707
2708         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2709         if (!DOMAINDECOMP(cr))
2710         {
2711             make_local_gb(cr, fr->born, ir->gb_algorithm);
2712         }
2713     }
2714
2715     /* Set the charge scaling */
2716     if (fr->epsilon_r != 0)
2717     {
2718         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2719     }
2720     else
2721     {
2722         /* eps = 0 is infinite dieletric: no coulomb interactions */
2723         fr->epsfac = 0;
2724     }
2725
2726     /* Reaction field constants */
2727     if (EEL_RF(fr->eeltype))
2728     {
2729         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2730                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2731                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2732     }
2733
2734     /*This now calculates sum for q and c6*/
2735     set_chargesum(fp, fr, mtop);
2736
2737     /* if we are using LR electrostatics, and they are tabulated,
2738      * the tables will contain modified coulomb interactions.
2739      * Since we want to use the non-shifted ones for 1-4
2740      * coulombic interactions, we must have an extra set of tables.
2741      */
2742
2743     /* Construct tables.
2744      * A little unnecessary to make both vdw and coul tables sometimes,
2745      * but what the heck... */
2746
2747     bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2748
2749     bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2750                   fr->bBHAM || fr->bEwald) &&
2751                  (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2752                   gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2753                   gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2754
2755     negp_pp   = ir->opts.ngener - ir->nwall;
2756     negptable = 0;
2757     if (!bTab)
2758     {
2759         bNormalnblists = TRUE;
2760         fr->nnblists   = 1;
2761     }
2762     else
2763     {
2764         bNormalnblists = (ir->eDispCorr != edispcNO);
2765         for (egi = 0; egi < negp_pp; egi++)
2766         {
2767             for (egj = egi; egj < negp_pp; egj++)
2768             {
2769                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2770                 if (!(egp_flags & EGP_EXCL))
2771                 {
2772                     if (egp_flags & EGP_TABLE)
2773                     {
2774                         negptable++;
2775                     }
2776                     else
2777                     {
2778                         bNormalnblists = TRUE;
2779                     }
2780                 }
2781             }
2782         }
2783         if (bNormalnblists)
2784         {
2785             fr->nnblists = negptable + 1;
2786         }
2787         else
2788         {
2789             fr->nnblists = negptable;
2790         }
2791         if (fr->nnblists > 1)
2792         {
2793             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2794         }
2795     }
2796
2797     if (ir->adress)
2798     {
2799         fr->nnblists *= 2;
2800     }
2801
2802     snew(fr->nblists, fr->nnblists);
2803
2804     /* This code automatically gives table length tabext without cut-off's,
2805      * in that case grompp should already have checked that we do not need
2806      * normal tables and we only generate tables for 1-4 interactions.
2807      */
2808     rtab = ir->rlistlong + ir->tabext;
2809
2810     if (bTab)
2811     {
2812         /* make tables for ordinary interactions */
2813         if (bNormalnblists)
2814         {
2815             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2816             if (ir->adress)
2817             {
2818                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2819             }
2820             if (!bSep14tab)
2821             {
2822                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2823             }
2824             m = 1;
2825         }
2826         else
2827         {
2828             m = 0;
2829         }
2830         if (negptable > 0)
2831         {
2832             /* Read the special tables for certain energy group pairs */
2833             nm_ind = mtop->groups.grps[egcENER].nm_ind;
2834             for (egi = 0; egi < negp_pp; egi++)
2835             {
2836                 for (egj = egi; egj < negp_pp; egj++)
2837                 {
2838                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2839                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2840                     {
2841                         nbl = &(fr->nblists[m]);
2842                         if (fr->nnblists > 1)
2843                         {
2844                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2845                         }
2846                         /* Read the table file with the two energy groups names appended */
2847                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2848                                         *mtop->groups.grpname[nm_ind[egi]],
2849                                         *mtop->groups.grpname[nm_ind[egj]],
2850                                         &fr->nblists[m]);
2851                         if (ir->adress)
2852                         {
2853                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2854                                             *mtop->groups.grpname[nm_ind[egi]],
2855                                             *mtop->groups.grpname[nm_ind[egj]],
2856                                             &fr->nblists[fr->nnblists/2+m]);
2857                         }
2858                         m++;
2859                     }
2860                     else if (fr->nnblists > 1)
2861                     {
2862                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2863                     }
2864                 }
2865             }
2866         }
2867     }
2868     if (bSep14tab)
2869     {
2870         /* generate extra tables with plain Coulomb for 1-4 interactions only */
2871         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2872                                 GMX_MAKETABLES_14ONLY);
2873     }
2874
2875     /* Read AdResS Thermo Force table if needed */
2876     if (fr->adress_icor == eAdressICThermoForce)
2877     {
2878         /* old todo replace */
2879
2880         if (ir->adress->n_tf_grps > 0)
2881         {
2882             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2883
2884         }
2885         else
2886         {
2887             /* load the default table */
2888             snew(fr->atf_tabs, 1);
2889             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2890         }
2891     }
2892
2893     /* Wall stuff */
2894     fr->nwall = ir->nwall;
2895     if (ir->nwall && ir->wall_type == ewtTABLE)
2896     {
2897         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2898     }
2899
2900     if (fcd && tabbfn)
2901     {
2902         fcd->bondtab  = make_bonded_tables(fp,
2903                                            F_TABBONDS, F_TABBONDSNC,
2904                                            mtop, tabbfn, "b");
2905         fcd->angletab = make_bonded_tables(fp,
2906                                            F_TABANGLES, -1,
2907                                            mtop, tabbfn, "a");
2908         fcd->dihtab   = make_bonded_tables(fp,
2909                                            F_TABDIHS, -1,
2910                                            mtop, tabbfn, "d");
2911     }
2912     else
2913     {
2914         if (debug)
2915         {
2916             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2917         }
2918     }
2919
2920     /* QM/MM initialization if requested
2921      */
2922     if (ir->bQMMM)
2923     {
2924         fprintf(stderr, "QM/MM calculation requested.\n");
2925     }
2926
2927     fr->bQMMM      = ir->bQMMM;
2928     fr->qr         = mk_QMMMrec();
2929
2930     /* Set all the static charge group info */
2931     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2932                                    &fr->bExcl_IntraCGAll_InterCGNone);
2933     if (DOMAINDECOMP(cr))
2934     {
2935         fr->cginfo = NULL;
2936     }
2937     else
2938     {
2939         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2940     }
2941
2942     if (!DOMAINDECOMP(cr))
2943     {
2944         /* When using particle decomposition, the effect of the second argument,
2945          * which sets fr->hcg, is corrected later in do_md and init_em.
2946          */
2947         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2948                             mtop->natoms, mtop->natoms, mtop->natoms);
2949     }
2950
2951     fr->print_force = print_force;
2952
2953
2954     /* coarse load balancing vars */
2955     fr->t_fnbf    = 0.;
2956     fr->t_wait    = 0.;
2957     fr->timesteps = 0;
2958
2959     /* Initialize neighbor search */
2960     init_ns(fp, cr, &fr->ns, fr, mtop);
2961
2962     if (cr->duty & DUTY_PP)
2963     {
2964         gmx_nonbonded_setup(fr, bGenericKernelOnly);
2965         /*
2966            if (ir->bAdress)
2967             {
2968                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2969             }
2970          */
2971     }
2972
2973     /* Initialize the thread working data for bonded interactions */
2974     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2975
2976     snew(fr->excl_load, fr->nthreads+1);
2977
2978     if (fr->cutoff_scheme == ecutsVERLET)
2979     {
2980         if (ir->rcoulomb != ir->rvdw)
2981         {
2982             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2983         }
2984
2985         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2986     }
2987
2988     /* fr->ic is used both by verlet and group kernels (to some extent) now */
2989     init_interaction_const(fp, cr, &fr->ic, fr, rtab);
2990
2991     if (ir->eDispCorr != edispcNO)
2992     {
2993         calc_enervirdiff(fp, ir->eDispCorr, fr);
2994     }
2995 }
2996
2997 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2998 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
2999 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
3000
3001 void pr_forcerec(FILE *fp, t_forcerec *fr)
3002 {
3003     int i;
3004
3005     pr_real(fp, fr->rlist);
3006     pr_real(fp, fr->rcoulomb);
3007     pr_real(fp, fr->fudgeQQ);
3008     pr_bool(fp, fr->bGrid);
3009     pr_bool(fp, fr->bTwinRange);
3010     /*pr_int(fp,fr->cg0);
3011        pr_int(fp,fr->hcg);*/
3012     for (i = 0; i < fr->nnblists; i++)
3013     {
3014         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
3015     }
3016     pr_real(fp, fr->rcoulomb_switch);
3017     pr_real(fp, fr->rcoulomb);
3018
3019     fflush(fp);
3020 }
3021
3022 void forcerec_set_excl_load(t_forcerec *fr,
3023                             const gmx_localtop_t *top, const t_commrec *cr)
3024 {
3025     const int *ind, *a;
3026     int        t, i, j, ntot, n, ntarget;
3027
3028     if (cr != NULL && PARTDECOMP(cr))
3029     {
3030         /* No OpenMP with particle decomposition */
3031         pd_at_range(cr,
3032                     &fr->excl_load[0],
3033                     &fr->excl_load[1]);
3034
3035         return;
3036     }
3037
3038     ind = top->excls.index;
3039     a   = top->excls.a;
3040
3041     ntot = 0;
3042     for (i = 0; i < top->excls.nr; i++)
3043     {
3044         for (j = ind[i]; j < ind[i+1]; j++)
3045         {
3046             if (a[j] > i)
3047             {
3048                 ntot++;
3049             }
3050         }
3051     }
3052
3053     fr->excl_load[0] = 0;
3054     n                = 0;
3055     i                = 0;
3056     for (t = 1; t <= fr->nthreads; t++)
3057     {
3058         ntarget = (ntot*t)/fr->nthreads;
3059         while (i < top->excls.nr && n < ntarget)
3060         {
3061             for (j = ind[i]; j < ind[i+1]; j++)
3062             {
3063                 if (a[j] > i)
3064                 {
3065                     n++;
3066                 }
3067             }
3068             i++;
3069         }
3070         fr->excl_load[t] = i;
3071     }
3072 }