src/gromacs/mdlib/forcerec.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <assert.h>
  43 #include "sysstuff.h"
  44 #include "typedefs.h"
  45 #include "vec.h"
  46 #include "maths.h"
  47 #include "macros.h"
  48 #include "smalloc.h"
  49 #include "macros.h"
  50 #include "gmx_fatal.h"
  51 #include "gmx_fatal_collective.h"
  52 #include "physics.h"
  53 #include "force.h"
  54 #include "tables.h"
  55 #include "nonbonded.h"
  56 #include "invblock.h"
  57 #include "names.h"
  58 #include "network.h"
  59 #include "pbc.h"
  60 #include "ns.h"
  61 #include "mshift.h"
  62 #include "txtdump.h"
  63 #include "coulomb.h"
  64 #include "md_support.h"
  65 #include "md_logging.h"
  66 #include "domdec.h"
  67 #include "partdec.h"
  68 #include "qmmm.h"
  69 #include "copyrite.h"
  70 #include "mtop_util.h"
  71 #include "nbnxn_search.h"
  72 #include "nbnxn_atomdata.h"
  73 #include "nbnxn_consts.h"
  74 #include "statutil.h"
  75 #include "gmx_omp_nthreads.h"
  76 #include "gmx_detect_hardware.h"
  77
  78 #ifdef _MSC_VER
  79 /* MSVC definition for __cpuid() */
  80 #include <intrin.h>
  81 #endif
  82
  83 #include "types/nbnxn_cuda_types_ext.h"
  84 #include "gpu_utils.h"
  85 #include "nbnxn_cuda_data_mgmt.h"
  86 #include "pmalloc_cuda.h"
  87
  88 t_forcerec *mk_forcerec(void)
  89 {
  90     t_forcerec *fr;
  91
  92     snew(fr, 1);
  93
  94     return fr;
  95 }
  96
  97 #ifdef DEBUG
  98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  99 {
 100     int i, j;
 101
 102     for (i = 0; (i < atnr); i++)
 103     {
 104         for (j = 0; (j < atnr); j++)
 105         {
 106             fprintf(fp, "%2d - %2d", i, j);
 107             if (bBHAM)
 108             {
 109                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 110                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 111             }
 112             else
 113             {
 114                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 115                         C12(nbfp, atnr, i, j)/12.0);
 116             }
 117         }
 118     }
 119 }
 120 #endif
 121
 122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 123 {
 124     real *nbfp;
 125     int   i, j, k, atnr;
 126
 127     atnr = idef->atnr;
 128     if (bBHAM)
 129     {
 130         snew(nbfp, 3*atnr*atnr);
 131         for (i = k = 0; (i < atnr); i++)
 132         {
 133             for (j = 0; (j < atnr); j++, k++)
 134             {
 135                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 136                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 137                 /* nbfp now includes the 6.0 derivative prefactor */
 138                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 139             }
 140         }
 141     }
 142     else
 143     {
 144         snew(nbfp, 2*atnr*atnr);
 145         for (i = k = 0; (i < atnr); i++)
 146         {
 147             for (j = 0; (j < atnr); j++, k++)
 148             {
 149                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 150                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 151                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 152             }
 153         }
 154     }
 155
 156     return nbfp;
 157 }
 158
 159 /* This routine sets fr->solvent_opt to the most common solvent in the
 160  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 161  * the fr->solvent_type array with the correct type (or esolNO).
 162  *
 163  * Charge groups that fulfill the conditions but are not identical to the
 164  * most common one will be marked as esolNO in the solvent_type array.
 165  *
 166  * TIP3p is identical to SPC for these purposes, so we call it
 167  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 168  *
  169  * NOTE: QM particles should not
  170  * become an optimized solvent, not even if there is only one charge
  171  * group in the QM subsystem.
 172  */
 173
 174 typedef struct
 175 {
 176     int    model;
 177     int    count;
 178     int    vdwtype[4];
 179     real   charge[4];
 180 } solvent_parameters_t;
 181
 182 static void
 183 check_solvent_cg(const gmx_moltype_t    *molt,
 184                  int                     cg0,
 185                  int                     nmol,
 186                  const unsigned char    *qm_grpnr,
 187                  const t_grps           *qm_grps,
 188                  t_forcerec   *          fr,
 189                  int                    *n_solvent_parameters,
 190                  solvent_parameters_t  **solvent_parameters_p,
 191                  int                     cginfo,
 192                  int                    *cg_sp)
 193 {
 194     const t_blocka     *  excl;
 195     t_atom               *atom;
 196     int                   j, k;
 197     int                   j0, j1, nj;
 198     gmx_bool              perturbed;
 199     gmx_bool              has_vdw[4];
 200     gmx_bool              match;
 201     real                  tmp_charge[4];
 202     int                   tmp_vdwtype[4];
 203     int                   tjA;
 204     gmx_bool              qm;
 205     solvent_parameters_t *solvent_parameters;
 206
 207     /* We use a list with parameters for each solvent type.
 208      * Every time we discover a new molecule that fulfills the basic
 209      * conditions for a solvent we compare with the previous entries
 210      * in these lists. If the parameters are the same we just increment
 211      * the counter for that type, and otherwise we create a new type
 212      * based on the current molecule.
 213      *
 214      * Once we've finished going through all molecules we check which
 215      * solvent is most common, and mark all those molecules while we
 216      * clear the flag on all others.
 217      */
 218
 219     solvent_parameters = *solvent_parameters_p;
 220
 221     /* Mark the cg first as non optimized */
 222     *cg_sp = -1;
 223
 224     /* Check if this cg has no exclusions with atoms in other charge groups
 225      * and all atoms inside the charge group excluded.
 226      * We only have 3 or 4 atom solvent loops.
 227      */
 228     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 229         !GET_CGINFO_EXCL_INTRA(cginfo))
 230     {
 231         return;
 232     }
 233
 234     /* Get the indices of the first atom in this charge group */
 235     j0     = molt->cgs.index[cg0];
 236     j1     = molt->cgs.index[cg0+1];
 237
 238     /* Number of atoms in our molecule */
 239     nj     = j1 - j0;
 240
 241     if (debug)
 242     {
 243         fprintf(debug,
 244                 "Moltype '%s': there are %d atoms in this charge group\n",
 245                 *molt->name, nj);
 246     }
 247
 248     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 249      * otherwise skip it.
 250      */
 251     if (nj < 3 || nj > 4)
 252     {
 253         return;
 254     }
 255
 256     /* Check if we are doing QM on this group */
 257     qm = FALSE;
 258     if (qm_grpnr != NULL)
 259     {
 260         for (j = j0; j < j1 && !qm; j++)
 261         {
 262             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 263         }
 264     }
 265     /* Cannot use solvent optimization with QM */
 266     if (qm)
 267     {
 268         return;
 269     }
 270
 271     atom = molt->atoms.atom;
 272
 273     /* Still looks like a solvent, time to check parameters */
 274
 275     /* If it is perturbed (free energy) we can't use the solvent loops,
 276      * so then we just skip to the next molecule.
 277      */
 278     perturbed = FALSE;
 279
 280     for (j = j0; j < j1 && !perturbed; j++)
 281     {
 282         perturbed = PERTURBED(atom[j]);
 283     }
 284
 285     if (perturbed)
 286     {
 287         return;
 288     }
 289
 290     /* Now it's only a question if the VdW and charge parameters
 291      * are OK. Before doing the check we compare and see if they are
 292      * identical to a possible previous solvent type.
 293      * First we assign the current types and charges.
 294      */
 295     for (j = 0; j < nj; j++)
 296     {
 297         tmp_vdwtype[j] = atom[j0+j].type;
 298         tmp_charge[j]  = atom[j0+j].q;
 299     }
 300
 301     /* Does it match any previous solvent type? */
 302     for (k = 0; k < *n_solvent_parameters; k++)
 303     {
 304         match = TRUE;
 305
 306
 307         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 308         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 309              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 310         {
 311             match = FALSE;
 312         }
 313
 314         /* Check that types & charges match for all atoms in molecule */
 315         for (j = 0; j < nj && match == TRUE; j++)
 316         {
 317             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 318             {
 319                 match = FALSE;
 320             }
 321             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 322             {
 323                 match = FALSE;
 324             }
 325         }
 326         if (match == TRUE)
 327         {
 328             /* Congratulations! We have a matched solvent.
 329              * Flag it with this type for later processing.
 330              */
 331             *cg_sp = k;
 332             solvent_parameters[k].count += nmol;
 333
 334             /* We are done with this charge group */
 335             return;
 336         }
 337     }
 338
 339     /* If we get here, we have a tentative new solvent type.
 340      * Before we add it we must check that it fulfills the requirements
 341      * of the solvent optimized loops. First determine which atoms have
 342      * VdW interactions.
 343      */
 344     for (j = 0; j < nj; j++)
 345     {
 346         has_vdw[j] = FALSE;
 347         tjA        = tmp_vdwtype[j];
 348
 349         /* Go through all other tpes and see if any have non-zero
 350          * VdW parameters when combined with this one.
 351          */
 352         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 353         {
 354             /* We already checked that the atoms weren't perturbed,
 355              * so we only need to check state A now.
 356              */
 357             if (fr->bBHAM)
 358             {
 359                 has_vdw[j] = (has_vdw[j] ||
 360                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 361                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 362                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 363             }
 364             else
 365             {
 366                 /* Standard LJ */
 367                 has_vdw[j] = (has_vdw[j] ||
 368                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 369                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 370             }
 371         }
 372     }
 373
 374     /* Now we know all we need to make the final check and assignment. */
 375     if (nj == 3)
 376     {
 377         /* So, is it an SPC?
 378          * For this we require thatn all atoms have charge,
 379          * the charges on atom 2 & 3 should be the same, and only
 380          * atom 1 might have VdW.
 381          */
 382         if (has_vdw[1] == FALSE &&
 383             has_vdw[2] == FALSE &&
 384             tmp_charge[0]  != 0 &&
 385             tmp_charge[1]  != 0 &&
 386             tmp_charge[2]  == tmp_charge[1])
 387         {
 388             srenew(solvent_parameters, *n_solvent_parameters+1);
 389             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 390             solvent_parameters[*n_solvent_parameters].count = nmol;
 391             for (k = 0; k < 3; k++)
 392             {
 393                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 394                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 395             }
 396
 397             *cg_sp = *n_solvent_parameters;
 398             (*n_solvent_parameters)++;
 399         }
 400     }
 401     else if (nj == 4)
 402     {
 403         /* Or could it be a TIP4P?
 404          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 405          * Only atom 1 mght have VdW.
 406          */
 407         if (has_vdw[1] == FALSE &&
 408             has_vdw[2] == FALSE &&
 409             has_vdw[3] == FALSE &&
 410             tmp_charge[0]  == 0 &&
 411             tmp_charge[1]  != 0 &&
 412             tmp_charge[2]  == tmp_charge[1] &&
 413             tmp_charge[3]  != 0)
 414         {
 415             srenew(solvent_parameters, *n_solvent_parameters+1);
 416             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 417             solvent_parameters[*n_solvent_parameters].count = nmol;
 418             for (k = 0; k < 4; k++)
 419             {
 420                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 421                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 422             }
 423
 424             *cg_sp = *n_solvent_parameters;
 425             (*n_solvent_parameters)++;
 426         }
 427     }
 428
 429     *solvent_parameters_p = solvent_parameters;
 430 }
 431
 432 static void
 433 check_solvent(FILE  *                fp,
 434               const gmx_mtop_t  *    mtop,
 435               t_forcerec  *          fr,
 436               cginfo_mb_t           *cginfo_mb)
 437 {
 438     const t_block     *   cgs;
 439     const t_block     *   mols;
 440     const gmx_moltype_t  *molt;
 441     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 442     int                   n_solvent_parameters;
 443     solvent_parameters_t *solvent_parameters;
 444     int                 **cg_sp;
 445     int                   bestsp, bestsol;
 446
 447     if (debug)
 448     {
 449         fprintf(debug, "Going to determine what solvent types we have.\n");
 450     }
 451
 452     mols = &mtop->mols;
 453
 454     n_solvent_parameters = 0;
 455     solvent_parameters   = NULL;
 456     /* Allocate temporary array for solvent type */
 457     snew(cg_sp, mtop->nmolblock);
 458
 459     cg_offset = 0;
 460     at_offset = 0;
 461     for (mb = 0; mb < mtop->nmolblock; mb++)
 462     {
 463         molt = &mtop->moltype[mtop->molblock[mb].type];
 464         cgs  = &molt->cgs;
 465         /* Here we have to loop over all individual molecules
 466          * because we need to check for QMMM particles.
 467          */
 468         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 469         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 470         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 471         for (mol = 0; mol < nmol_ch; mol++)
 472         {
 473             cgm = mol*cgs->nr;
 474             am  = mol*cgs->index[cgs->nr];
 475             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 476             {
 477                 check_solvent_cg(molt, cg_mol, nmol,
 478                                  mtop->groups.grpnr[egcQMMM] ?
 479                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 480                                  &mtop->groups.grps[egcQMMM],
 481                                  fr,
 482                                  &n_solvent_parameters, &solvent_parameters,
 483                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 484                                  &cg_sp[mb][cgm+cg_mol]);
 485             }
 486         }
 487         cg_offset += cgs->nr;
 488         at_offset += cgs->index[cgs->nr];
 489     }
 490
 491     /* Puh! We finished going through all charge groups.
 492      * Now find the most common solvent model.
 493      */
 494
 495     /* Most common solvent this far */
 496     bestsp = -2;
 497     for (i = 0; i < n_solvent_parameters; i++)
 498     {
 499         if (bestsp == -2 ||
 500             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 501         {
 502             bestsp = i;
 503         }
 504     }
 505
 506     if (bestsp >= 0)
 507     {
 508         bestsol = solvent_parameters[bestsp].model;
 509     }
 510     else
 511     {
 512         bestsol = esolNO;
 513     }
 514
 515 #ifdef DISABLE_WATER_NLIST
 516     bestsol = esolNO;
 517 #endif
 518
 519     fr->nWatMol = 0;
 520     for (mb = 0; mb < mtop->nmolblock; mb++)
 521     {
 522         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 523         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 524         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 525         {
 526             if (cg_sp[mb][i] == bestsp)
 527             {
 528                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 529                 fr->nWatMol += nmol;
 530             }
 531             else
 532             {
 533                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 534             }
 535         }
 536         sfree(cg_sp[mb]);
 537     }
 538     sfree(cg_sp);
 539
 540     if (bestsol != esolNO && fp != NULL)
 541     {
 542         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 543                 esol_names[bestsol],
 544                 solvent_parameters[bestsp].count);
 545     }
 546
 547     sfree(solvent_parameters);
 548     fr->solvent_opt = bestsol;
 549 }
 550
 551 enum {
 552     acNONE = 0, acCONSTRAINT, acSETTLE
 553 };
 554
 555 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 556                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 557                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 558 {
 559     const t_block        *cgs;
 560     const t_blocka       *excl;
 561     const gmx_moltype_t  *molt;
 562     const gmx_molblock_t *molb;
 563     cginfo_mb_t          *cginfo_mb;
 564     gmx_bool             *type_VDW;
 565     int                  *cginfo;
 566     int                   cg_offset, a_offset, cgm, am;
 567     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 568     int                  *a_con;
 569     int                   ftype;
 570     int                   ia;
 571     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 572
 573     ncg_tot = ncg_mtop(mtop);
 574     snew(cginfo_mb, mtop->nmolblock);
 575
 576     snew(type_VDW, fr->ntype);
 577     for (ai = 0; ai < fr->ntype; ai++)
 578     {
 579         type_VDW[ai] = FALSE;
 580         for (j = 0; j < fr->ntype; j++)
 581         {
 582             type_VDW[ai] = type_VDW[ai] ||
 583                 fr->bBHAM ||
 584                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 585                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 586         }
 587     }
 588
 589     *bExcl_IntraCGAll_InterCGNone = TRUE;
 590
 591     excl_nalloc = 10;
 592     snew(bExcl, excl_nalloc);
 593     cg_offset = 0;
 594     a_offset  = 0;
 595     for (mb = 0; mb < mtop->nmolblock; mb++)
 596     {
 597         molb = &mtop->molblock[mb];
 598         molt = &mtop->moltype[molb->type];
 599         cgs  = &molt->cgs;
 600         excl = &molt->excls;
 601
 602         /* Check if the cginfo is identical for all molecules in this block.
 603          * If so, we only need an array of the size of one molecule.
 604          * Otherwise we make an array of #mol times #cgs per molecule.
 605          */
 606         bId = TRUE;
 607         am  = 0;
 608         for (m = 0; m < molb->nmol; m++)
 609         {
 610             am = m*cgs->index[cgs->nr];
 611             for (cg = 0; cg < cgs->nr; cg++)
 612             {
 613                 a0 = cgs->index[cg];
 614                 a1 = cgs->index[cg+1];
 615                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 616                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 617                 {
 618                     bId = FALSE;
 619                 }
 620                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 621                 {
 622                     for (ai = a0; ai < a1; ai++)
 623                     {
 624                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 625                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 626                         {
 627                             bId = FALSE;
 628                         }
 629                     }
 630                 }
 631             }
 632         }
 633
 634         cginfo_mb[mb].cg_start = cg_offset;
 635         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 636         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 637         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 638         cginfo = cginfo_mb[mb].cginfo;
 639
 640         /* Set constraints flags for constrained atoms */
 641         snew(a_con, molt->atoms.nr);
 642         for (ftype = 0; ftype < F_NRE; ftype++)
 643         {
 644             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 645             {
 646                 int nral;
 647
 648                 nral = NRAL(ftype);
 649                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 650                 {
 651                     int a;
 652
 653                     for (a = 0; a < nral; a++)
 654                     {
 655                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 656                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 657                     }
 658                 }
 659             }
 660         }
 661
 662         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 663         {
 664             cgm = m*cgs->nr;
 665             am  = m*cgs->index[cgs->nr];
 666             for (cg = 0; cg < cgs->nr; cg++)
 667             {
 668                 a0 = cgs->index[cg];
 669                 a1 = cgs->index[cg+1];
 670
 671                 /* Store the energy group in cginfo */
 672                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 673                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 674
 675                 /* Check the intra/inter charge group exclusions */
 676                 if (a1-a0 > excl_nalloc)
 677                 {
 678                     excl_nalloc = a1 - a0;
 679                     srenew(bExcl, excl_nalloc);
 680                 }
 681                 /* bExclIntraAll: all intra cg interactions excluded
 682                  * bExclInter:    any inter cg interactions excluded
 683                  */
 684                 bExclIntraAll = TRUE;
 685                 bExclInter    = FALSE;
 686                 bHaveVDW      = FALSE;
 687                 bHaveQ        = FALSE;
 688                 for (ai = a0; ai < a1; ai++)
 689                 {
 690                     /* Check VDW and electrostatic interactions */
 691                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 692                                             type_VDW[molt->atoms.atom[ai].typeB]);
 693                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 694                                             molt->atoms.atom[ai].qB != 0);
 695
 696                     /* Clear the exclusion list for atom ai */
 697                     for (aj = a0; aj < a1; aj++)
 698                     {
 699                         bExcl[aj-a0] = FALSE;
 700                     }
 701                     /* Loop over all the exclusions of atom ai */
 702                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 703                     {
 704                         aj = excl->a[j];
 705                         if (aj < a0 || aj >= a1)
 706                         {
 707                             bExclInter = TRUE;
 708                         }
 709                         else
 710                         {
 711                             bExcl[aj-a0] = TRUE;
 712                         }
 713                     }
 714                     /* Check if ai excludes a0 to a1 */
 715                     for (aj = a0; aj < a1; aj++)
 716                     {
 717                         if (!bExcl[aj-a0])
 718                         {
 719                             bExclIntraAll = FALSE;
 720                         }
 721                     }
 722
 723                     switch (a_con[ai])
 724                     {
 725                         case acCONSTRAINT:
 726                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 727                             break;
 728                         case acSETTLE:
 729                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 730                             break;
 731                         default:
 732                             break;
 733                     }
 734                 }
 735                 if (bExclIntraAll)
 736                 {
 737                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 738                 }
 739                 if (bExclInter)
 740                 {
 741                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 742                 }
 743                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 744                 {
 745                     /* The size in cginfo is currently only read with DD */
 746                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 747                 }
 748                 if (bHaveVDW)
 749                 {
 750                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 751                 }
 752                 if (bHaveQ)
 753                 {
 754                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 755                 }
 756                 /* Store the charge group size */
 757                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 758
 759                 if (!bExclIntraAll || bExclInter)
 760                 {
 761                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 762                 }
 763             }
 764         }
 765
 766         sfree(a_con);
 767
 768         cg_offset += molb->nmol*cgs->nr;
 769         a_offset  += molb->nmol*cgs->index[cgs->nr];
 770     }
 771     sfree(bExcl);
 772
 773     /* the solvent optimizer is called after the QM is initialized,
 774      * because we don't want to have the QM subsystemto become an
 775      * optimized solvent
 776      */
 777
 778     check_solvent(fplog, mtop, fr, cginfo_mb);
 779
 780     if (getenv("GMX_NO_SOLV_OPT"))
 781     {
 782         if (fplog)
 783         {
 784             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 785                     "Disabling all solvent optimization\n");
 786         }
 787         fr->solvent_opt = esolNO;
 788     }
 789     if (bNoSolvOpt)
 790     {
 791         fr->solvent_opt = esolNO;
 792     }
 793     if (!fr->solvent_opt)
 794     {
 795         for (mb = 0; mb < mtop->nmolblock; mb++)
 796         {
 797             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 798             {
 799                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 800             }
 801         }
 802     }
 803
 804     return cginfo_mb;
 805 }
 806
 807 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 808 {
 809     int  ncg, mb, cg;
 810     int *cginfo;
 811
 812     ncg = cgi_mb[nmb-1].cg_end;
 813     snew(cginfo, ncg);
 814     mb = 0;
 815     for (cg = 0; cg < ncg; cg++)
 816     {
 817         while (cg >= cgi_mb[mb].cg_end)
 818         {
 819             mb++;
 820         }
 821         cginfo[cg] =
 822             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 823     }
 824
 825     return cginfo;
 826 }
 827
 828 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 829 {
 830     double         qsum, q2sum, q;
 831     int            mb, nmol, i;
 832     const t_atoms *atoms;
 833
 834     qsum  = 0;
 835     q2sum = 0;
 836     for (mb = 0; mb < mtop->nmolblock; mb++)
 837     {
 838         nmol  = mtop->molblock[mb].nmol;
 839         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 840         for (i = 0; i < atoms->nr; i++)
 841         {
 842             q      = atoms->atom[i].q;
 843             qsum  += nmol*q;
 844             q2sum += nmol*q*q;
 845         }
 846     }
 847     fr->qsum[0]  = qsum;
 848     fr->q2sum[0] = q2sum;
 849     if (fr->efep != efepNO)
 850     {
 851         qsum  = 0;
 852         q2sum = 0;
 853         for (mb = 0; mb < mtop->nmolblock; mb++)
 854         {
 855             nmol  = mtop->molblock[mb].nmol;
 856             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 857             for (i = 0; i < atoms->nr; i++)
 858             {
 859                 q      = atoms->atom[i].qB;
 860                 qsum  += nmol*q;
 861                 q2sum += nmol*q*q;
 862             }
 863             fr->qsum[1]  = qsum;
 864             fr->q2sum[1] = q2sum;
 865         }
 866     }
 867     else
 868     {
 869         fr->qsum[1]  = fr->qsum[0];
 870         fr->q2sum[1] = fr->q2sum[0];
 871     }
 872     if (log)
 873     {
 874         if (fr->efep == efepNO)
 875         {
 876             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 877         }
 878         else
 879         {
 880             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 881                     fr->qsum[0], fr->qsum[1]);
 882         }
 883     }
 884 }
 885
 886 void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
 887 {
 888     if (fr->eeltype == eelGRF)
 889     {
 890         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 891                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 892                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 893     }
 894 }
 895
 896 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 897 {
 898     const t_atoms  *atoms, *atoms_tpi;
 899     const t_blocka *excl;
 900     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 901 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 902     long long int   npair, npair_ij, tmpi, tmpj;
 903 #else
 904     double          npair, npair_ij, tmpi, tmpj;
 905 #endif
 906     double          csix, ctwelve;
 907     int             ntp, *typecount;
 908     gmx_bool        bBHAM;
 909     real           *nbfp;
 910
 911     ntp   = fr->ntype;
 912     bBHAM = fr->bBHAM;
 913     nbfp  = fr->nbfp;
 914
 915     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 916     {
 917         csix    = 0;
 918         ctwelve = 0;
 919         npair   = 0;
 920         nexcl   = 0;
 921         if (!fr->n_tpi)
 922         {
 923             /* Count the types so we avoid natoms^2 operations */
 924             snew(typecount, ntp);
 925             for (mb = 0; mb < mtop->nmolblock; mb++)
 926             {
 927                 nmol  = mtop->molblock[mb].nmol;
 928                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 929                 for (i = 0; i < atoms->nr; i++)
 930                 {
 931                     if (q == 0)
 932                     {
 933                         tpi = atoms->atom[i].type;
 934                     }
 935                     else
 936                     {
 937                         tpi = atoms->atom[i].typeB;
 938                     }
 939                     typecount[tpi] += nmol;
 940                 }
 941             }
 942             for (tpi = 0; tpi < ntp; tpi++)
 943             {
 944                 for (tpj = tpi; tpj < ntp; tpj++)
 945                 {
 946                     tmpi = typecount[tpi];
 947                     tmpj = typecount[tpj];
 948                     if (tpi != tpj)
 949                     {
 950                         npair_ij = tmpi*tmpj;
 951                     }
 952                     else
 953                     {
 954                         npair_ij = tmpi*(tmpi - 1)/2;
 955                     }
 956                     if (bBHAM)
 957                     {
 958                         /* nbfp now includes the 6.0 derivative prefactor */
 959                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 960                     }
 961                     else
 962                     {
 963                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
 964                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
 965                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
 966                     }
 967                     npair += npair_ij;
 968                 }
 969             }
 970             sfree(typecount);
 971             /* Subtract the excluded pairs.
 972              * The main reason for substracting exclusions is that in some cases
 973              * some combinations might never occur and the parameters could have
 974              * any value. These unused values should not influence the dispersion
 975              * correction.
 976              */
 977             for (mb = 0; mb < mtop->nmolblock; mb++)
 978             {
 979                 nmol  = mtop->molblock[mb].nmol;
 980                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 981                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 982                 for (i = 0; (i < atoms->nr); i++)
 983                 {
 984                     if (q == 0)
 985                     {
 986                         tpi = atoms->atom[i].type;
 987                     }
 988                     else
 989                     {
 990                         tpi = atoms->atom[i].typeB;
 991                     }
 992                     j1  = excl->index[i];
 993                     j2  = excl->index[i+1];
 994                     for (j = j1; j < j2; j++)
 995                     {
 996                         k = excl->a[j];
 997                         if (k > i)
 998                         {
 999                             if (q == 0)
1000                             {
1001                                 tpj = atoms->atom[k].type;
1002                             }
1003                             else
1004                             {
1005                                 tpj = atoms->atom[k].typeB;
1006                             }
1007                             if (bBHAM)
1008                             {
1009                                 /* nbfp now includes the 6.0 derivative prefactor */
1010                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1011                             }
1012                             else
1013                             {
1014                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1015                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1016                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1017                             }
1018                             nexcl += nmol;
1019                         }
1020                     }
1021                 }
1022             }
1023         }
1024         else
1025         {
1026             /* Only correct for the interaction of the test particle
1027              * with the rest of the system.
1028              */
1029             atoms_tpi =
1030                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1031
1032             npair = 0;
1033             for (mb = 0; mb < mtop->nmolblock; mb++)
1034             {
1035                 nmol  = mtop->molblock[mb].nmol;
1036                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1037                 for (j = 0; j < atoms->nr; j++)
1038                 {
1039                     nmolc = nmol;
1040                     /* Remove the interaction of the test charge group
1041                      * with itself.
1042                      */
1043                     if (mb == mtop->nmolblock-1)
1044                     {
1045                         nmolc--;
1046
1047                         if (mb == 0 && nmol == 1)
1048                         {
1049                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1050                         }
1051                     }
1052                     if (q == 0)
1053                     {
1054                         tpj = atoms->atom[j].type;
1055                     }
1056                     else
1057                     {
1058                         tpj = atoms->atom[j].typeB;
1059                     }
1060                     for (i = 0; i < fr->n_tpi; i++)
1061                     {
1062                         if (q == 0)
1063                         {
1064                             tpi = atoms_tpi->atom[i].type;
1065                         }
1066                         else
1067                         {
1068                             tpi = atoms_tpi->atom[i].typeB;
1069                         }
1070                         if (bBHAM)
1071                         {
1072                             /* nbfp now includes the 6.0 derivative prefactor */
1073                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1074                         }
1075                         else
1076                         {
1077                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1078                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1079                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1080                         }
1081                         npair += nmolc;
1082                     }
1083                 }
1084             }
1085         }
1086         if (npair - nexcl <= 0 && fplog)
1087         {
1088             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1089             csix     = 0;
1090             ctwelve  = 0;
1091         }
1092         else
1093         {
1094             csix    /= npair - nexcl;
1095             ctwelve /= npair - nexcl;
1096         }
1097         if (debug)
1098         {
1099             fprintf(debug, "Counted %d exclusions\n", nexcl);
1100             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1101             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1102         }
1103         fr->avcsix[q]    = csix;
1104         fr->avctwelve[q] = ctwelve;
1105     }
1106     if (fplog != NULL)
1107     {
1108         if (fr->eDispCorr == edispcAllEner ||
1109             fr->eDispCorr == edispcAllEnerPres)
1110         {
1111             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1112                     fr->avcsix[0], fr->avctwelve[0]);
1113         }
1114         else
1115         {
1116             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1117         }
1118     }
1119 }
1120
1121
1122 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1123                            const gmx_mtop_t *mtop)
1124 {
1125     const t_atoms *at1, *at2;
1126     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1127     real           b, bmin;
1128     real          *nbfp;
1129
1130     if (fplog)
1131     {
1132         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1133     }
1134     nbfp   = fr->nbfp;
1135     ntypes = fr->ntype;
1136
1137     bmin           = -1;
1138     fr->bham_b_max = 0;
1139     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1140     {
1141         at1 = &mtop->moltype[mt1].atoms;
1142         for (i = 0; (i < at1->nr); i++)
1143         {
1144             tpi = at1->atom[i].type;
1145             if (tpi >= ntypes)
1146             {
1147                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1148             }
1149
1150             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1151             {
1152                 at2 = &mtop->moltype[mt2].atoms;
1153                 for (j = 0; (j < at2->nr); j++)
1154                 {
1155                     tpj = at2->atom[j].type;
1156                     if (tpj >= ntypes)
1157                     {
1158                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1159                     }
1160                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1161                     if (b > fr->bham_b_max)
1162                     {
1163                         fr->bham_b_max = b;
1164                     }
1165                     if ((b < bmin) || (bmin == -1))
1166                     {
1167                         bmin = b;
1168                     }
1169                 }
1170             }
1171         }
1172     }
1173     if (fplog)
1174     {
1175         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1176                 bmin, fr->bham_b_max);
1177     }
1178 }
1179
1180 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1181                             t_forcerec *fr, real rtab,
1182                             const t_commrec *cr,
1183                             const char *tabfn, char *eg1, char *eg2,
1184                             t_nblists *nbl)
1185 {
1186     char buf[STRLEN];
1187     int  i, j;
1188
1189     if (tabfn == NULL)
1190     {
1191         if (debug)
1192         {
1193             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1194         }
1195         return;
1196     }
1197
1198     sprintf(buf, "%s", tabfn);
1199     if (eg1 && eg2)
1200     {
1201         /* Append the two energy group names */
1202         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1203                 eg1, eg2, ftp2ext(efXVG));
1204     }
1205     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1206     /* Copy the contents of the table to separate coulomb and LJ tables too,
1207      * to improve cache performance.
1208      */
1209     /* For performance reasons we want
1210      * the table data to be aligned to 16-byte. The pointers could be freed
1211      * but currently aren't.
1212      */
1213     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1214     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1215     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1216     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1217     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1218     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1219     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1220     nbl->table_elec.ninteractions = 1;
1221     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1222     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1223
1224     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1225     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1226     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1227     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1228     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1229     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1230     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1231     nbl->table_vdw.ninteractions = 2;
1232     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1233     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1234
1235     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1236     {
1237         for (j = 0; j < 4; j++)
1238         {
1239             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1240         }
1241         for (j = 0; j < 8; j++)
1242         {
1243             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1244         }
1245     }
1246 }
1247
1248 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1249                          int *ncount, int **count)
1250 {
1251     const gmx_moltype_t *molt;
1252     const t_ilist       *il;
1253     int                  mt, ftype, stride, i, j, tabnr;
1254
1255     for (mt = 0; mt < mtop->nmoltype; mt++)
1256     {
1257         molt = &mtop->moltype[mt];
1258         for (ftype = 0; ftype < F_NRE; ftype++)
1259         {
1260             if (ftype == ftype1 || ftype == ftype2)
1261             {
1262                 il     = &molt->ilist[ftype];
1263                 stride = 1 + NRAL(ftype);
1264                 for (i = 0; i < il->nr; i += stride)
1265                 {
1266                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1267                     if (tabnr < 0)
1268                     {
1269                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1270                     }
1271                     if (tabnr >= *ncount)
1272                     {
1273                         srenew(*count, tabnr+1);
1274                         for (j = *ncount; j < tabnr+1; j++)
1275                         {
1276                             (*count)[j] = 0;
1277                         }
1278                         *ncount = tabnr+1;
1279                     }
1280                     (*count)[tabnr]++;
1281                 }
1282             }
1283         }
1284     }
1285 }
1286
1287 static bondedtable_t *make_bonded_tables(FILE *fplog,
1288                                          int ftype1, int ftype2,
1289                                          const gmx_mtop_t *mtop,
1290                                          const char *basefn, const char *tabext)
1291 {
1292     int            i, ncount, *count;
1293     char           tabfn[STRLEN];
1294     bondedtable_t *tab;
1295
1296     tab = NULL;
1297
1298     ncount = 0;
1299     count  = NULL;
1300     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1301
1302     if (ncount > 0)
1303     {
1304         snew(tab, ncount);
1305         for (i = 0; i < ncount; i++)
1306         {
1307             if (count[i] > 0)
1308             {
1309                 sprintf(tabfn, "%s", basefn);
1310                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1311                         tabext, i, ftp2ext(efXVG));
1312                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1313             }
1314         }
1315         sfree(count);
1316     }
1317
1318     return tab;
1319 }
1320
1321 void forcerec_set_ranges(t_forcerec *fr,
1322                          int ncg_home, int ncg_force,
1323                          int natoms_force,
1324                          int natoms_force_constr, int natoms_f_novirsum)
1325 {
1326     fr->cg0 = 0;
1327     fr->hcg = ncg_home;
1328
1329     /* fr->ncg_force is unused in the standard code,
1330      * but it can be useful for modified code dealing with charge groups.
1331      */
1332     fr->ncg_force           = ncg_force;
1333     fr->natoms_force        = natoms_force;
1334     fr->natoms_force_constr = natoms_force_constr;
1335
1336     if (fr->natoms_force_constr > fr->nalloc_force)
1337     {
1338         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1339
1340         if (fr->bTwinRange)
1341         {
1342             srenew(fr->f_twin, fr->nalloc_force);
1343         }
1344     }
1345
1346     if (fr->bF_NoVirSum)
1347     {
1348         fr->f_novirsum_n = natoms_f_novirsum;
1349         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1350         {
1351             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1352             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1353         }
1354     }
1355     else
1356     {
1357         fr->f_novirsum_n = 0;
1358     }
1359 }
1360
1361 static real cutoff_inf(real cutoff)
1362 {
1363     if (cutoff == 0)
1364     {
1365         cutoff = GMX_CUTOFF_INF;
1366     }
1367
1368     return cutoff;
1369 }
1370
1371 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1372                                   t_forcerec *fr, const t_inputrec *ir,
1373                                   const char *tabfn, const gmx_mtop_t *mtop,
1374                                   matrix     box)
1375 {
1376     char buf[STRLEN];
1377     int  i, j;
1378
1379     if (tabfn == NULL)
1380     {
1381         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1382         return;
1383     }
1384
1385     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1386
1387     for (i = 0; i < ir->adress->n_tf_grps; i++)
1388     {
1389         j = ir->adress->tf_table_index[i]; /* get energy group index */
1390         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1391                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1392         printf("loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[j], buf);
1393         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1394     }
1395
1396 }
1397
1398 gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
1399                           gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1400 {
1401     gmx_bool bAllvsAll;
1402
1403     bAllvsAll =
1404         (
1405             ir->rlist == 0            &&
1406             ir->rcoulomb == 0         &&
1407             ir->rvdw == 0             &&
1408             ir->ePBC == epbcNONE      &&
1409             ir->vdwtype == evdwCUT    &&
1410             ir->coulombtype == eelCUT &&
1411             ir->efep == efepNO        &&
1412             (ir->implicit_solvent == eisNO ||
1413              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1414                                                   ir->gb_algorithm == egbHCT   ||
1415                                                   ir->gb_algorithm == egbOBC))) &&
1416             getenv("GMX_NO_ALLVSALL") == NULL
1417         );
1418
1419     if (bAllvsAll && ir->opts.ngener > 1)
1420     {
1421         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1422
1423         if (bPrintNote)
1424         {
1425             if (MASTER(cr))
1426             {
1427                 fprintf(stderr, "\n%s\n", note);
1428             }
1429             if (fp != NULL)
1430             {
1431                 fprintf(fp, "\n%s\n", note);
1432             }
1433         }
1434         bAllvsAll = FALSE;
1435     }
1436
1437     if (bAllvsAll && fp && MASTER(cr))
1438     {
1439         fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1440     }
1441
1442     return bAllvsAll;
1443 }
1444
1445
1446 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1447 {
1448     int t, i;
1449
1450     /* These thread local data structures are used for bondeds only */
1451     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1452
1453     if (fr->nthreads > 1)
1454     {
1455         snew(fr->f_t, fr->nthreads);
1456         /* Thread 0 uses the global force and energy arrays */
1457         for (t = 1; t < fr->nthreads; t++)
1458         {
1459             fr->f_t[t].f        = NULL;
1460             fr->f_t[t].f_nalloc = 0;
1461             snew(fr->f_t[t].fshift, SHIFTS);
1462             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1463             for (i = 0; i < egNR; i++)
1464             {
1465                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1466             }
1467         }
1468     }
1469 }
1470
1471
/* Choose the CPU nbnxn kernel type and the Ewald exclusion-correction
 * flavour, writing them to *kernel_type and *ewald_excl.
 *
 * The defaults are the plain-C 4x4 kernel with tabulated Ewald exclusion
 * correction. When compiled with GMX_NBNXN_SIMD, a SIMD kernel is selected
 * according to which of GMX_NBNXN_SIMD_4XN / GMX_NBNXN_SIMD_2XNN are
 * defined (2xNN wins when both are), with these adjustments:
 *  - with 4xN available and GMX_X86_AVX_256, reaction-field and plain
 *    cut-off electrostatics use 4xN;
 *  - the environment variables GMX_NBNXN_SIMD_4XN and GMX_NBNXN_SIMD_2XNN
 *    force the respective kernel (fatal error if it was not compiled in);
 *    when both are set, 2xNN is checked last and therefore wins;
 *  - GMX_NBNXN_EWALD_TABLE and GMX_NBNXN_EWALD_ANALYTICAL override the
 *    exclusion correction; ANALYTICAL is checked last.
 *
 * fp, cr and cpuid_info are not used in the code visible here; ir is only
 * read in the AVX-256 branch.
 */
static void pick_nbnxn_kernel_cpu(FILE             *fp,
                                  const t_commrec  *cr,
                                  const gmx_cpuid_t cpuid_info,
                                  const t_inputrec *ir,
                                  int              *kernel_type,
                                  int              *ewald_excl)
{
    /* Fallback when no SIMD kernels are compiled in */
    *kernel_type = nbnxnk4x4_PlainC;
    *ewald_excl  = ewaldexclTable;

#ifdef GMX_NBNXN_SIMD
    {
#ifdef GMX_NBNXN_SIMD_4XN
        *kernel_type = nbnxnk4xN_SIMD_4xN;
#endif
#ifdef GMX_NBNXN_SIMD_2XNN
        /* We expect the 2xNN kernels to be faster in most cases */
        *kernel_type = nbnxnk4xN_SIMD_2xNN;
#endif

#if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
        if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
        {
            /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
             * 10% with HT, 50% without HT, but extra zeros interactions
             * can compensate. As we currently don't detect the actual use
             * of HT, switch to 4x8 to avoid a potential performance hit.
             */
            *kernel_type = nbnxnk4xN_SIMD_4xN;
        }
#endif
        /* Explicit user requests through the environment override the
         * choices made above.
         */
        if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
        {
#ifdef GMX_NBNXN_SIMD_4XN
            *kernel_type = nbnxnk4xN_SIMD_4xN;
#else
            gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
#endif
        }
        if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
        {
#ifdef GMX_NBNXN_SIMD_2XNN
            *kernel_type = nbnxnk4xN_SIMD_2xNN;
#else
            gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
#endif
        }

        /* Analytical Ewald exclusion correction is only an option in the
         * x86 SIMD kernel. This is faster in single precision
         * on Bulldozer and slightly faster on Sandy Bridge.
         */
#if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
        *ewald_excl = ewaldexclAnalytical;
#endif
        if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
        {
            *ewald_excl = ewaldexclTable;
        }
        if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
        {
            *ewald_excl = ewaldexclAnalytical;
        }

    }
#endif /* GMX_NBNXN_SIMD */
}
1539
1540
1541 const char *lookup_nbnxn_kernel_name(int kernel_type)
1542 {
1543     const char *returnvalue = NULL;
1544     switch (kernel_type)
1545     {
1546         case nbnxnkNotSet: returnvalue     = "not set"; break;
1547         case nbnxnk4x4_PlainC: returnvalue = "plain C"; break;
1548 #ifndef GMX_NBNXN_SIMD
1549         case nbnxnk4xN_SIMD_4xN: returnvalue  = "not available"; break;
1550         case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break;
1551 #else
1552 #ifdef GMX_X86_SSE2
1553 #if GMX_NBNXN_SIMD_BITWIDTH == 128
1554             /* x86 SIMD intrinsics can be converted to either SSE or AVX depending
1555              * on compiler flags. As we use nearly identical intrinsics, using an AVX
1556              * compiler flag without an AVX macro effectively results in AVX kernels.
1557              * For gcc we check for __AVX__
1558              * At least a check for icc should be added (if there is a macro)
1559              */
1560 #if !(defined GMX_X86_AVX_128_FMA || defined __AVX__)
1561 #ifndef GMX_X86_SSE4_1
1562         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE2"; break;
1563         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break;
1564 #else
1565         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE4.1"; break;
1566         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break;
1567 #endif
1568 #else
1569         case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-128"; break;
1570         case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break;
1571 #endif
1572 #endif
1573 #if GMX_NBNXN_SIMD_BITWIDTH == 256
1574         case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-256"; break;
1575         case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break;
1576 #endif
1577 #else   /* not GMX_X86_SSE2 */
1578         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SIMD"; break;
1579         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break;
1580 #endif
1581 #endif
1582         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1583         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1584
1585         case nbnxnkNR:
1586         default:
1587             gmx_fatal(FARGS, "Illegal kernel type selected");
1588             returnvalue = NULL;
1589             break;
1590     }
1591     return returnvalue;
1592 };
1593
1594 static void pick_nbnxn_kernel(FILE                *fp,
1595                               const t_commrec     *cr,
1596                               const gmx_hw_info_t *hwinfo,
1597                               gmx_bool             use_cpu_acceleration,
1598                               gmx_bool             bUseGPU,
1599                               gmx_bool             bEmulateGPU,
1600                               const t_inputrec    *ir,
1601                               int                 *kernel_type,
1602                               int                 *ewald_excl,
1603                               gmx_bool             bDoNonbonded)
1604 {
1605     assert(kernel_type);
1606
1607     *kernel_type = nbnxnkNotSet;
1608     *ewald_excl  = ewaldexclTable;
1609
1610     if (bEmulateGPU)
1611     {
1612         *kernel_type = nbnxnk8x8x8_PlainC;
1613
1614         if (bDoNonbonded)
1615         {
1616             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1617         }
1618     }
1619     else if (bUseGPU)
1620     {
1621         *kernel_type = nbnxnk8x8x8_CUDA;
1622     }
1623
1624     if (*kernel_type == nbnxnkNotSet)
1625     {
1626         if (use_cpu_acceleration)
1627         {
1628             pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
1629                                   kernel_type, ewald_excl);
1630         }
1631         else
1632         {
1633             *kernel_type = nbnxnk4x4_PlainC;
1634         }
1635     }
1636
1637     if (bDoNonbonded && fp != NULL)
1638     {
1639         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1640                 lookup_nbnxn_kernel_name(*kernel_type),
1641                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1642                 nbnxn_kernel_to_cj_size(*kernel_type));
1643     }
1644 }
1645
1646 static void pick_nbnxn_resources(FILE                *fp,
1647                                  const t_commrec     *cr,
1648                                  const gmx_hw_info_t *hwinfo,
1649                                  gmx_bool             bDoNonbonded,
1650                                  gmx_bool            *bUseGPU,
1651                                  gmx_bool            *bEmulateGPU)
1652 {
1653     gmx_bool bEmulateGPUEnvVarSet;
1654     char     gpu_err_str[STRLEN];
1655
1656     *bUseGPU = FALSE;
1657
1658     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1659
1660     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1661      * GPUs (currently) only handle non-bonded calculations, we will
1662      * automatically switch to emulation if non-bonded calculations are
1663      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1664      * way to turn off GPU initialization, data movement, and cleanup.
1665      *
1666      * GPU emulation can be useful to assess the performance one can expect by
1667      * adding GPU(s) to the machine. The conditional below allows this even
1668      * if mdrun is compiled without GPU acceleration support.
1669      * Note that you should freezing the system as otherwise it will explode.
1670      */
1671     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1672                     (!bDoNonbonded && hwinfo->bCanUseGPU));
1673
1674     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1675      */
1676     if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
1677     {
1678         /* Each PP node will use the intra-node id-th device from the
1679          * list of detected/selected GPUs. */
1680         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
1681         {
1682             /* At this point the init should never fail as we made sure that
1683              * we have all the GPUs we need. If it still does, we'll bail. */
1684             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1685                       cr->nodeid,
1686                       get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
1687                       gpu_err_str);
1688         }
1689
1690         /* Here we actually turn on hardware GPU acceleration */
1691         *bUseGPU = TRUE;
1692     }
1693 }
1694
1695 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1696                             nonbonded_verlet_t *nbv,
1697                             int                 group)
1698 {
1699     gmx_bool bUsesSimpleTables = TRUE;
1700     int      grp_index;
1701
1702     switch (cutoff_scheme)
1703     {
1704         case ecutsGROUP:
1705             bUsesSimpleTables = TRUE;
1706             break;
1707         case ecutsVERLET:
1708             assert(NULL != nbv && NULL != nbv->grp);
1709             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1710             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1711             break;
1712         default:
1713             gmx_incons("unimplemented");
1714     }
1715     return bUsesSimpleTables;
1716 }
1717
1718 static void init_ewald_f_table(interaction_const_t *ic,
1719                                gmx_bool             bUsesSimpleTables,
1720                                real                 rtab)
1721 {
1722     real maxr;
1723
1724     if (bUsesSimpleTables)
1725     {
1726         /* With a spacing of 0.0005 we are at the force summation accuracy
1727          * for the SSE kernels for "normal" atomistic simulations.
1728          */
1729         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
1730                                                    ic->rcoulomb);
1731
1732         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1733         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1734     }
1735     else
1736     {
1737         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1738         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1739         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1740     }
1741
1742     sfree_aligned(ic->tabq_coul_FDV0);
1743     sfree_aligned(ic->tabq_coul_F);
1744     sfree_aligned(ic->tabq_coul_V);
1745
1746     /* Create the original table data in FDV0 */
1747     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1748     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1749     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1750     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1751                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
1752 }
1753
1754 void init_interaction_const_tables(FILE                *fp,
1755                                    interaction_const_t *ic,
1756                                    gmx_bool             bUsesSimpleTables,
1757                                    real                 rtab)
1758 {
1759     real spacing;
1760
1761     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1762     {
1763         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1764
1765         if (fp != NULL)
1766         {
1767             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1768                     1/ic->tabq_scale, ic->tabq_size);
1769         }
1770     }
1771 }
1772
1773 void init_interaction_const(FILE                 *fp,
1774                             interaction_const_t **interaction_const,
1775                             const t_forcerec     *fr,
1776                             real                  rtab)
1777 {
1778     interaction_const_t *ic;
1779     gmx_bool             bUsesSimpleTables = TRUE;
1780
1781     snew(ic, 1);
1782
1783     /* Just allocate something so we can free it */
1784     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1785     snew_aligned(ic->tabq_coul_F, 16, 32);
1786     snew_aligned(ic->tabq_coul_V, 16, 32);
1787
1788     ic->rlist       = fr->rlist;
1789     ic->rlistlong   = fr->rlistlong;
1790
1791     /* Lennard-Jones */
1792     ic->rvdw        = fr->rvdw;
1793     if (fr->vdw_modifier == eintmodPOTSHIFT)
1794     {
1795         ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1796     }
1797     else
1798     {
1799         ic->sh_invrc6 = 0;
1800     }
1801
1802     /* Electrostatics */
1803     ic->eeltype     = fr->eeltype;
1804     ic->rcoulomb    = fr->rcoulomb;
1805     ic->epsilon_r   = fr->epsilon_r;
1806     ic->epsfac      = fr->epsfac;
1807
1808     /* Ewald */
1809     ic->ewaldcoeff  = fr->ewaldcoeff;
1810     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1811     {
1812         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
1813     }
1814     else
1815     {
1816         ic->sh_ewald = 0;
1817     }
1818
1819     /* Reaction-field */
1820     if (EEL_RF(ic->eeltype))
1821     {
1822         ic->epsilon_rf = fr->epsilon_rf;
1823         ic->k_rf       = fr->k_rf;
1824         ic->c_rf       = fr->c_rf;
1825     }
1826     else
1827     {
1828         /* For plain cut-off we might use the reaction-field kernels */
1829         ic->epsilon_rf = ic->epsilon_r;
1830         ic->k_rf       = 0;
1831         if (fr->coulomb_modifier == eintmodPOTSHIFT)
1832         {
1833             ic->c_rf   = 1/ic->rcoulomb;
1834         }
1835         else
1836         {
1837             ic->c_rf   = 0;
1838         }
1839     }
1840
1841     if (fp != NULL)
1842     {
1843         fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1844                 sqr(ic->sh_invrc6), ic->sh_invrc6);
1845         if (ic->eeltype == eelCUT)
1846         {
1847             fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1848         }
1849         else if (EEL_PME(ic->eeltype))
1850         {
1851             fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1852         }
1853         fprintf(fp, "\n");
1854     }
1855
1856     *interaction_const = ic;
1857
1858     if (fr->nbv != NULL && fr->nbv->bUseGPU)
1859     {
1860         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv);
1861     }
1862
1863     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1864     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1865 }
1866
1867 static void init_nb_verlet(FILE                *fp,
1868                            nonbonded_verlet_t **nb_verlet,
1869                            const t_inputrec    *ir,
1870                            const t_forcerec    *fr,
1871                            const t_commrec     *cr,
1872                            const char          *nbpu_opt)
1873 {
1874     nonbonded_verlet_t *nbv;
1875     int                 i;
1876     char               *env;
1877     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
1878
1879     nbnxn_alloc_t      *nb_alloc;
1880     nbnxn_free_t       *nb_free;
1881
1882     snew(nbv, 1);
1883
1884     pick_nbnxn_resources(fp, cr, fr->hwinfo,
1885                          fr->bNonbonded,
1886                          &nbv->bUseGPU,
1887                          &bEmulateGPU);
1888
1889     nbv->nbs = NULL;
1890
1891     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1892     for (i = 0; i < nbv->ngrp; i++)
1893     {
1894         nbv->grp[i].nbl_lists.nnbl = 0;
1895         nbv->grp[i].nbat           = NULL;
1896         nbv->grp[i].kernel_type    = nbnxnkNotSet;
1897
1898         if (i == 0) /* local */
1899         {
1900             pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1901                               nbv->bUseGPU, bEmulateGPU,
1902                               ir,
1903                               &nbv->grp[i].kernel_type,
1904                               &nbv->grp[i].ewald_excl,
1905                               fr->bNonbonded);
1906         }
1907         else /* non-local */
1908         {
1909             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
1910             {
1911                 /* Use GPU for local, select a CPU kernel for non-local */
1912                 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1913                                   FALSE, FALSE,
1914                                   ir,
1915                                   &nbv->grp[i].kernel_type,
1916                                   &nbv->grp[i].ewald_excl,
1917                                   fr->bNonbonded);
1918
1919                 bHybridGPURun = TRUE;
1920             }
1921             else
1922             {
1923                 /* Use the same kernel for local and non-local interactions */
1924                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
1925                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
1926             }
1927         }
1928     }
1929
1930     if (nbv->bUseGPU)
1931     {
1932         /* init the NxN GPU data; the last argument tells whether we'll have
1933          * both local and non-local NB calculation on GPU */
1934         nbnxn_cuda_init(fp, &nbv->cu_nbv,
1935                         &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
1936                         (nbv->ngrp > 1) && !bHybridGPURun);
1937
1938         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
1939         {
1940             char *end;
1941
1942             nbv->min_ci_balanced = strtol(env, &end, 10);
1943             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
1944             {
1945                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
1946             }
1947
1948             if (debug)
1949             {
1950                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
1951                         nbv->min_ci_balanced);
1952             }
1953         }
1954         else
1955         {
1956             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
1957             if (debug)
1958             {
1959                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
1960                         nbv->min_ci_balanced);
1961             }
1962         }
1963     }
1964     else
1965     {
1966         nbv->min_ci_balanced = 0;
1967     }
1968
1969     *nb_verlet = nbv;
1970
1971     nbnxn_init_search(&nbv->nbs,
1972                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
1973                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
1974                       gmx_omp_nthreads_get(emntNonbonded));
1975
1976     for (i = 0; i < nbv->ngrp; i++)
1977     {
1978         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
1979         {
1980             nb_alloc = &pmalloc;
1981             nb_free  = &pfree;
1982         }
1983         else
1984         {
1985             nb_alloc = NULL;
1986             nb_free  = NULL;
1987         }
1988
1989         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
1990                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
1991                                 /* 8x8x8 "non-simple" lists are ATM always combined */
1992                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
1993                                 nb_alloc, nb_free);
1994
1995         if (i == 0 ||
1996             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
1997         {
1998             snew(nbv->grp[i].nbat, 1);
1999             nbnxn_atomdata_init(fp,
2000                                 nbv->grp[i].nbat,
2001                                 nbv->grp[i].kernel_type,
2002                                 fr->ntype, fr->nbfp,
2003                                 ir->opts.ngener,
2004                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2005                                 nb_alloc, nb_free);
2006         }
2007         else
2008         {
2009             nbv->grp[i].nbat = nbv->grp[0].nbat;
2010         }
2011     }
2012 }
2013
2014 void init_forcerec(FILE              *fp,
2015                    const output_env_t oenv,
2016                    t_forcerec        *fr,
2017                    t_fcdata          *fcd,
2018                    const t_inputrec  *ir,
2019                    const gmx_mtop_t  *mtop,
2020                    const t_commrec   *cr,
2021                    matrix             box,
2022                    gmx_bool           bMolEpot,
2023                    const char        *tabfn,
2024                    const char        *tabafn,
2025                    const char        *tabpfn,
2026                    const char        *tabbfn,
2027                    const char        *nbpu_opt,
2028                    gmx_bool           bNoSolvOpt,
2029                    real               print_force)
2030 {
2031     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2032     real           rtab;
2033     char          *env;
2034     double         dbl;
2035     rvec           box_size;
2036     const t_block *cgs;
2037     gmx_bool       bGenericKernelOnly;
2038     gmx_bool       bTab, bSep14tab, bNormalnblists;
2039     t_nblists     *nbl;
2040     int           *nm_ind, egp_flags;
2041
2042     if (fr->hwinfo == NULL)
2043     {
2044         /* Detect hardware, gather information.
2045          * In mdrun, hwinfo has already been set before calling init_forcerec.
2046          * Here we ignore GPUs, as tools will not use them anyhow.
2047          */
2048         snew(fr->hwinfo, 1);
2049         gmx_detect_hardware(fp, fr->hwinfo, cr,
2050                             FALSE, FALSE, NULL);
2051     }
2052
2053     /* By default we turn acceleration on, but it might be turned off further down... */
2054     fr->use_cpu_acceleration = TRUE;
2055
2056     fr->bDomDec = DOMAINDECOMP(cr);
2057
2058     natoms = mtop->natoms;
2059
2060     if (check_box(ir->ePBC, box))
2061     {
2062         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2063     }
2064
2065     /* Test particle insertion ? */
2066     if (EI_TPI(ir->eI))
2067     {
2068         /* Set to the size of the molecule to be inserted (the last one) */
2069         /* Because of old style topologies, we have to use the last cg
2070          * instead of the last molecule type.
2071          */
2072         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2073         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2074         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2075         {
2076             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2077         }
2078     }
2079     else
2080     {
2081         fr->n_tpi = 0;
2082     }
2083
2084     /* Copy AdResS parameters */
2085     if (ir->bAdress)
2086     {
2087         fr->adress_type           = ir->adress->type;
2088         fr->adress_const_wf       = ir->adress->const_wf;
2089         fr->adress_ex_width       = ir->adress->ex_width;
2090         fr->adress_hy_width       = ir->adress->hy_width;
2091         fr->adress_icor           = ir->adress->icor;
2092         fr->adress_site           = ir->adress->site;
2093         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2094         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2095
2096
2097         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2098         for (i = 0; i < ir->adress->n_energy_grps; i++)
2099         {
2100             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2101         }
2102
2103         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2104         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2105         for (i = 0; i < fr->n_adress_tf_grps; i++)
2106         {
2107             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2108         }
2109         copy_rvec(ir->adress->refs, fr->adress_refs);
2110     }
2111     else
2112     {
2113         fr->adress_type           = eAdressOff;
2114         fr->adress_do_hybridpairs = FALSE;
2115     }
2116
2117     /* Copy the user determined parameters */
2118     fr->userint1  = ir->userint1;
2119     fr->userint2  = ir->userint2;
2120     fr->userint3  = ir->userint3;
2121     fr->userint4  = ir->userint4;
2122     fr->userreal1 = ir->userreal1;
2123     fr->userreal2 = ir->userreal2;
2124     fr->userreal3 = ir->userreal3;
2125     fr->userreal4 = ir->userreal4;
2126
2127     /* Shell stuff */
2128     fr->fc_stepsize = ir->fc_stepsize;
2129
2130     /* Free energy */
2131     fr->efep        = ir->efep;
2132     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2133     if (ir->fepvals->bScCoul)
2134     {
2135         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2136         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2137     }
2138     else
2139     {
2140         fr->sc_alphacoul  = 0;
2141         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2142     }
2143     fr->sc_power      = ir->fepvals->sc_power;
2144     fr->sc_r_power    = ir->fepvals->sc_r_power;
2145     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2146
2147     env = getenv("GMX_SCSIGMA_MIN");
2148     if (env != NULL)
2149     {
2150         dbl = 0;
2151         sscanf(env, "%lf", &dbl);
2152         fr->sc_sigma6_min = pow(dbl, 6);
2153         if (fp)
2154         {
2155             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2156         }
2157     }
2158
2159     fr->bNonbonded = TRUE;
2160     if (getenv("GMX_NO_NONBONDED") != NULL)
2161     {
2162         /* turn off non-bonded calculations */
2163         fr->bNonbonded = FALSE;
2164         md_print_warn(cr, fp,
2165                       "Found environment variable GMX_NO_NONBONDED.\n"
2166                       "Disabling nonbonded calculations.\n");
2167     }
2168
2169     bGenericKernelOnly = FALSE;
2170
2171     /* We now check in the NS code whether a particular combination of interactions
2172      * can be used with water optimization, and disable it if that is not the case.
2173      */
2174
2175     if (getenv("GMX_NB_GENERIC") != NULL)
2176     {
2177         if (fp != NULL)
2178         {
2179             fprintf(fp,
2180                     "Found environment variable GMX_NB_GENERIC.\n"
2181                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2182                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2183         }
2184         bGenericKernelOnly = TRUE;
2185     }
2186
2187     if (bGenericKernelOnly == TRUE)
2188     {
2189         bNoSolvOpt         = TRUE;
2190     }
2191
2192     if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2193     {
2194         fr->use_cpu_acceleration = FALSE;
2195         if (fp != NULL)
2196         {
2197             fprintf(fp,
2198                     "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2199                     "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2200         }
2201     }
2202
2203     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2204
2205     /* Check if we can/should do all-vs-all kernels */
2206     fr->bAllvsAll       = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
2207     fr->AllvsAll_work   = NULL;
2208     fr->AllvsAll_workgb = NULL;
2209
2210
2211     /* Neighbour searching stuff */
2212     fr->cutoff_scheme = ir->cutoff_scheme;
2213     fr->bGrid         = (ir->ns_type == ensGRID);
2214     fr->ePBC          = ir->ePBC;
2215
2216     /* Determine if we will do PBC for distances in bonded interactions */
2217     if (fr->ePBC == epbcNONE)
2218     {
2219         fr->bMolPBC = FALSE;
2220     }
2221     else
2222     {
2223         if (!DOMAINDECOMP(cr))
2224         {
2225             /* The group cut-off scheme and SHAKE assume charge groups
2226              * are whole, but not using molpbc is faster in most cases.
2227              */
2228             if (fr->cutoff_scheme == ecutsGROUP ||
2229                 (ir->eConstrAlg == econtSHAKE &&
2230                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2231                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2232             {
2233                 fr->bMolPBC = ir->bPeriodicMols;
2234             }
2235             else
2236             {
2237                 fr->bMolPBC = TRUE;
2238                 if (getenv("GMX_USE_GRAPH") != NULL)
2239                 {
2240                     fr->bMolPBC = FALSE;
2241                     if (fp)
2242                     {
2243                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2244                     }
2245                 }
2246             }
2247         }
2248         else
2249         {
2250             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2251         }
2252     }
2253     fr->bGB = (ir->implicit_solvent == eisGBSA);
2254
2255     fr->rc_scaling = ir->refcoord_scaling;
2256     copy_rvec(ir->posres_com, fr->posres_com);
2257     copy_rvec(ir->posres_comB, fr->posres_comB);
2258     fr->rlist      = cutoff_inf(ir->rlist);
2259     fr->rlistlong  = cutoff_inf(ir->rlistlong);
2260     fr->eeltype    = ir->coulombtype;
2261     fr->vdwtype    = ir->vdwtype;
2262
2263     fr->coulomb_modifier = ir->coulomb_modifier;
2264     fr->vdw_modifier     = ir->vdw_modifier;
2265
2266     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2267     switch (fr->eeltype)
2268     {
2269         case eelCUT:
2270             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2271             break;
2272
2273         case eelRF:
2274         case eelGRF:
2275         case eelRF_NEC:
2276             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2277             break;
2278
2279         case eelRF_ZERO:
2280             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2281             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2282             break;
2283
2284         case eelSWITCH:
2285         case eelSHIFT:
2286         case eelUSER:
2287         case eelENCADSHIFT:
2288         case eelPMESWITCH:
2289         case eelPMEUSER:
2290         case eelPMEUSERSWITCH:
2291             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2292             break;
2293
2294         case eelPME:
2295         case eelEWALD:
2296             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2297             break;
2298
2299         default:
2300             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2301             break;
2302     }
2303
2304     /* Vdw: Translate from mdp settings to kernel format */
2305     switch (fr->vdwtype)
2306     {
2307         case evdwCUT:
2308             if (fr->bBHAM)
2309             {
2310                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2311             }
2312             else
2313             {
2314                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2315             }
2316             break;
2317
2318         case evdwSWITCH:
2319         case evdwSHIFT:
2320         case evdwUSER:
2321         case evdwENCADSHIFT:
2322             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2323             break;
2324
2325         default:
2326             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2327             break;
2328     }
2329
2330     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2331     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2332     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2333
2334     fr->bTwinRange = fr->rlistlong > fr->rlist;
2335     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2336
2337     fr->reppow     = mtop->ffparams.reppow;
2338
2339     if (ir->cutoff_scheme == ecutsGROUP)
2340     {
2341         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2342                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2343         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2344         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2345                            fr->eeltype == eelEWALD ||
2346                            fr->eeltype == eelPME ||
2347                            fr->eeltype == eelRF ||
2348                            fr->eeltype == eelRF_ZERO);
2349
2350         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2351          * going to be faster to tabulate the interaction than calling the generic kernel.
2352          */
2353         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2354         {
2355             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2356             {
2357                 fr->bcoultab = TRUE;
2358             }
2359         }
2360         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2361                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2362                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2363                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2364         {
2365             if (fr->rcoulomb != fr->rvdw)
2366             {
2367                 fr->bcoultab = TRUE;
2368             }
2369         }
2370
2371         if (getenv("GMX_REQUIRE_TABLES"))
2372         {
2373             fr->bvdwtab  = TRUE;
2374             fr->bcoultab = TRUE;
2375         }
2376
2377         if (fp)
2378         {
2379             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2380             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2381         }
2382
2383         if (fr->bvdwtab == TRUE)
2384         {
2385             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2386             fr->nbkernel_vdw_modifier    = eintmodNONE;
2387         }
2388         if (fr->bcoultab == TRUE)
2389         {
2390             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2391             fr->nbkernel_elec_modifier    = eintmodNONE;
2392         }
2393     }
2394
2395     if (ir->cutoff_scheme == ecutsVERLET)
2396     {
2397         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2398         {
2399             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2400         }
2401         fr->bvdwtab  = FALSE;
2402         fr->bcoultab = FALSE;
2403     }
2404
2405     /* Tables are used for direct ewald sum */
2406     if (fr->bEwald)
2407     {
2408         if (EEL_PME(ir->coulombtype))
2409         {
2410             if (fp)
2411             {
2412                 fprintf(fp, "Will do PME sum in reciprocal space.\n");
2413             }
2414             if (ir->coulombtype == eelP3M_AD)
2415             {
2416                 please_cite(fp, "Hockney1988");
2417                 please_cite(fp, "Ballenegger2012");
2418             }
2419             else
2420             {
2421                 please_cite(fp, "Essmann95a");
2422             }
2423
2424             if (ir->ewald_geometry == eewg3DC)
2425             {
2426                 if (fp)
2427                 {
2428                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2429                 }
2430                 please_cite(fp, "In-Chul99a");
2431             }
2432         }
2433         fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
2434         init_ewald_tab(&(fr->ewald_table), cr, ir, fp);
2435         if (fp)
2436         {
2437             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2438                     1/fr->ewaldcoeff);
2439         }
2440     }
2441
2442     /* Electrostatics */
2443     fr->epsilon_r       = ir->epsilon_r;
2444     fr->epsilon_rf      = ir->epsilon_rf;
2445     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2446     fr->rcoulomb_switch = ir->rcoulomb_switch;
2447     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2448
2449     /* Parameters for generalized RF */
2450     fr->zsquare = 0.0;
2451     fr->temp    = 0.0;
2452
2453     if (fr->eeltype == eelGRF)
2454     {
2455         init_generalized_rf(fp, mtop, ir, fr);
2456     }
2457     else if (fr->eeltype == eelSHIFT)
2458     {
2459         for (m = 0; (m < DIM); m++)
2460         {
2461             box_size[m] = box[m][m];
2462         }
2463
2464         if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
2465         {
2466             set_shift_consts(fp, fr->rcoulomb_switch, fr->rcoulomb, box_size, fr);
2467         }
2468     }
2469
2470     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
2471                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2472                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2473                        IR_ELEC_FIELD(*ir) ||
2474                        (fr->adress_icor != eAdressICOff)
2475                        );
2476
2477     if (fr->cutoff_scheme == ecutsGROUP &&
2478         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2479     {
2480         /* Count the total number of charge groups */
2481         fr->cg_nalloc = ncg_mtop(mtop);
2482         srenew(fr->cg_cm, fr->cg_nalloc);
2483     }
2484     if (fr->shift_vec == NULL)
2485     {
2486         snew(fr->shift_vec, SHIFTS);
2487     }
2488
2489     if (fr->fshift == NULL)
2490     {
2491         snew(fr->fshift, SHIFTS);
2492     }
2493
2494     if (fr->nbfp == NULL)
2495     {
2496         fr->ntype = mtop->ffparams.atnr;
2497         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2498     }
2499
2500     /* Copy the energy group exclusions */
2501     fr->egp_flags = ir->opts.egp_flags;
2502
2503     /* Van der Waals stuff */
2504     fr->rvdw        = cutoff_inf(ir->rvdw);
2505     fr->rvdw_switch = ir->rvdw_switch;
2506     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2507     {
2508         if (fr->rvdw_switch >= fr->rvdw)
2509         {
2510             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2511                       fr->rvdw_switch, fr->rvdw);
2512         }
2513         if (fp)
2514         {
2515             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2516                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2517                     fr->rvdw_switch, fr->rvdw);
2518         }
2519     }
2520
2521     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2522     {
2523         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2524     }
2525
2526     if (fp)
2527     {
2528         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2529                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2530     }
2531
2532     fr->eDispCorr = ir->eDispCorr;
2533     if (ir->eDispCorr != edispcNO)
2534     {
2535         set_avcsixtwelve(fp, fr, mtop);
2536     }
2537
2538     if (fr->bBHAM)
2539     {
2540         set_bham_b_max(fp, fr, mtop);
2541     }
2542
2543     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2544
2545     /* Copy the GBSA data (radius, volume and surftens for each
2546      * atomtype) from the topology atomtype section to forcerec.
2547      */
2548     snew(fr->atype_radius, fr->ntype);
2549     snew(fr->atype_vol, fr->ntype);
2550     snew(fr->atype_surftens, fr->ntype);
2551     snew(fr->atype_gb_radius, fr->ntype);
2552     snew(fr->atype_S_hct, fr->ntype);
2553
2554     if (mtop->atomtypes.nr > 0)
2555     {
2556         for (i = 0; i < fr->ntype; i++)
2557         {
2558             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2559         }
2560         for (i = 0; i < fr->ntype; i++)
2561         {
2562             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2563         }
2564         for (i = 0; i < fr->ntype; i++)
2565         {
2566             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2567         }
2568         for (i = 0; i < fr->ntype; i++)
2569         {
2570             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2571         }
2572         for (i = 0; i < fr->ntype; i++)
2573         {
2574             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2575         }
2576     }
2577
2578     /* Generate the GB table if needed */
2579     if (fr->bGB)
2580     {
2581 #ifdef GMX_DOUBLE
2582         fr->gbtabscale = 2000;
2583 #else
2584         fr->gbtabscale = 500;
2585 #endif
2586
2587         fr->gbtabr = 100;
2588         fr->gbtab  = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
2589
2590         init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
2591
2592         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2593         if (!DOMAINDECOMP(cr))
2594         {
2595             make_local_gb(cr, fr->born, ir->gb_algorithm);
2596         }
2597     }
2598
2599     /* Set the charge scaling */
2600     if (fr->epsilon_r != 0)
2601     {
2602         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2603     }
2604     else
2605     {
2606         /* eps = 0 is infinite dieletric: no coulomb interactions */
2607         fr->epsfac = 0;
2608     }
2609
2610     /* Reaction field constants */
2611     if (EEL_RF(fr->eeltype))
2612     {
2613         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2614                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2615                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2616     }
2617
2618     set_chargesum(fp, fr, mtop);
2619
2620     /* if we are using LR electrostatics, and they are tabulated,
2621      * the tables will contain modified coulomb interactions.
2622      * Since we want to use the non-shifted ones for 1-4
2623      * coulombic interactions, we must have an extra set of tables.
2624      */
2625
2626     /* Construct tables.
2627      * A little unnecessary to make both vdw and coul tables sometimes,
2628      * but what the heck... */
2629
2630     bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2631
2632     bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2633                   fr->bBHAM || fr->bEwald) &&
2634                  (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2635                   gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2636                   gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2637
2638     negp_pp   = ir->opts.ngener - ir->nwall;
2639     negptable = 0;
2640     if (!bTab)
2641     {
2642         bNormalnblists = TRUE;
2643         fr->nnblists   = 1;
2644     }
2645     else
2646     {
2647         bNormalnblists = (ir->eDispCorr != edispcNO);
2648         for (egi = 0; egi < negp_pp; egi++)
2649         {
2650             for (egj = egi; egj < negp_pp; egj++)
2651             {
2652                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2653                 if (!(egp_flags & EGP_EXCL))
2654                 {
2655                     if (egp_flags & EGP_TABLE)
2656                     {
2657                         negptable++;
2658                     }
2659                     else
2660                     {
2661                         bNormalnblists = TRUE;
2662                     }
2663                 }
2664             }
2665         }
2666         if (bNormalnblists)
2667         {
2668             fr->nnblists = negptable + 1;
2669         }
2670         else
2671         {
2672             fr->nnblists = negptable;
2673         }
2674         if (fr->nnblists > 1)
2675         {
2676             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2677         }
2678     }
2679
2680     if (ir->adress)
2681     {
2682         fr->nnblists *= 2;
2683     }
2684
2685     snew(fr->nblists, fr->nnblists);
2686
2687     /* This code automatically gives table length tabext without cut-off's,
2688      * in that case grompp should already have checked that we do not need
2689      * normal tables and we only generate tables for 1-4 interactions.
2690      */
2691     rtab = ir->rlistlong + ir->tabext;
2692
2693     if (bTab)
2694     {
2695         /* make tables for ordinary interactions */
2696         if (bNormalnblists)
2697         {
2698             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2699             if (ir->adress)
2700             {
2701                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2702             }
2703             if (!bSep14tab)
2704             {
2705                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2706             }
2707             m = 1;
2708         }
2709         else
2710         {
2711             m = 0;
2712         }
2713         if (negptable > 0)
2714         {
2715             /* Read the special tables for certain energy group pairs */
2716             nm_ind = mtop->groups.grps[egcENER].nm_ind;
2717             for (egi = 0; egi < negp_pp; egi++)
2718             {
2719                 for (egj = egi; egj < negp_pp; egj++)
2720                 {
2721                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2722                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2723                     {
2724                         nbl = &(fr->nblists[m]);
2725                         if (fr->nnblists > 1)
2726                         {
2727                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2728                         }
2729                         /* Read the table file with the two energy groups names appended */
2730                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2731                                         *mtop->groups.grpname[nm_ind[egi]],
2732                                         *mtop->groups.grpname[nm_ind[egj]],
2733                                         &fr->nblists[m]);
2734                         if (ir->adress)
2735                         {
2736                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2737                                             *mtop->groups.grpname[nm_ind[egi]],
2738                                             *mtop->groups.grpname[nm_ind[egj]],
2739                                             &fr->nblists[fr->nnblists/2+m]);
2740                         }
2741                         m++;
2742                     }
2743                     else if (fr->nnblists > 1)
2744                     {
2745                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2746                     }
2747                 }
2748             }
2749         }
2750     }
2751     if (bSep14tab)
2752     {
2753         /* generate extra tables with plain Coulomb for 1-4 interactions only */
2754         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2755                                 GMX_MAKETABLES_14ONLY);
2756     }
2757
2758     /* Read AdResS Thermo Force table if needed */
2759     if (fr->adress_icor == eAdressICThermoForce)
2760     {
2761         /* old todo replace */
2762
2763         if (ir->adress->n_tf_grps > 0)
2764         {
2765             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2766
2767         }
2768         else
2769         {
2770             /* load the default table */
2771             snew(fr->atf_tabs, 1);
2772             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2773         }
2774     }
2775
2776     /* Wall stuff */
2777     fr->nwall = ir->nwall;
2778     if (ir->nwall && ir->wall_type == ewtTABLE)
2779     {
2780         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2781     }
2782
2783     if (fcd && tabbfn)
2784     {
2785         fcd->bondtab  = make_bonded_tables(fp,
2786                                            F_TABBONDS, F_TABBONDSNC,
2787                                            mtop, tabbfn, "b");
2788         fcd->angletab = make_bonded_tables(fp,
2789                                            F_TABANGLES, -1,
2790                                            mtop, tabbfn, "a");
2791         fcd->dihtab   = make_bonded_tables(fp,
2792                                            F_TABDIHS, -1,
2793                                            mtop, tabbfn, "d");
2794     }
2795     else
2796     {
2797         if (debug)
2798         {
2799             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2800         }
2801     }
2802
2803     /* QM/MM initialization if requested
2804      */
2805     if (ir->bQMMM)
2806     {
2807         fprintf(stderr, "QM/MM calculation requested.\n");
2808     }
2809
2810     fr->bQMMM      = ir->bQMMM;
2811     fr->qr         = mk_QMMMrec();
2812
2813     /* Set all the static charge group info */
2814     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2815                                    &fr->bExcl_IntraCGAll_InterCGNone);
2816     if (DOMAINDECOMP(cr))
2817     {
2818         fr->cginfo = NULL;
2819     }
2820     else
2821     {
2822         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2823     }
2824
2825     if (!DOMAINDECOMP(cr))
2826     {
2827         /* When using particle decomposition, the effect of the second argument,
2828          * which sets fr->hcg, is corrected later in do_md and init_em.
2829          */
2830         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2831                             mtop->natoms, mtop->natoms, mtop->natoms);
2832     }
2833
2834     fr->print_force = print_force;
2835
2836
2837     /* coarse load balancing vars */
2838     fr->t_fnbf    = 0.;
2839     fr->t_wait    = 0.;
2840     fr->timesteps = 0;
2841
2842     /* Initialize neighbor search */
2843     init_ns(fp, cr, &fr->ns, fr, mtop, box);
2844
2845     if (cr->duty & DUTY_PP)
2846     {
2847         gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
2848         /*
2849            if (ir->bAdress)
2850             {
2851                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2852             }
2853          */
2854     }
2855
2856     /* Initialize the thread working data for bonded interactions */
2857     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2858
2859     snew(fr->excl_load, fr->nthreads+1);
2860
2861     if (fr->cutoff_scheme == ecutsVERLET)
2862     {
2863         if (ir->rcoulomb != ir->rvdw)
2864         {
2865             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2866         }
2867
2868         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2869     }
2870
2871     /* fr->ic is used both by verlet and group kernels (to some extent) now */
2872     init_interaction_const(fp, &fr->ic, fr, rtab);
2873     if (ir->eDispCorr != edispcNO)
2874     {
2875         calc_enervirdiff(fp, ir->eDispCorr, fr);
2876     }
2877 }
2878
/* Debug-print helpers used by pr_forcerec.
 * Each one writes a single line "<label>: <value>" to the stream fp, where
 * <label> is the literal source text of the second macro argument.
 * pr_real formats the value with %e, pr_int with %d, and pr_bool prints
 * the string bool_names[b].
 * All three expand to a single fprintf call expression.
 */
#define pr_real(fp, r) fprintf((fp), "%s: %e\n", #r, (r))
#define pr_int(fp, i)  fprintf((fp), "%s: %d\n", #i, (i))
#define pr_bool(fp, b) fprintf((fp), "%s: %s\n", #b, bool_names[(b)])
2882
2883 void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
2884 {
2885     int i;
2886
2887     pr_real(fp, fr->rlist);
2888     pr_real(fp, fr->rcoulomb);
2889     pr_real(fp, fr->fudgeQQ);
2890     pr_bool(fp, fr->bGrid);
2891     pr_bool(fp, fr->bTwinRange);
2892     /*pr_int(fp,fr->cg0);
2893        pr_int(fp,fr->hcg);*/
2894     for (i = 0; i < fr->nnblists; i++)
2895     {
2896         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
2897     }
2898     pr_real(fp, fr->rcoulomb_switch);
2899     pr_real(fp, fr->rcoulomb);
2900
2901     fflush(fp);
2902 }
2903
2904 void forcerec_set_excl_load(t_forcerec *fr,
2905                             const gmx_localtop_t *top, const t_commrec *cr)
2906 {
2907     const int *ind, *a;
2908     int        t, i, j, ntot, n, ntarget;
2909
2910     if (cr != NULL && PARTDECOMP(cr))
2911     {
2912         /* No OpenMP with particle decomposition */
2913         pd_at_range(cr,
2914                     &fr->excl_load[0],
2915                     &fr->excl_load[1]);
2916
2917         return;
2918     }
2919
2920     ind = top->excls.index;
2921     a   = top->excls.a;
2922
2923     ntot = 0;
2924     for (i = 0; i < top->excls.nr; i++)
2925     {
2926         for (j = ind[i]; j < ind[i+1]; j++)
2927         {
2928             if (a[j] > i)
2929             {
2930                 ntot++;
2931             }
2932         }
2933     }
2934
2935     fr->excl_load[0] = 0;
2936     n                = 0;
2937     i                = 0;
2938     for (t = 1; t <= fr->nthreads; t++)
2939     {
2940         ntarget = (ntot*t)/fr->nthreads;
2941         while (i < top->excls.nr && n < ntarget)
2942         {
2943             for (j = ind[i]; j < ind[i+1]; j++)
2944             {
2945                 if (a[j] > i)
2946                 {
2947                     n++;
2948                 }
2949             }
2950             i++;
2951         }
2952         fr->excl_load[t] = i;
2953     }
2954 }