src/gromacs/mdlib/forcerec.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 3.2.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2004, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * GROwing Monsters And Cloning Shrimps
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include <math.h>
  41 #include <string.h>
  42 #include <assert.h>
  43 #include "sysstuff.h"
  44 #include "typedefs.h"
  45 #include "vec.h"
  46 #include "maths.h"
  47 #include "macros.h"
  48 #include "smalloc.h"
  49 #include "macros.h"
  50 #include "gmx_fatal.h"
  51 #include "gmx_fatal_collective.h"
  52 #include "physics.h"
  53 #include "force.h"
  54 #include "tables.h"
  55 #include "nonbonded.h"
  56 #include "invblock.h"
  57 #include "names.h"
  58 #include "network.h"
  59 #include "pbc.h"
  60 #include "ns.h"
  61 #include "mshift.h"
  62 #include "txtdump.h"
  63 #include "coulomb.h"
  64 #include "md_support.h"
  65 #include "md_logging.h"
  66 #include "domdec.h"
  67 #include "partdec.h"
  68 #include "qmmm.h"
  69 #include "copyrite.h"
  70 #include "mtop_util.h"
  71 #include "nbnxn_search.h"
  72 #include "nbnxn_atomdata.h"
  73 #include "nbnxn_consts.h"
  74 #include "statutil.h"
  75 #include "gmx_omp_nthreads.h"
  76 #include "gmx_detect_hardware.h"
  77
  78 #ifdef _MSC_VER
  79 /* MSVC definition for __cpuid() */
  80 #include <intrin.h>
  81 #endif
  82
  83 #include "types/nbnxn_cuda_types_ext.h"
  84 #include "gpu_utils.h"
  85 #include "nbnxn_cuda_data_mgmt.h"
  86 #include "pmalloc_cuda.h"
  87
  88 t_forcerec *mk_forcerec(void)
  89 {
  90     t_forcerec *fr;
  91
  92     snew(fr, 1);
  93
  94     return fr;
  95 }
  96
  97 #ifdef DEBUG
  98 static void pr_nbfp(FILE *fp, real *nbfp, gmx_bool bBHAM, int atnr)
  99 {
 100     int i, j;
 101
 102     for (i = 0; (i < atnr); i++)
 103     {
 104         for (j = 0; (j < atnr); j++)
 105         {
 106             fprintf(fp, "%2d - %2d", i, j);
 107             if (bBHAM)
 108             {
 109                 fprintf(fp, "  a=%10g, b=%10g, c=%10g\n", BHAMA(nbfp, atnr, i, j),
 110                         BHAMB(nbfp, atnr, i, j), BHAMC(nbfp, atnr, i, j)/6.0);
 111             }
 112             else
 113             {
 114                 fprintf(fp, "  c6=%10g, c12=%10g\n", C6(nbfp, atnr, i, j)/6.0,
 115                         C12(nbfp, atnr, i, j)/12.0);
 116             }
 117         }
 118     }
 119 }
 120 #endif
 121
 122 static real *mk_nbfp(const gmx_ffparams_t *idef, gmx_bool bBHAM)
 123 {
 124     real *nbfp;
 125     int   i, j, k, atnr;
 126
 127     atnr = idef->atnr;
 128     if (bBHAM)
 129     {
 130         snew(nbfp, 3*atnr*atnr);
 131         for (i = k = 0; (i < atnr); i++)
 132         {
 133             for (j = 0; (j < atnr); j++, k++)
 134             {
 135                 BHAMA(nbfp, atnr, i, j) = idef->iparams[k].bham.a;
 136                 BHAMB(nbfp, atnr, i, j) = idef->iparams[k].bham.b;
 137                 /* nbfp now includes the 6.0 derivative prefactor */
 138                 BHAMC(nbfp, atnr, i, j) = idef->iparams[k].bham.c*6.0;
 139             }
 140         }
 141     }
 142     else
 143     {
 144         snew(nbfp, 2*atnr*atnr);
 145         for (i = k = 0; (i < atnr); i++)
 146         {
 147             for (j = 0; (j < atnr); j++, k++)
 148             {
 149                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
 150                 C6(nbfp, atnr, i, j)   = idef->iparams[k].lj.c6*6.0;
 151                 C12(nbfp, atnr, i, j)  = idef->iparams[k].lj.c12*12.0;
 152             }
 153         }
 154     }
 155
 156     return nbfp;
 157 }
 158
 159 /* This routine sets fr->solvent_opt to the most common solvent in the
 160  * system, e.g. esolSPC or esolTIP4P. It will also mark each charge group in
 161  * the fr->solvent_type array with the correct type (or esolNO).
 162  *
 163  * Charge groups that fulfill the conditions but are not identical to the
 164  * most common one will be marked as esolNO in the solvent_type array.
 165  *
 166  * TIP3p is identical to SPC for these purposes, so we call it
 167  * SPC in the arrays (Apologies to Bill Jorgensen ;-)
 168  *
 169  * NOTE: QM particle should not
 170  * become an optimized solvent. Not even if there is only one charge
 171  * group in the Qm
 172  */
 173
 174 typedef struct
 175 {
 176     int    model;
 177     int    count;
 178     int    vdwtype[4];
 179     real   charge[4];
 180 } solvent_parameters_t;
 181
 182 static void
 183 check_solvent_cg(const gmx_moltype_t    *molt,
 184                  int                     cg0,
 185                  int                     nmol,
 186                  const unsigned char    *qm_grpnr,
 187                  const t_grps           *qm_grps,
 188                  t_forcerec   *          fr,
 189                  int                    *n_solvent_parameters,
 190                  solvent_parameters_t  **solvent_parameters_p,
 191                  int                     cginfo,
 192                  int                    *cg_sp)
 193 {
 194     const t_blocka     *  excl;
 195     t_atom               *atom;
 196     int                   j, k;
 197     int                   j0, j1, nj;
 198     gmx_bool              perturbed;
 199     gmx_bool              has_vdw[4];
 200     gmx_bool              match;
 201     real                  tmp_charge[4];
 202     int                   tmp_vdwtype[4];
 203     int                   tjA;
 204     gmx_bool              qm;
 205     solvent_parameters_t *solvent_parameters;
 206
 207     /* We use a list with parameters for each solvent type.
 208      * Every time we discover a new molecule that fulfills the basic
 209      * conditions for a solvent we compare with the previous entries
 210      * in these lists. If the parameters are the same we just increment
 211      * the counter for that type, and otherwise we create a new type
 212      * based on the current molecule.
 213      *
 214      * Once we've finished going through all molecules we check which
 215      * solvent is most common, and mark all those molecules while we
 216      * clear the flag on all others.
 217      */
 218
 219     solvent_parameters = *solvent_parameters_p;
 220
 221     /* Mark the cg first as non optimized */
 222     *cg_sp = -1;
 223
 224     /* Check if this cg has no exclusions with atoms in other charge groups
 225      * and all atoms inside the charge group excluded.
 226      * We only have 3 or 4 atom solvent loops.
 227      */
 228     if (GET_CGINFO_EXCL_INTER(cginfo) ||
 229         !GET_CGINFO_EXCL_INTRA(cginfo))
 230     {
 231         return;
 232     }
 233
 234     /* Get the indices of the first atom in this charge group */
 235     j0     = molt->cgs.index[cg0];
 236     j1     = molt->cgs.index[cg0+1];
 237
 238     /* Number of atoms in our molecule */
 239     nj     = j1 - j0;
 240
 241     if (debug)
 242     {
 243         fprintf(debug,
 244                 "Moltype '%s': there are %d atoms in this charge group\n",
 245                 *molt->name, nj);
 246     }
 247
 248     /* Check if it could be an SPC (3 atoms) or TIP4p (4) water,
 249      * otherwise skip it.
 250      */
 251     if (nj < 3 || nj > 4)
 252     {
 253         return;
 254     }
 255
 256     /* Check if we are doing QM on this group */
 257     qm = FALSE;
 258     if (qm_grpnr != NULL)
 259     {
 260         for (j = j0; j < j1 && !qm; j++)
 261         {
 262             qm = (qm_grpnr[j] < qm_grps->nr - 1);
 263         }
 264     }
 265     /* Cannot use solvent optimization with QM */
 266     if (qm)
 267     {
 268         return;
 269     }
 270
 271     atom = molt->atoms.atom;
 272
 273     /* Still looks like a solvent, time to check parameters */
 274
 275     /* If it is perturbed (free energy) we can't use the solvent loops,
 276      * so then we just skip to the next molecule.
 277      */
 278     perturbed = FALSE;
 279
 280     for (j = j0; j < j1 && !perturbed; j++)
 281     {
 282         perturbed = PERTURBED(atom[j]);
 283     }
 284
 285     if (perturbed)
 286     {
 287         return;
 288     }
 289
 290     /* Now it's only a question if the VdW and charge parameters
 291      * are OK. Before doing the check we compare and see if they are
 292      * identical to a possible previous solvent type.
 293      * First we assign the current types and charges.
 294      */
 295     for (j = 0; j < nj; j++)
 296     {
 297         tmp_vdwtype[j] = atom[j0+j].type;
 298         tmp_charge[j]  = atom[j0+j].q;
 299     }
 300
 301     /* Does it match any previous solvent type? */
 302     for (k = 0; k < *n_solvent_parameters; k++)
 303     {
 304         match = TRUE;
 305
 306
 307         /* We can only match SPC with 3 atoms and TIP4p with 4 atoms */
 308         if ( (solvent_parameters[k].model == esolSPC   && nj != 3)  ||
 309              (solvent_parameters[k].model == esolTIP4P && nj != 4) )
 310         {
 311             match = FALSE;
 312         }
 313
 314         /* Check that types & charges match for all atoms in molecule */
 315         for (j = 0; j < nj && match == TRUE; j++)
 316         {
 317             if (tmp_vdwtype[j] != solvent_parameters[k].vdwtype[j])
 318             {
 319                 match = FALSE;
 320             }
 321             if (tmp_charge[j] != solvent_parameters[k].charge[j])
 322             {
 323                 match = FALSE;
 324             }
 325         }
 326         if (match == TRUE)
 327         {
 328             /* Congratulations! We have a matched solvent.
 329              * Flag it with this type for later processing.
 330              */
 331             *cg_sp = k;
 332             solvent_parameters[k].count += nmol;
 333
 334             /* We are done with this charge group */
 335             return;
 336         }
 337     }
 338
 339     /* If we get here, we have a tentative new solvent type.
 340      * Before we add it we must check that it fulfills the requirements
 341      * of the solvent optimized loops. First determine which atoms have
 342      * VdW interactions.
 343      */
 344     for (j = 0; j < nj; j++)
 345     {
 346         has_vdw[j] = FALSE;
 347         tjA        = tmp_vdwtype[j];
 348
 349         /* Go through all other tpes and see if any have non-zero
 350          * VdW parameters when combined with this one.
 351          */
 352         for (k = 0; k < fr->ntype && (has_vdw[j] == FALSE); k++)
 353         {
 354             /* We already checked that the atoms weren't perturbed,
 355              * so we only need to check state A now.
 356              */
 357             if (fr->bBHAM)
 358             {
 359                 has_vdw[j] = (has_vdw[j] ||
 360                               (BHAMA(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 361                               (BHAMB(fr->nbfp, fr->ntype, tjA, k) != 0.0) ||
 362                               (BHAMC(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 363             }
 364             else
 365             {
 366                 /* Standard LJ */
 367                 has_vdw[j] = (has_vdw[j] ||
 368                               (C6(fr->nbfp, fr->ntype, tjA, k)  != 0.0) ||
 369                               (C12(fr->nbfp, fr->ntype, tjA, k) != 0.0));
 370             }
 371         }
 372     }
 373
 374     /* Now we know all we need to make the final check and assignment. */
 375     if (nj == 3)
 376     {
 377         /* So, is it an SPC?
 378          * For this we require thatn all atoms have charge,
 379          * the charges on atom 2 & 3 should be the same, and only
 380          * atom 1 might have VdW.
 381          */
 382         if (has_vdw[1] == FALSE &&
 383             has_vdw[2] == FALSE &&
 384             tmp_charge[0]  != 0 &&
 385             tmp_charge[1]  != 0 &&
 386             tmp_charge[2]  == tmp_charge[1])
 387         {
 388             srenew(solvent_parameters, *n_solvent_parameters+1);
 389             solvent_parameters[*n_solvent_parameters].model = esolSPC;
 390             solvent_parameters[*n_solvent_parameters].count = nmol;
 391             for (k = 0; k < 3; k++)
 392             {
 393                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 394                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 395             }
 396
 397             *cg_sp = *n_solvent_parameters;
 398             (*n_solvent_parameters)++;
 399         }
 400     }
 401     else if (nj == 4)
 402     {
 403         /* Or could it be a TIP4P?
 404          * For this we require thatn atoms 2,3,4 have charge, but not atom 1.
 405          * Only atom 1 mght have VdW.
 406          */
 407         if (has_vdw[1] == FALSE &&
 408             has_vdw[2] == FALSE &&
 409             has_vdw[3] == FALSE &&
 410             tmp_charge[0]  == 0 &&
 411             tmp_charge[1]  != 0 &&
 412             tmp_charge[2]  == tmp_charge[1] &&
 413             tmp_charge[3]  != 0)
 414         {
 415             srenew(solvent_parameters, *n_solvent_parameters+1);
 416             solvent_parameters[*n_solvent_parameters].model = esolTIP4P;
 417             solvent_parameters[*n_solvent_parameters].count = nmol;
 418             for (k = 0; k < 4; k++)
 419             {
 420                 solvent_parameters[*n_solvent_parameters].vdwtype[k] = tmp_vdwtype[k];
 421                 solvent_parameters[*n_solvent_parameters].charge[k]  = tmp_charge[k];
 422             }
 423
 424             *cg_sp = *n_solvent_parameters;
 425             (*n_solvent_parameters)++;
 426         }
 427     }
 428
 429     *solvent_parameters_p = solvent_parameters;
 430 }
 431
 432 static void
 433 check_solvent(FILE  *                fp,
 434               const gmx_mtop_t  *    mtop,
 435               t_forcerec  *          fr,
 436               cginfo_mb_t           *cginfo_mb)
 437 {
 438     const t_block     *   cgs;
 439     const t_block     *   mols;
 440     const gmx_moltype_t  *molt;
 441     int                   mb, mol, cg_mol, at_offset, cg_offset, am, cgm, i, nmol_ch, nmol;
 442     int                   n_solvent_parameters;
 443     solvent_parameters_t *solvent_parameters;
 444     int                 **cg_sp;
 445     int                   bestsp, bestsol;
 446
 447     if (debug)
 448     {
 449         fprintf(debug, "Going to determine what solvent types we have.\n");
 450     }
 451
 452     mols = &mtop->mols;
 453
 454     n_solvent_parameters = 0;
 455     solvent_parameters   = NULL;
 456     /* Allocate temporary array for solvent type */
 457     snew(cg_sp, mtop->nmolblock);
 458
 459     cg_offset = 0;
 460     at_offset = 0;
 461     for (mb = 0; mb < mtop->nmolblock; mb++)
 462     {
 463         molt = &mtop->moltype[mtop->molblock[mb].type];
 464         cgs  = &molt->cgs;
 465         /* Here we have to loop over all individual molecules
 466          * because we need to check for QMMM particles.
 467          */
 468         snew(cg_sp[mb], cginfo_mb[mb].cg_mod);
 469         nmol_ch = cginfo_mb[mb].cg_mod/cgs->nr;
 470         nmol    = mtop->molblock[mb].nmol/nmol_ch;
 471         for (mol = 0; mol < nmol_ch; mol++)
 472         {
 473             cgm = mol*cgs->nr;
 474             am  = mol*cgs->index[cgs->nr];
 475             for (cg_mol = 0; cg_mol < cgs->nr; cg_mol++)
 476             {
 477                 check_solvent_cg(molt, cg_mol, nmol,
 478                                  mtop->groups.grpnr[egcQMMM] ?
 479                                  mtop->groups.grpnr[egcQMMM]+at_offset+am : 0,
 480                                  &mtop->groups.grps[egcQMMM],
 481                                  fr,
 482                                  &n_solvent_parameters, &solvent_parameters,
 483                                  cginfo_mb[mb].cginfo[cgm+cg_mol],
 484                                  &cg_sp[mb][cgm+cg_mol]);
 485             }
 486         }
 487         cg_offset += cgs->nr;
 488         at_offset += cgs->index[cgs->nr];
 489     }
 490
 491     /* Puh! We finished going through all charge groups.
 492      * Now find the most common solvent model.
 493      */
 494
 495     /* Most common solvent this far */
 496     bestsp = -2;
 497     for (i = 0; i < n_solvent_parameters; i++)
 498     {
 499         if (bestsp == -2 ||
 500             solvent_parameters[i].count > solvent_parameters[bestsp].count)
 501         {
 502             bestsp = i;
 503         }
 504     }
 505
 506     if (bestsp >= 0)
 507     {
 508         bestsol = solvent_parameters[bestsp].model;
 509     }
 510     else
 511     {
 512         bestsol = esolNO;
 513     }
 514
 515 #ifdef DISABLE_WATER_NLIST
 516     bestsol = esolNO;
 517 #endif
 518
 519     fr->nWatMol = 0;
 520     for (mb = 0; mb < mtop->nmolblock; mb++)
 521     {
 522         cgs  = &mtop->moltype[mtop->molblock[mb].type].cgs;
 523         nmol = (mtop->molblock[mb].nmol*cgs->nr)/cginfo_mb[mb].cg_mod;
 524         for (i = 0; i < cginfo_mb[mb].cg_mod; i++)
 525         {
 526             if (cg_sp[mb][i] == bestsp)
 527             {
 528                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], bestsol);
 529                 fr->nWatMol += nmol;
 530             }
 531             else
 532             {
 533                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[i], esolNO);
 534             }
 535         }
 536         sfree(cg_sp[mb]);
 537     }
 538     sfree(cg_sp);
 539
 540     if (bestsol != esolNO && fp != NULL)
 541     {
 542         fprintf(fp, "\nEnabling %s-like water optimization for %d molecules.\n\n",
 543                 esol_names[bestsol],
 544                 solvent_parameters[bestsp].count);
 545     }
 546
 547     sfree(solvent_parameters);
 548     fr->solvent_opt = bestsol;
 549 }
 550
 551 enum {
 552     acNONE = 0, acCONSTRAINT, acSETTLE
 553 };
 554
 555 static cginfo_mb_t *init_cginfo_mb(FILE *fplog, const gmx_mtop_t *mtop,
 556                                    t_forcerec *fr, gmx_bool bNoSolvOpt,
 557                                    gmx_bool *bExcl_IntraCGAll_InterCGNone)
 558 {
 559     const t_block        *cgs;
 560     const t_blocka       *excl;
 561     const gmx_moltype_t  *molt;
 562     const gmx_molblock_t *molb;
 563     cginfo_mb_t          *cginfo_mb;
 564     gmx_bool             *type_VDW;
 565     int                  *cginfo;
 566     int                   cg_offset, a_offset, cgm, am;
 567     int                   mb, m, ncg_tot, cg, a0, a1, gid, ai, j, aj, excl_nalloc;
 568     int                  *a_con;
 569     int                   ftype;
 570     int                   ia;
 571     gmx_bool              bId, *bExcl, bExclIntraAll, bExclInter, bHaveVDW, bHaveQ;
 572
 573     ncg_tot = ncg_mtop(mtop);
 574     snew(cginfo_mb, mtop->nmolblock);
 575
 576     snew(type_VDW, fr->ntype);
 577     for (ai = 0; ai < fr->ntype; ai++)
 578     {
 579         type_VDW[ai] = FALSE;
 580         for (j = 0; j < fr->ntype; j++)
 581         {
 582             type_VDW[ai] = type_VDW[ai] ||
 583                 fr->bBHAM ||
 584                 C6(fr->nbfp, fr->ntype, ai, j) != 0 ||
 585                 C12(fr->nbfp, fr->ntype, ai, j) != 0;
 586         }
 587     }
 588
 589     *bExcl_IntraCGAll_InterCGNone = TRUE;
 590
 591     excl_nalloc = 10;
 592     snew(bExcl, excl_nalloc);
 593     cg_offset = 0;
 594     a_offset  = 0;
 595     for (mb = 0; mb < mtop->nmolblock; mb++)
 596     {
 597         molb = &mtop->molblock[mb];
 598         molt = &mtop->moltype[molb->type];
 599         cgs  = &molt->cgs;
 600         excl = &molt->excls;
 601
 602         /* Check if the cginfo is identical for all molecules in this block.
 603          * If so, we only need an array of the size of one molecule.
 604          * Otherwise we make an array of #mol times #cgs per molecule.
 605          */
 606         bId = TRUE;
 607         am  = 0;
 608         for (m = 0; m < molb->nmol; m++)
 609         {
 610             am = m*cgs->index[cgs->nr];
 611             for (cg = 0; cg < cgs->nr; cg++)
 612             {
 613                 a0 = cgs->index[cg];
 614                 a1 = cgs->index[cg+1];
 615                 if (ggrpnr(&mtop->groups, egcENER, a_offset+am+a0) !=
 616                     ggrpnr(&mtop->groups, egcENER, a_offset   +a0))
 617                 {
 618                     bId = FALSE;
 619                 }
 620                 if (mtop->groups.grpnr[egcQMMM] != NULL)
 621                 {
 622                     for (ai = a0; ai < a1; ai++)
 623                     {
 624                         if (mtop->groups.grpnr[egcQMMM][a_offset+am+ai] !=
 625                             mtop->groups.grpnr[egcQMMM][a_offset   +ai])
 626                         {
 627                             bId = FALSE;
 628                         }
 629                     }
 630                 }
 631             }
 632         }
 633
 634         cginfo_mb[mb].cg_start = cg_offset;
 635         cginfo_mb[mb].cg_end   = cg_offset + molb->nmol*cgs->nr;
 636         cginfo_mb[mb].cg_mod   = (bId ? 1 : molb->nmol)*cgs->nr;
 637         snew(cginfo_mb[mb].cginfo, cginfo_mb[mb].cg_mod);
 638         cginfo = cginfo_mb[mb].cginfo;
 639
 640         /* Set constraints flags for constrained atoms */
 641         snew(a_con, molt->atoms.nr);
 642         for (ftype = 0; ftype < F_NRE; ftype++)
 643         {
 644             if (interaction_function[ftype].flags & IF_CONSTRAINT)
 645             {
 646                 int nral;
 647
 648                 nral = NRAL(ftype);
 649                 for (ia = 0; ia < molt->ilist[ftype].nr; ia += 1+nral)
 650                 {
 651                     int a;
 652
 653                     for (a = 0; a < nral; a++)
 654                     {
 655                         a_con[molt->ilist[ftype].iatoms[ia+1+a]] =
 656                             (ftype == F_SETTLE ? acSETTLE : acCONSTRAINT);
 657                     }
 658                 }
 659             }
 660         }
 661
 662         for (m = 0; m < (bId ? 1 : molb->nmol); m++)
 663         {
 664             cgm = m*cgs->nr;
 665             am  = m*cgs->index[cgs->nr];
 666             for (cg = 0; cg < cgs->nr; cg++)
 667             {
 668                 a0 = cgs->index[cg];
 669                 a1 = cgs->index[cg+1];
 670
 671                 /* Store the energy group in cginfo */
 672                 gid = ggrpnr(&mtop->groups, egcENER, a_offset+am+a0);
 673                 SET_CGINFO_GID(cginfo[cgm+cg], gid);
 674
 675                 /* Check the intra/inter charge group exclusions */
 676                 if (a1-a0 > excl_nalloc)
 677                 {
 678                     excl_nalloc = a1 - a0;
 679                     srenew(bExcl, excl_nalloc);
 680                 }
 681                 /* bExclIntraAll: all intra cg interactions excluded
 682                  * bExclInter:    any inter cg interactions excluded
 683                  */
 684                 bExclIntraAll = TRUE;
 685                 bExclInter    = FALSE;
 686                 bHaveVDW      = FALSE;
 687                 bHaveQ        = FALSE;
 688                 for (ai = a0; ai < a1; ai++)
 689                 {
 690                     /* Check VDW and electrostatic interactions */
 691                     bHaveVDW = bHaveVDW || (type_VDW[molt->atoms.atom[ai].type] ||
 692                                             type_VDW[molt->atoms.atom[ai].typeB]);
 693                     bHaveQ  = bHaveQ    || (molt->atoms.atom[ai].q != 0 ||
 694                                             molt->atoms.atom[ai].qB != 0);
 695
 696                     /* Clear the exclusion list for atom ai */
 697                     for (aj = a0; aj < a1; aj++)
 698                     {
 699                         bExcl[aj-a0] = FALSE;
 700                     }
 701                     /* Loop over all the exclusions of atom ai */
 702                     for (j = excl->index[ai]; j < excl->index[ai+1]; j++)
 703                     {
 704                         aj = excl->a[j];
 705                         if (aj < a0 || aj >= a1)
 706                         {
 707                             bExclInter = TRUE;
 708                         }
 709                         else
 710                         {
 711                             bExcl[aj-a0] = TRUE;
 712                         }
 713                     }
 714                     /* Check if ai excludes a0 to a1 */
 715                     for (aj = a0; aj < a1; aj++)
 716                     {
 717                         if (!bExcl[aj-a0])
 718                         {
 719                             bExclIntraAll = FALSE;
 720                         }
 721                     }
 722
 723                     switch (a_con[ai])
 724                     {
 725                         case acCONSTRAINT:
 726                             SET_CGINFO_CONSTR(cginfo[cgm+cg]);
 727                             break;
 728                         case acSETTLE:
 729                             SET_CGINFO_SETTLE(cginfo[cgm+cg]);
 730                             break;
 731                         default:
 732                             break;
 733                     }
 734                 }
 735                 if (bExclIntraAll)
 736                 {
 737                     SET_CGINFO_EXCL_INTRA(cginfo[cgm+cg]);
 738                 }
 739                 if (bExclInter)
 740                 {
 741                     SET_CGINFO_EXCL_INTER(cginfo[cgm+cg]);
 742                 }
 743                 if (a1 - a0 > MAX_CHARGEGROUP_SIZE)
 744                 {
 745                     /* The size in cginfo is currently only read with DD */
 746                     gmx_fatal(FARGS, "A charge group has size %d which is larger than the limit of %d atoms", a1-a0, MAX_CHARGEGROUP_SIZE);
 747                 }
 748                 if (bHaveVDW)
 749                 {
 750                     SET_CGINFO_HAS_VDW(cginfo[cgm+cg]);
 751                 }
 752                 if (bHaveQ)
 753                 {
 754                     SET_CGINFO_HAS_Q(cginfo[cgm+cg]);
 755                 }
 756                 /* Store the charge group size */
 757                 SET_CGINFO_NATOMS(cginfo[cgm+cg], a1-a0);
 758
 759                 if (!bExclIntraAll || bExclInter)
 760                 {
 761                     *bExcl_IntraCGAll_InterCGNone = FALSE;
 762                 }
 763             }
 764         }
 765
 766         sfree(a_con);
 767
 768         cg_offset += molb->nmol*cgs->nr;
 769         a_offset  += molb->nmol*cgs->index[cgs->nr];
 770     }
 771     sfree(bExcl);
 772
 773     /* the solvent optimizer is called after the QM is initialized,
 774      * because we don't want to have the QM subsystemto become an
 775      * optimized solvent
 776      */
 777
 778     check_solvent(fplog, mtop, fr, cginfo_mb);
 779
 780     if (getenv("GMX_NO_SOLV_OPT"))
 781     {
 782         if (fplog)
 783         {
 784             fprintf(fplog, "Found environment variable GMX_NO_SOLV_OPT.\n"
 785                     "Disabling all solvent optimization\n");
 786         }
 787         fr->solvent_opt = esolNO;
 788     }
 789     if (bNoSolvOpt)
 790     {
 791         fr->solvent_opt = esolNO;
 792     }
 793     if (!fr->solvent_opt)
 794     {
 795         for (mb = 0; mb < mtop->nmolblock; mb++)
 796         {
 797             for (cg = 0; cg < cginfo_mb[mb].cg_mod; cg++)
 798             {
 799                 SET_CGINFO_SOLOPT(cginfo_mb[mb].cginfo[cg], esolNO);
 800             }
 801         }
 802     }
 803
 804     return cginfo_mb;
 805 }
 806
 807 static int *cginfo_expand(int nmb, cginfo_mb_t *cgi_mb)
 808 {
 809     int  ncg, mb, cg;
 810     int *cginfo;
 811
 812     ncg = cgi_mb[nmb-1].cg_end;
 813     snew(cginfo, ncg);
 814     mb = 0;
 815     for (cg = 0; cg < ncg; cg++)
 816     {
 817         while (cg >= cgi_mb[mb].cg_end)
 818         {
 819             mb++;
 820         }
 821         cginfo[cg] =
 822             cgi_mb[mb].cginfo[(cg - cgi_mb[mb].cg_start) % cgi_mb[mb].cg_mod];
 823     }
 824
 825     return cginfo;
 826 }
 827
 828 static void set_chargesum(FILE *log, t_forcerec *fr, const gmx_mtop_t *mtop)
 829 {
 830     double         qsum, q2sum, q;
 831     int            mb, nmol, i;
 832     const t_atoms *atoms;
 833
 834     qsum  = 0;
 835     q2sum = 0;
 836     for (mb = 0; mb < mtop->nmolblock; mb++)
 837     {
 838         nmol  = mtop->molblock[mb].nmol;
 839         atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 840         for (i = 0; i < atoms->nr; i++)
 841         {
 842             q      = atoms->atom[i].q;
 843             qsum  += nmol*q;
 844             q2sum += nmol*q*q;
 845         }
 846     }
 847     fr->qsum[0]  = qsum;
 848     fr->q2sum[0] = q2sum;
 849     if (fr->efep != efepNO)
 850     {
 851         qsum  = 0;
 852         q2sum = 0;
 853         for (mb = 0; mb < mtop->nmolblock; mb++)
 854         {
 855             nmol  = mtop->molblock[mb].nmol;
 856             atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 857             for (i = 0; i < atoms->nr; i++)
 858             {
 859                 q      = atoms->atom[i].qB;
 860                 qsum  += nmol*q;
 861                 q2sum += nmol*q*q;
 862             }
 863             fr->qsum[1]  = qsum;
 864             fr->q2sum[1] = q2sum;
 865         }
 866     }
 867     else
 868     {
 869         fr->qsum[1]  = fr->qsum[0];
 870         fr->q2sum[1] = fr->q2sum[0];
 871     }
 872     if (log)
 873     {
 874         if (fr->efep == efepNO)
 875         {
 876             fprintf(log, "System total charge: %.3f\n", fr->qsum[0]);
 877         }
 878         else
 879         {
 880             fprintf(log, "System total charge, top. A: %.3f top. B: %.3f\n",
 881                     fr->qsum[0], fr->qsum[1]);
 882         }
 883     }
 884 }
 885
 886 void update_forcerec(FILE *log, t_forcerec *fr, matrix box)
 887 {
 888     if (fr->eeltype == eelGRF)
 889     {
 890         calc_rffac(NULL, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
 891                    fr->rcoulomb, fr->temp, fr->zsquare, box,
 892                    &fr->kappa, &fr->k_rf, &fr->c_rf);
 893     }
 894 }
 895
 896 void set_avcsixtwelve(FILE *fplog, t_forcerec *fr, const gmx_mtop_t *mtop)
 897 {
 898     const t_atoms  *atoms, *atoms_tpi;
 899     const t_blocka *excl;
 900     int             mb, nmol, nmolc, i, j, tpi, tpj, j1, j2, k, n, nexcl, q;
 901 #if (defined SIZEOF_LONG_LONG_INT) && (SIZEOF_LONG_LONG_INT >= 8)
 902     long long int   npair, npair_ij, tmpi, tmpj;
 903 #else
 904     double          npair, npair_ij, tmpi, tmpj;
 905 #endif
 906     double          csix, ctwelve;
 907     int             ntp, *typecount;
 908     gmx_bool        bBHAM;
 909     real           *nbfp;
 910
 911     ntp   = fr->ntype;
 912     bBHAM = fr->bBHAM;
 913     nbfp  = fr->nbfp;
 914
 915     for (q = 0; q < (fr->efep == efepNO ? 1 : 2); q++)
 916     {
 917         csix    = 0;
 918         ctwelve = 0;
 919         npair   = 0;
 920         nexcl   = 0;
 921         if (!fr->n_tpi)
 922         {
 923             /* Count the types so we avoid natoms^2 operations */
 924             snew(typecount, ntp);
 925             for (mb = 0; mb < mtop->nmolblock; mb++)
 926             {
 927                 nmol  = mtop->molblock[mb].nmol;
 928                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 929                 for (i = 0; i < atoms->nr; i++)
 930                 {
 931                     if (q == 0)
 932                     {
 933                         tpi = atoms->atom[i].type;
 934                     }
 935                     else
 936                     {
 937                         tpi = atoms->atom[i].typeB;
 938                     }
 939                     typecount[tpi] += nmol;
 940                 }
 941             }
 942             for (tpi = 0; tpi < ntp; tpi++)
 943             {
 944                 for (tpj = tpi; tpj < ntp; tpj++)
 945                 {
 946                     tmpi = typecount[tpi];
 947                     tmpj = typecount[tpj];
 948                     if (tpi != tpj)
 949                     {
 950                         npair_ij = tmpi*tmpj;
 951                     }
 952                     else
 953                     {
 954                         npair_ij = tmpi*(tmpi - 1)/2;
 955                     }
 956                     if (bBHAM)
 957                     {
 958                         /* nbfp now includes the 6.0 derivative prefactor */
 959                         csix    += npair_ij*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
 960                     }
 961                     else
 962                     {
 963                         /* nbfp now includes the 6.0/12.0 derivative prefactors */
 964                         csix    += npair_ij*   C6(nbfp, ntp, tpi, tpj)/6.0;
 965                         ctwelve += npair_ij*  C12(nbfp, ntp, tpi, tpj)/12.0;
 966                     }
 967                     npair += npair_ij;
 968                 }
 969             }
 970             sfree(typecount);
 971             /* Subtract the excluded pairs.
 972              * The main reason for substracting exclusions is that in some cases
 973              * some combinations might never occur and the parameters could have
 974              * any value. These unused values should not influence the dispersion
 975              * correction.
 976              */
 977             for (mb = 0; mb < mtop->nmolblock; mb++)
 978             {
 979                 nmol  = mtop->molblock[mb].nmol;
 980                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
 981                 excl  = &mtop->moltype[mtop->molblock[mb].type].excls;
 982                 for (i = 0; (i < atoms->nr); i++)
 983                 {
 984                     if (q == 0)
 985                     {
 986                         tpi = atoms->atom[i].type;
 987                     }
 988                     else
 989                     {
 990                         tpi = atoms->atom[i].typeB;
 991                     }
 992                     j1  = excl->index[i];
 993                     j2  = excl->index[i+1];
 994                     for (j = j1; j < j2; j++)
 995                     {
 996                         k = excl->a[j];
 997                         if (k > i)
 998                         {
 999                             if (q == 0)
1000                             {
1001                                 tpj = atoms->atom[k].type;
1002                             }
1003                             else
1004                             {
1005                                 tpj = atoms->atom[k].typeB;
1006                             }
1007                             if (bBHAM)
1008                             {
1009                                 /* nbfp now includes the 6.0 derivative prefactor */
1010                                 csix -= nmol*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1011                             }
1012                             else
1013                             {
1014                                 /* nbfp now includes the 6.0/12.0 derivative prefactors */
1015                                 csix    -= nmol*C6 (nbfp, ntp, tpi, tpj)/6.0;
1016                                 ctwelve -= nmol*C12(nbfp, ntp, tpi, tpj)/12.0;
1017                             }
1018                             nexcl += nmol;
1019                         }
1020                     }
1021                 }
1022             }
1023         }
1024         else
1025         {
1026             /* Only correct for the interaction of the test particle
1027              * with the rest of the system.
1028              */
1029             atoms_tpi =
1030                 &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].atoms;
1031
1032             npair = 0;
1033             for (mb = 0; mb < mtop->nmolblock; mb++)
1034             {
1035                 nmol  = mtop->molblock[mb].nmol;
1036                 atoms = &mtop->moltype[mtop->molblock[mb].type].atoms;
1037                 for (j = 0; j < atoms->nr; j++)
1038                 {
1039                     nmolc = nmol;
1040                     /* Remove the interaction of the test charge group
1041                      * with itself.
1042                      */
1043                     if (mb == mtop->nmolblock-1)
1044                     {
1045                         nmolc--;
1046
1047                         if (mb == 0 && nmol == 1)
1048                         {
1049                             gmx_fatal(FARGS, "Old format tpr with TPI, please generate a new tpr file");
1050                         }
1051                     }
1052                     if (q == 0)
1053                     {
1054                         tpj = atoms->atom[j].type;
1055                     }
1056                     else
1057                     {
1058                         tpj = atoms->atom[j].typeB;
1059                     }
1060                     for (i = 0; i < fr->n_tpi; i++)
1061                     {
1062                         if (q == 0)
1063                         {
1064                             tpi = atoms_tpi->atom[i].type;
1065                         }
1066                         else
1067                         {
1068                             tpi = atoms_tpi->atom[i].typeB;
1069                         }
1070                         if (bBHAM)
1071                         {
1072                             /* nbfp now includes the 6.0 derivative prefactor */
1073                             csix    += nmolc*BHAMC(nbfp, ntp, tpi, tpj)/6.0;
1074                         }
1075                         else
1076                         {
1077                             /* nbfp now includes the 6.0/12.0 derivative prefactors */
1078                             csix    += nmolc*C6 (nbfp, ntp, tpi, tpj)/6.0;
1079                             ctwelve += nmolc*C12(nbfp, ntp, tpi, tpj)/12.0;
1080                         }
1081                         npair += nmolc;
1082                     }
1083                 }
1084             }
1085         }
1086         if (npair - nexcl <= 0 && fplog)
1087         {
1088             fprintf(fplog, "\nWARNING: There are no atom pairs for dispersion correction\n\n");
1089             csix     = 0;
1090             ctwelve  = 0;
1091         }
1092         else
1093         {
1094             csix    /= npair - nexcl;
1095             ctwelve /= npair - nexcl;
1096         }
1097         if (debug)
1098         {
1099             fprintf(debug, "Counted %d exclusions\n", nexcl);
1100             fprintf(debug, "Average C6 parameter is: %10g\n", (double)csix);
1101             fprintf(debug, "Average C12 parameter is: %10g\n", (double)ctwelve);
1102         }
1103         fr->avcsix[q]    = csix;
1104         fr->avctwelve[q] = ctwelve;
1105     }
1106     if (fplog != NULL)
1107     {
1108         if (fr->eDispCorr == edispcAllEner ||
1109             fr->eDispCorr == edispcAllEnerPres)
1110         {
1111             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e, <C12> %10.4e\n",
1112                     fr->avcsix[0], fr->avctwelve[0]);
1113         }
1114         else
1115         {
1116             fprintf(fplog, "Long Range LJ corr.: <C6> %10.4e\n", fr->avcsix[0]);
1117         }
1118     }
1119 }
1120
1121
1122 static void set_bham_b_max(FILE *fplog, t_forcerec *fr,
1123                            const gmx_mtop_t *mtop)
1124 {
1125     const t_atoms *at1, *at2;
1126     int            mt1, mt2, i, j, tpi, tpj, ntypes;
1127     real           b, bmin;
1128     real          *nbfp;
1129
1130     if (fplog)
1131     {
1132         fprintf(fplog, "Determining largest Buckingham b parameter for table\n");
1133     }
1134     nbfp   = fr->nbfp;
1135     ntypes = fr->ntype;
1136
1137     bmin           = -1;
1138     fr->bham_b_max = 0;
1139     for (mt1 = 0; mt1 < mtop->nmoltype; mt1++)
1140     {
1141         at1 = &mtop->moltype[mt1].atoms;
1142         for (i = 0; (i < at1->nr); i++)
1143         {
1144             tpi = at1->atom[i].type;
1145             if (tpi >= ntypes)
1146             {
1147                 gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", i, tpi, ntypes);
1148             }
1149
1150             for (mt2 = mt1; mt2 < mtop->nmoltype; mt2++)
1151             {
1152                 at2 = &mtop->moltype[mt2].atoms;
1153                 for (j = 0; (j < at2->nr); j++)
1154                 {
1155                     tpj = at2->atom[j].type;
1156                     if (tpj >= ntypes)
1157                     {
1158                         gmx_fatal(FARGS, "Atomtype[%d] = %d, maximum = %d", j, tpj, ntypes);
1159                     }
1160                     b = BHAMB(nbfp, ntypes, tpi, tpj);
1161                     if (b > fr->bham_b_max)
1162                     {
1163                         fr->bham_b_max = b;
1164                     }
1165                     if ((b < bmin) || (bmin == -1))
1166                     {
1167                         bmin = b;
1168                     }
1169                 }
1170             }
1171         }
1172     }
1173     if (fplog)
1174     {
1175         fprintf(fplog, "Buckingham b parameters, min: %g, max: %g\n",
1176                 bmin, fr->bham_b_max);
1177     }
1178 }
1179
1180 static void make_nbf_tables(FILE *fp, const output_env_t oenv,
1181                             t_forcerec *fr, real rtab,
1182                             const t_commrec *cr,
1183                             const char *tabfn, char *eg1, char *eg2,
1184                             t_nblists *nbl)
1185 {
1186     char buf[STRLEN];
1187     int  i, j;
1188
1189     if (tabfn == NULL)
1190     {
1191         if (debug)
1192         {
1193             fprintf(debug, "No table file name passed, can not read table, can not do non-bonded interactions\n");
1194         }
1195         return;
1196     }
1197
1198     sprintf(buf, "%s", tabfn);
1199     if (eg1 && eg2)
1200     {
1201         /* Append the two energy group names */
1202         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "_%s_%s.%s",
1203                 eg1, eg2, ftp2ext(efXVG));
1204     }
1205     nbl->table_elec_vdw = make_tables(fp, oenv, fr, MASTER(cr), buf, rtab, 0);
1206     /* Copy the contents of the table to separate coulomb and LJ tables too,
1207      * to improve cache performance.
1208      */
1209     /* For performance reasons we want
1210      * the table data to be aligned to 16-byte. The pointers could be freed
1211      * but currently aren't.
1212      */
1213     nbl->table_elec.interaction   = GMX_TABLE_INTERACTION_ELEC;
1214     nbl->table_elec.format        = nbl->table_elec_vdw.format;
1215     nbl->table_elec.r             = nbl->table_elec_vdw.r;
1216     nbl->table_elec.n             = nbl->table_elec_vdw.n;
1217     nbl->table_elec.scale         = nbl->table_elec_vdw.scale;
1218     nbl->table_elec.scale_exp     = nbl->table_elec_vdw.scale_exp;
1219     nbl->table_elec.formatsize    = nbl->table_elec_vdw.formatsize;
1220     nbl->table_elec.ninteractions = 1;
1221     nbl->table_elec.stride        = nbl->table_elec.formatsize * nbl->table_elec.ninteractions;
1222     snew_aligned(nbl->table_elec.data, nbl->table_elec.stride*(nbl->table_elec.n+1), 32);
1223
1224     nbl->table_vdw.interaction   = GMX_TABLE_INTERACTION_VDWREP_VDWDISP;
1225     nbl->table_vdw.format        = nbl->table_elec_vdw.format;
1226     nbl->table_vdw.r             = nbl->table_elec_vdw.r;
1227     nbl->table_vdw.n             = nbl->table_elec_vdw.n;
1228     nbl->table_vdw.scale         = nbl->table_elec_vdw.scale;
1229     nbl->table_vdw.scale_exp     = nbl->table_elec_vdw.scale_exp;
1230     nbl->table_vdw.formatsize    = nbl->table_elec_vdw.formatsize;
1231     nbl->table_vdw.ninteractions = 2;
1232     nbl->table_vdw.stride        = nbl->table_vdw.formatsize * nbl->table_vdw.ninteractions;
1233     snew_aligned(nbl->table_vdw.data, nbl->table_vdw.stride*(nbl->table_vdw.n+1), 32);
1234
1235     for (i = 0; i <= nbl->table_elec_vdw.n; i++)
1236     {
1237         for (j = 0; j < 4; j++)
1238         {
1239             nbl->table_elec.data[4*i+j] = nbl->table_elec_vdw.data[12*i+j];
1240         }
1241         for (j = 0; j < 8; j++)
1242         {
1243             nbl->table_vdw.data[8*i+j] = nbl->table_elec_vdw.data[12*i+4+j];
1244         }
1245     }
1246 }
1247
1248 static void count_tables(int ftype1, int ftype2, const gmx_mtop_t *mtop,
1249                          int *ncount, int **count)
1250 {
1251     const gmx_moltype_t *molt;
1252     const t_ilist       *il;
1253     int                  mt, ftype, stride, i, j, tabnr;
1254
1255     for (mt = 0; mt < mtop->nmoltype; mt++)
1256     {
1257         molt = &mtop->moltype[mt];
1258         for (ftype = 0; ftype < F_NRE; ftype++)
1259         {
1260             if (ftype == ftype1 || ftype == ftype2)
1261             {
1262                 il     = &molt->ilist[ftype];
1263                 stride = 1 + NRAL(ftype);
1264                 for (i = 0; i < il->nr; i += stride)
1265                 {
1266                     tabnr = mtop->ffparams.iparams[il->iatoms[i]].tab.table;
1267                     if (tabnr < 0)
1268                     {
1269                         gmx_fatal(FARGS, "A bonded table number is smaller than 0: %d\n", tabnr);
1270                     }
1271                     if (tabnr >= *ncount)
1272                     {
1273                         srenew(*count, tabnr+1);
1274                         for (j = *ncount; j < tabnr+1; j++)
1275                         {
1276                             (*count)[j] = 0;
1277                         }
1278                         *ncount = tabnr+1;
1279                     }
1280                     (*count)[tabnr]++;
1281                 }
1282             }
1283         }
1284     }
1285 }
1286
1287 static bondedtable_t *make_bonded_tables(FILE *fplog,
1288                                          int ftype1, int ftype2,
1289                                          const gmx_mtop_t *mtop,
1290                                          const char *basefn, const char *tabext)
1291 {
1292     int            i, ncount, *count;
1293     char           tabfn[STRLEN];
1294     bondedtable_t *tab;
1295
1296     tab = NULL;
1297
1298     ncount = 0;
1299     count  = NULL;
1300     count_tables(ftype1, ftype2, mtop, &ncount, &count);
1301
1302     if (ncount > 0)
1303     {
1304         snew(tab, ncount);
1305         for (i = 0; i < ncount; i++)
1306         {
1307             if (count[i] > 0)
1308             {
1309                 sprintf(tabfn, "%s", basefn);
1310                 sprintf(tabfn + strlen(basefn) - strlen(ftp2ext(efXVG)) - 1, "_%s%d.%s",
1311                         tabext, i, ftp2ext(efXVG));
1312                 tab[i] = make_bonded_table(fplog, tabfn, NRAL(ftype1)-2);
1313             }
1314         }
1315         sfree(count);
1316     }
1317
1318     return tab;
1319 }
1320
1321 void forcerec_set_ranges(t_forcerec *fr,
1322                          int ncg_home, int ncg_force,
1323                          int natoms_force,
1324                          int natoms_force_constr, int natoms_f_novirsum)
1325 {
1326     fr->cg0 = 0;
1327     fr->hcg = ncg_home;
1328
1329     /* fr->ncg_force is unused in the standard code,
1330      * but it can be useful for modified code dealing with charge groups.
1331      */
1332     fr->ncg_force           = ncg_force;
1333     fr->natoms_force        = natoms_force;
1334     fr->natoms_force_constr = natoms_force_constr;
1335
1336     if (fr->natoms_force_constr > fr->nalloc_force)
1337     {
1338         fr->nalloc_force = over_alloc_dd(fr->natoms_force_constr);
1339
1340         if (fr->bTwinRange)
1341         {
1342             srenew(fr->f_twin, fr->nalloc_force);
1343         }
1344     }
1345
1346     if (fr->bF_NoVirSum)
1347     {
1348         fr->f_novirsum_n = natoms_f_novirsum;
1349         if (fr->f_novirsum_n > fr->f_novirsum_nalloc)
1350         {
1351             fr->f_novirsum_nalloc = over_alloc_dd(fr->f_novirsum_n);
1352             srenew(fr->f_novirsum_alloc, fr->f_novirsum_nalloc);
1353         }
1354     }
1355     else
1356     {
1357         fr->f_novirsum_n = 0;
1358     }
1359 }
1360
1361 static real cutoff_inf(real cutoff)
1362 {
1363     if (cutoff == 0)
1364     {
1365         cutoff = GMX_CUTOFF_INF;
1366     }
1367
1368     return cutoff;
1369 }
1370
1371 static void make_adress_tf_tables(FILE *fp, const output_env_t oenv,
1372                                   t_forcerec *fr, const t_inputrec *ir,
1373                                   const char *tabfn, const gmx_mtop_t *mtop,
1374                                   matrix     box)
1375 {
1376     char buf[STRLEN];
1377     int  i, j;
1378
1379     if (tabfn == NULL)
1380     {
1381         gmx_fatal(FARGS, "No thermoforce table file given. Use -tabletf to specify a file\n");
1382         return;
1383     }
1384
1385     snew(fr->atf_tabs, ir->adress->n_tf_grps);
1386
1387     sprintf(buf, "%s", tabfn);
1388     for (i = 0; i < ir->adress->n_tf_grps; i++)
1389     {
1390         j = ir->adress->tf_table_index[i]; /* get energy group index */
1391         sprintf(buf + strlen(tabfn) - strlen(ftp2ext(efXVG)) - 1, "tf_%s.%s",
1392                 *(mtop->groups.grpname[mtop->groups.grps[egcENER].nm_ind[j]]), ftp2ext(efXVG));
1393         if (fp)
1394         {
1395             fprintf(fp, "loading tf table for energygrp index %d from %s\n", ir->adress->tf_table_index[i], buf);
1396         }
1397         fr->atf_tabs[i] = make_atf_table(fp, oenv, fr, buf, box);
1398     }
1399
1400 }
1401
1402 gmx_bool can_use_allvsall(const t_inputrec *ir, const gmx_mtop_t *mtop,
1403                           gmx_bool bPrintNote, t_commrec *cr, FILE *fp)
1404 {
1405     gmx_bool bAllvsAll;
1406
1407     bAllvsAll =
1408         (
1409             ir->rlist == 0            &&
1410             ir->rcoulomb == 0         &&
1411             ir->rvdw == 0             &&
1412             ir->ePBC == epbcNONE      &&
1413             ir->vdwtype == evdwCUT    &&
1414             ir->coulombtype == eelCUT &&
1415             ir->efep == efepNO        &&
1416             (ir->implicit_solvent == eisNO ||
1417              (ir->implicit_solvent == eisGBSA && (ir->gb_algorithm == egbSTILL ||
1418                                                   ir->gb_algorithm == egbHCT   ||
1419                                                   ir->gb_algorithm == egbOBC))) &&
1420             getenv("GMX_NO_ALLVSALL") == NULL
1421         );
1422
1423     if (bAllvsAll && ir->opts.ngener > 1)
1424     {
1425         const char *note = "NOTE: Can not use all-vs-all force loops, because there are multiple energy monitor groups; you might get significantly higher performance when using only a single energy monitor group.\n";
1426
1427         if (bPrintNote)
1428         {
1429             if (MASTER(cr))
1430             {
1431                 fprintf(stderr, "\n%s\n", note);
1432             }
1433             if (fp != NULL)
1434             {
1435                 fprintf(fp, "\n%s\n", note);
1436             }
1437         }
1438         bAllvsAll = FALSE;
1439     }
1440
1441     if (bAllvsAll && fp && MASTER(cr))
1442     {
1443         fprintf(fp, "\nUsing accelerated all-vs-all kernels.\n\n");
1444     }
1445
1446     return bAllvsAll;
1447 }
1448
1449
1450 static void init_forcerec_f_threads(t_forcerec *fr, int nenergrp)
1451 {
1452     int t, i;
1453
1454     /* These thread local data structures are used for bondeds only */
1455     fr->nthreads = gmx_omp_nthreads_get(emntBonded);
1456
1457     if (fr->nthreads > 1)
1458     {
1459         snew(fr->f_t, fr->nthreads);
1460         /* Thread 0 uses the global force and energy arrays */
1461         for (t = 1; t < fr->nthreads; t++)
1462         {
1463             fr->f_t[t].f        = NULL;
1464             fr->f_t[t].f_nalloc = 0;
1465             snew(fr->f_t[t].fshift, SHIFTS);
1466             fr->f_t[t].grpp.nener = nenergrp*nenergrp;
1467             for (i = 0; i < egNR; i++)
1468             {
1469                 snew(fr->f_t[t].grpp.ener[i], fr->f_t[t].grpp.nener);
1470             }
1471         }
1472     }
1473 }
1474
1475
1476 static void pick_nbnxn_kernel_cpu(FILE             *fp,
1477                                   const t_commrec  *cr,
1478                                   const gmx_cpuid_t cpuid_info,
1479                                   const t_inputrec *ir,
1480                                   int              *kernel_type,
1481                                   int              *ewald_excl)
1482 {
1483     *kernel_type = nbnxnk4x4_PlainC;
1484     *ewald_excl  = ewaldexclTable;
1485
1486 #ifdef GMX_NBNXN_SIMD
1487     {
1488 #ifdef GMX_NBNXN_SIMD_4XN
1489         *kernel_type = nbnxnk4xN_SIMD_4xN;
1490 #endif
1491 #ifdef GMX_NBNXN_SIMD_2XNN
1492         /* We expect the 2xNN kernels to be faster in most cases */
1493         *kernel_type = nbnxnk4xN_SIMD_2xNN;
1494 #endif
1495
1496 #if defined GMX_NBNXN_SIMD_4XN && defined GMX_X86_AVX_256
1497         if (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT)
1498         {
1499             /* The raw pair rate of the 4x8 kernel is higher than 2x(4+4),
1500              * 10% with HT, 50% without HT, but extra zeros interactions
1501              * can compensate. As we currently don't detect the actual use
1502              * of HT, switch to 4x8 to avoid a potential performance hit.
1503              */
1504             *kernel_type = nbnxnk4xN_SIMD_4xN;
1505         }
1506 #endif
1507         if (getenv("GMX_NBNXN_SIMD_4XN") != NULL)
1508         {
1509 #ifdef GMX_NBNXN_SIMD_4XN
1510             *kernel_type = nbnxnk4xN_SIMD_4xN;
1511 #else
1512             gmx_fatal(FARGS, "SIMD 4xN kernels requested, but Gromacs has been compiled without support for these kernels");
1513 #endif
1514         }
1515         if (getenv("GMX_NBNXN_SIMD_2XNN") != NULL)
1516         {
1517 #ifdef GMX_NBNXN_SIMD_2XNN
1518             *kernel_type = nbnxnk4xN_SIMD_2xNN;
1519 #else
1520             gmx_fatal(FARGS, "SIMD 2x(N+N) kernels requested, but Gromacs has been compiled without support for these kernels");
1521 #endif
1522         }
1523
1524         /* Analytical Ewald exclusion correction is only an option in the
1525          * x86 SIMD kernel. This is faster in single precision
1526          * on Bulldozer and slightly faster on Sandy Bridge.
1527          */
1528 #if (defined GMX_X86_AVX_128_FMA || defined GMX_X86_AVX_256) && !defined GMX_DOUBLE
1529         *ewald_excl = ewaldexclAnalytical;
1530 #endif
1531         if (getenv("GMX_NBNXN_EWALD_TABLE") != NULL)
1532         {
1533             *ewald_excl = ewaldexclTable;
1534         }
1535         if (getenv("GMX_NBNXN_EWALD_ANALYTICAL") != NULL)
1536         {
1537             *ewald_excl = ewaldexclAnalytical;
1538         }
1539
1540     }
1541 #endif /* GMX_X86_SSE2 */
1542 }
1543
1544
1545 const char *lookup_nbnxn_kernel_name(int kernel_type)
1546 {
1547     const char *returnvalue = NULL;
1548     switch (kernel_type)
1549     {
1550         case nbnxnkNotSet: returnvalue     = "not set"; break;
1551         case nbnxnk4x4_PlainC: returnvalue = "plain C"; break;
1552 #ifndef GMX_NBNXN_SIMD
1553         case nbnxnk4xN_SIMD_4xN: returnvalue  = "not available"; break;
1554         case nbnxnk4xN_SIMD_2xNN: returnvalue = "not available"; break;
1555 #else
1556 #ifdef GMX_X86_SSE2
1557 #if GMX_NBNXN_SIMD_BITWIDTH == 128
1558             /* x86 SIMD intrinsics can be converted to either SSE or AVX depending
1559              * on compiler flags. As we use nearly identical intrinsics, using an AVX
1560              * compiler flag without an AVX macro effectively results in AVX kernels.
1561              * For gcc we check for __AVX__
1562              * At least a check for icc should be added (if there is a macro)
1563              */
1564 #if !(defined GMX_X86_AVX_128_FMA || defined __AVX__)
1565 #ifndef GMX_X86_SSE4_1
1566         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE2"; break;
1567         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE2"; break;
1568 #else
1569         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SSE4.1"; break;
1570         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SSE4.1"; break;
1571 #endif
1572 #else
1573         case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-128"; break;
1574         case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-128"; break;
1575 #endif
1576 #endif
1577 #if GMX_NBNXN_SIMD_BITWIDTH == 256
1578         case nbnxnk4xN_SIMD_4xN: returnvalue  = "AVX-256"; break;
1579         case nbnxnk4xN_SIMD_2xNN: returnvalue = "AVX-256"; break;
1580 #endif
1581 #else   /* not GMX_X86_SSE2 */
1582         case nbnxnk4xN_SIMD_4xN: returnvalue  = "SIMD"; break;
1583         case nbnxnk4xN_SIMD_2xNN: returnvalue = "SIMD"; break;
1584 #endif
1585 #endif
1586         case nbnxnk8x8x8_CUDA: returnvalue   = "CUDA"; break;
1587         case nbnxnk8x8x8_PlainC: returnvalue = "plain C"; break;
1588
1589         case nbnxnkNR:
1590         default:
1591             gmx_fatal(FARGS, "Illegal kernel type selected");
1592             returnvalue = NULL;
1593             break;
1594     }
1595     return returnvalue;
1596 };
1597
1598 static void pick_nbnxn_kernel(FILE                *fp,
1599                               const t_commrec     *cr,
1600                               const gmx_hw_info_t *hwinfo,
1601                               gmx_bool             use_cpu_acceleration,
1602                               gmx_bool             bUseGPU,
1603                               gmx_bool             bEmulateGPU,
1604                               const t_inputrec    *ir,
1605                               int                 *kernel_type,
1606                               int                 *ewald_excl,
1607                               gmx_bool             bDoNonbonded)
1608 {
1609     assert(kernel_type);
1610
1611     *kernel_type = nbnxnkNotSet;
1612     *ewald_excl  = ewaldexclTable;
1613
1614     if (bEmulateGPU)
1615     {
1616         *kernel_type = nbnxnk8x8x8_PlainC;
1617
1618         if (bDoNonbonded)
1619         {
1620             md_print_warn(cr, fp, "Emulating a GPU run on the CPU (slow)");
1621         }
1622     }
1623     else if (bUseGPU)
1624     {
1625         *kernel_type = nbnxnk8x8x8_CUDA;
1626     }
1627
1628     if (*kernel_type == nbnxnkNotSet)
1629     {
1630         if (use_cpu_acceleration)
1631         {
1632             pick_nbnxn_kernel_cpu(fp, cr, hwinfo->cpuid_info, ir,
1633                                   kernel_type, ewald_excl);
1634         }
1635         else
1636         {
1637             *kernel_type = nbnxnk4x4_PlainC;
1638         }
1639     }
1640
1641     if (bDoNonbonded && fp != NULL)
1642     {
1643         fprintf(fp, "\nUsing %s %dx%d non-bonded kernels\n\n",
1644                 lookup_nbnxn_kernel_name(*kernel_type),
1645                 nbnxn_kernel_pairlist_simple(*kernel_type) ? NBNXN_CPU_CLUSTER_I_SIZE : NBNXN_GPU_CLUSTER_SIZE,
1646                 nbnxn_kernel_to_cj_size(*kernel_type));
1647     }
1648 }
1649
1650 static void pick_nbnxn_resources(FILE                *fp,
1651                                  const t_commrec     *cr,
1652                                  const gmx_hw_info_t *hwinfo,
1653                                  gmx_bool             bDoNonbonded,
1654                                  gmx_bool            *bUseGPU,
1655                                  gmx_bool            *bEmulateGPU)
1656 {
1657     gmx_bool bEmulateGPUEnvVarSet;
1658     char     gpu_err_str[STRLEN];
1659
1660     *bUseGPU = FALSE;
1661
1662     bEmulateGPUEnvVarSet = (getenv("GMX_EMULATE_GPU") != NULL);
1663
1664     /* Run GPU emulation mode if GMX_EMULATE_GPU is defined. Because
1665      * GPUs (currently) only handle non-bonded calculations, we will
1666      * automatically switch to emulation if non-bonded calculations are
1667      * turned off via GMX_NO_NONBONDED - this is the simple and elegant
1668      * way to turn off GPU initialization, data movement, and cleanup.
1669      *
1670      * GPU emulation can be useful to assess the performance one can expect by
1671      * adding GPU(s) to the machine. The conditional below allows this even
1672      * if mdrun is compiled without GPU acceleration support.
1673      * Note that you should freezing the system as otherwise it will explode.
1674      */
1675     *bEmulateGPU = (bEmulateGPUEnvVarSet ||
1676                     (!bDoNonbonded && hwinfo->bCanUseGPU));
1677
1678     /* Enable GPU mode when GPUs are available or no GPU emulation is requested.
1679      */
1680     if (hwinfo->bCanUseGPU && !(*bEmulateGPU))
1681     {
1682         /* Each PP node will use the intra-node id-th device from the
1683          * list of detected/selected GPUs. */
1684         if (!init_gpu(cr->rank_pp_intranode, gpu_err_str, &hwinfo->gpu_info))
1685         {
1686             /* At this point the init should never fail as we made sure that
1687              * we have all the GPUs we need. If it still does, we'll bail. */
1688             gmx_fatal(FARGS, "On node %d failed to initialize GPU #%d: %s",
1689                       cr->nodeid,
1690                       get_gpu_device_id(&hwinfo->gpu_info, cr->rank_pp_intranode),
1691                       gpu_err_str);
1692         }
1693
1694         /* Here we actually turn on hardware GPU acceleration */
1695         *bUseGPU = TRUE;
1696     }
1697 }
1698
1699 gmx_bool uses_simple_tables(int                 cutoff_scheme,
1700                             nonbonded_verlet_t *nbv,
1701                             int                 group)
1702 {
1703     gmx_bool bUsesSimpleTables = TRUE;
1704     int      grp_index;
1705
1706     switch (cutoff_scheme)
1707     {
1708         case ecutsGROUP:
1709             bUsesSimpleTables = TRUE;
1710             break;
1711         case ecutsVERLET:
1712             assert(NULL != nbv && NULL != nbv->grp);
1713             grp_index         = (group < 0) ? 0 : (nbv->ngrp - 1);
1714             bUsesSimpleTables = nbnxn_kernel_pairlist_simple(nbv->grp[grp_index].kernel_type);
1715             break;
1716         default:
1717             gmx_incons("unimplemented");
1718     }
1719     return bUsesSimpleTables;
1720 }
1721
1722 static void init_ewald_f_table(interaction_const_t *ic,
1723                                gmx_bool             bUsesSimpleTables,
1724                                real                 rtab)
1725 {
1726     real maxr;
1727
1728     if (bUsesSimpleTables)
1729     {
1730         /* With a spacing of 0.0005 we are at the force summation accuracy
1731          * for the SSE kernels for "normal" atomistic simulations.
1732          */
1733         ic->tabq_scale = ewald_spline3_table_scale(ic->ewaldcoeff,
1734                                                    ic->rcoulomb);
1735
1736         maxr           = (rtab > ic->rcoulomb) ? rtab : ic->rcoulomb;
1737         ic->tabq_size  = (int)(maxr*ic->tabq_scale) + 2;
1738     }
1739     else
1740     {
1741         ic->tabq_size = GPU_EWALD_COULOMB_FORCE_TABLE_SIZE;
1742         /* Subtract 2 iso 1 to avoid access out of range due to rounding */
1743         ic->tabq_scale = (ic->tabq_size - 2)/ic->rcoulomb;
1744     }
1745
1746     sfree_aligned(ic->tabq_coul_FDV0);
1747     sfree_aligned(ic->tabq_coul_F);
1748     sfree_aligned(ic->tabq_coul_V);
1749
1750     /* Create the original table data in FDV0 */
1751     snew_aligned(ic->tabq_coul_FDV0, ic->tabq_size*4, 32);
1752     snew_aligned(ic->tabq_coul_F, ic->tabq_size, 32);
1753     snew_aligned(ic->tabq_coul_V, ic->tabq_size, 32);
1754     table_spline3_fill_ewald_lr(ic->tabq_coul_F, ic->tabq_coul_V, ic->tabq_coul_FDV0,
1755                                 ic->tabq_size, 1/ic->tabq_scale, ic->ewaldcoeff);
1756 }
1757
1758 void init_interaction_const_tables(FILE                *fp,
1759                                    interaction_const_t *ic,
1760                                    gmx_bool             bUsesSimpleTables,
1761                                    real                 rtab)
1762 {
1763     real spacing;
1764
1765     if (ic->eeltype == eelEWALD || EEL_PME(ic->eeltype))
1766     {
1767         init_ewald_f_table(ic, bUsesSimpleTables, rtab);
1768
1769         if (fp != NULL)
1770         {
1771             fprintf(fp, "Initialized non-bonded Ewald correction tables, spacing: %.2e size: %d\n\n",
1772                     1/ic->tabq_scale, ic->tabq_size);
1773         }
1774     }
1775 }
1776
1777 void init_interaction_const(FILE                 *fp,
1778                             interaction_const_t **interaction_const,
1779                             const t_forcerec     *fr,
1780                             real                  rtab)
1781 {
1782     interaction_const_t *ic;
1783     gmx_bool             bUsesSimpleTables = TRUE;
1784
1785     snew(ic, 1);
1786
1787     /* Just allocate something so we can free it */
1788     snew_aligned(ic->tabq_coul_FDV0, 16, 32);
1789     snew_aligned(ic->tabq_coul_F, 16, 32);
1790     snew_aligned(ic->tabq_coul_V, 16, 32);
1791
1792     ic->rlist       = fr->rlist;
1793     ic->rlistlong   = fr->rlistlong;
1794
1795     /* Lennard-Jones */
1796     ic->rvdw        = fr->rvdw;
1797     if (fr->vdw_modifier == eintmodPOTSHIFT)
1798     {
1799         ic->sh_invrc6 = pow(ic->rvdw, -6.0);
1800     }
1801     else
1802     {
1803         ic->sh_invrc6 = 0;
1804     }
1805
1806     /* Electrostatics */
1807     ic->eeltype     = fr->eeltype;
1808     ic->rcoulomb    = fr->rcoulomb;
1809     ic->epsilon_r   = fr->epsilon_r;
1810     ic->epsfac      = fr->epsfac;
1811
1812     /* Ewald */
1813     ic->ewaldcoeff  = fr->ewaldcoeff;
1814     if (fr->coulomb_modifier == eintmodPOTSHIFT)
1815     {
1816         ic->sh_ewald = gmx_erfc(ic->ewaldcoeff*ic->rcoulomb);
1817     }
1818     else
1819     {
1820         ic->sh_ewald = 0;
1821     }
1822
1823     /* Reaction-field */
1824     if (EEL_RF(ic->eeltype))
1825     {
1826         ic->epsilon_rf = fr->epsilon_rf;
1827         ic->k_rf       = fr->k_rf;
1828         ic->c_rf       = fr->c_rf;
1829     }
1830     else
1831     {
1832         /* For plain cut-off we might use the reaction-field kernels */
1833         ic->epsilon_rf = ic->epsilon_r;
1834         ic->k_rf       = 0;
1835         if (fr->coulomb_modifier == eintmodPOTSHIFT)
1836         {
1837             ic->c_rf   = 1/ic->rcoulomb;
1838         }
1839         else
1840         {
1841             ic->c_rf   = 0;
1842         }
1843     }
1844
1845     if (fp != NULL)
1846     {
1847         fprintf(fp, "Potential shift: LJ r^-12: %.3f r^-6 %.3f",
1848                 sqr(ic->sh_invrc6), ic->sh_invrc6);
1849         if (ic->eeltype == eelCUT)
1850         {
1851             fprintf(fp, ", Coulomb %.3f", ic->c_rf);
1852         }
1853         else if (EEL_PME(ic->eeltype))
1854         {
1855             fprintf(fp, ", Ewald %.3e", ic->sh_ewald);
1856         }
1857         fprintf(fp, "\n");
1858     }
1859
1860     *interaction_const = ic;
1861
1862     if (fr->nbv != NULL && fr->nbv->bUseGPU)
1863     {
1864         nbnxn_cuda_init_const(fr->nbv->cu_nbv, ic, fr->nbv->grp);
1865     }
1866
1867     bUsesSimpleTables = uses_simple_tables(fr->cutoff_scheme, fr->nbv, -1);
1868     init_interaction_const_tables(fp, ic, bUsesSimpleTables, rtab);
1869 }
1870
1871 static void init_nb_verlet(FILE                *fp,
1872                            nonbonded_verlet_t **nb_verlet,
1873                            const t_inputrec    *ir,
1874                            const t_forcerec    *fr,
1875                            const t_commrec     *cr,
1876                            const char          *nbpu_opt)
1877 {
1878     nonbonded_verlet_t *nbv;
1879     int                 i;
1880     char               *env;
1881     gmx_bool            bEmulateGPU, bHybridGPURun = FALSE;
1882
1883     nbnxn_alloc_t      *nb_alloc;
1884     nbnxn_free_t       *nb_free;
1885
1886     snew(nbv, 1);
1887
1888     pick_nbnxn_resources(fp, cr, fr->hwinfo,
1889                          fr->bNonbonded,
1890                          &nbv->bUseGPU,
1891                          &bEmulateGPU);
1892
1893     nbv->nbs = NULL;
1894
1895     nbv->ngrp = (DOMAINDECOMP(cr) ? 2 : 1);
1896     for (i = 0; i < nbv->ngrp; i++)
1897     {
1898         nbv->grp[i].nbl_lists.nnbl = 0;
1899         nbv->grp[i].nbat           = NULL;
1900         nbv->grp[i].kernel_type    = nbnxnkNotSet;
1901
1902         if (i == 0) /* local */
1903         {
1904             pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1905                               nbv->bUseGPU, bEmulateGPU,
1906                               ir,
1907                               &nbv->grp[i].kernel_type,
1908                               &nbv->grp[i].ewald_excl,
1909                               fr->bNonbonded);
1910         }
1911         else /* non-local */
1912         {
1913             if (nbpu_opt != NULL && strcmp(nbpu_opt, "gpu_cpu") == 0)
1914             {
1915                 /* Use GPU for local, select a CPU kernel for non-local */
1916                 pick_nbnxn_kernel(fp, cr, fr->hwinfo, fr->use_cpu_acceleration,
1917                                   FALSE, FALSE,
1918                                   ir,
1919                                   &nbv->grp[i].kernel_type,
1920                                   &nbv->grp[i].ewald_excl,
1921                                   fr->bNonbonded);
1922
1923                 bHybridGPURun = TRUE;
1924             }
1925             else
1926             {
1927                 /* Use the same kernel for local and non-local interactions */
1928                 nbv->grp[i].kernel_type = nbv->grp[0].kernel_type;
1929                 nbv->grp[i].ewald_excl  = nbv->grp[0].ewald_excl;
1930             }
1931         }
1932     }
1933
1934     if (nbv->bUseGPU)
1935     {
1936         /* init the NxN GPU data; the last argument tells whether we'll have
1937          * both local and non-local NB calculation on GPU */
1938         nbnxn_cuda_init(fp, &nbv->cu_nbv,
1939                         &fr->hwinfo->gpu_info, cr->rank_pp_intranode,
1940                         (nbv->ngrp > 1) && !bHybridGPURun);
1941
1942         if ((env = getenv("GMX_NB_MIN_CI")) != NULL)
1943         {
1944             char *end;
1945
1946             nbv->min_ci_balanced = strtol(env, &end, 10);
1947             if (!end || (*end != 0) || nbv->min_ci_balanced <= 0)
1948             {
1949                 gmx_fatal(FARGS, "Invalid value passed in GMX_NB_MIN_CI=%s, positive integer required", env);
1950             }
1951
1952             if (debug)
1953             {
1954                 fprintf(debug, "Neighbor-list balancing parameter: %d (passed as env. var.)\n",
1955                         nbv->min_ci_balanced);
1956             }
1957         }
1958         else
1959         {
1960             nbv->min_ci_balanced = nbnxn_cuda_min_ci_balanced(nbv->cu_nbv);
1961             if (debug)
1962             {
1963                 fprintf(debug, "Neighbor-list balancing parameter: %d (auto-adjusted to the number of GPU multi-processors)\n",
1964                         nbv->min_ci_balanced);
1965             }
1966         }
1967     }
1968     else
1969     {
1970         nbv->min_ci_balanced = 0;
1971     }
1972
1973     *nb_verlet = nbv;
1974
1975     nbnxn_init_search(&nbv->nbs,
1976                       DOMAINDECOMP(cr) ? &cr->dd->nc : NULL,
1977                       DOMAINDECOMP(cr) ? domdec_zones(cr->dd) : NULL,
1978                       gmx_omp_nthreads_get(emntNonbonded));
1979
1980     for (i = 0; i < nbv->ngrp; i++)
1981     {
1982         if (nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
1983         {
1984             nb_alloc = &pmalloc;
1985             nb_free  = &pfree;
1986         }
1987         else
1988         {
1989             nb_alloc = NULL;
1990             nb_free  = NULL;
1991         }
1992
1993         nbnxn_init_pairlist_set(&nbv->grp[i].nbl_lists,
1994                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
1995                                 /* 8x8x8 "non-simple" lists are ATM always combined */
1996                                 !nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type),
1997                                 nb_alloc, nb_free);
1998
1999         if (i == 0 ||
2000             nbv->grp[0].kernel_type != nbv->grp[i].kernel_type)
2001         {
2002             snew(nbv->grp[i].nbat, 1);
2003             nbnxn_atomdata_init(fp,
2004                                 nbv->grp[i].nbat,
2005                                 nbv->grp[i].kernel_type,
2006                                 fr->ntype, fr->nbfp,
2007                                 ir->opts.ngener,
2008                                 nbnxn_kernel_pairlist_simple(nbv->grp[i].kernel_type) ? gmx_omp_nthreads_get(emntNonbonded) : 1,
2009                                 nb_alloc, nb_free);
2010         }
2011         else
2012         {
2013             nbv->grp[i].nbat = nbv->grp[0].nbat;
2014         }
2015     }
2016 }
2017
2018 void init_forcerec(FILE              *fp,
2019                    const output_env_t oenv,
2020                    t_forcerec        *fr,
2021                    t_fcdata          *fcd,
2022                    const t_inputrec  *ir,
2023                    const gmx_mtop_t  *mtop,
2024                    const t_commrec   *cr,
2025                    matrix             box,
2026                    gmx_bool           bMolEpot,
2027                    const char        *tabfn,
2028                    const char        *tabafn,
2029                    const char        *tabpfn,
2030                    const char        *tabbfn,
2031                    const char        *nbpu_opt,
2032                    gmx_bool           bNoSolvOpt,
2033                    real               print_force)
2034 {
2035     int            i, j, m, natoms, ngrp, negp_pp, negptable, egi, egj;
2036     real           rtab;
2037     char          *env;
2038     double         dbl;
2039     rvec           box_size;
2040     const t_block *cgs;
2041     gmx_bool       bGenericKernelOnly;
2042     gmx_bool       bTab, bSep14tab, bNormalnblists;
2043     t_nblists     *nbl;
2044     int           *nm_ind, egp_flags;
2045
2046     if (fr->hwinfo == NULL)
2047     {
2048         /* Detect hardware, gather information.
2049          * In mdrun, hwinfo has already been set before calling init_forcerec.
2050          * Here we ignore GPUs, as tools will not use them anyhow.
2051          */
2052         snew(fr->hwinfo, 1);
2053         gmx_detect_hardware(fp, fr->hwinfo, cr,
2054                             FALSE, FALSE, NULL);
2055     }
2056
2057     /* By default we turn acceleration on, but it might be turned off further down... */
2058     fr->use_cpu_acceleration = TRUE;
2059
2060     fr->bDomDec = DOMAINDECOMP(cr);
2061
2062     natoms = mtop->natoms;
2063
2064     if (check_box(ir->ePBC, box))
2065     {
2066         gmx_fatal(FARGS, check_box(ir->ePBC, box));
2067     }
2068
2069     /* Test particle insertion ? */
2070     if (EI_TPI(ir->eI))
2071     {
2072         /* Set to the size of the molecule to be inserted (the last one) */
2073         /* Because of old style topologies, we have to use the last cg
2074          * instead of the last molecule type.
2075          */
2076         cgs       = &mtop->moltype[mtop->molblock[mtop->nmolblock-1].type].cgs;
2077         fr->n_tpi = cgs->index[cgs->nr] - cgs->index[cgs->nr-1];
2078         if (fr->n_tpi != mtop->mols.index[mtop->mols.nr] - mtop->mols.index[mtop->mols.nr-1])
2079         {
2080             gmx_fatal(FARGS, "The molecule to insert can not consist of multiple charge groups.\nMake it a single charge group.");
2081         }
2082     }
2083     else
2084     {
2085         fr->n_tpi = 0;
2086     }
2087
2088     /* Copy AdResS parameters */
2089     if (ir->bAdress)
2090     {
2091         fr->adress_type           = ir->adress->type;
2092         fr->adress_const_wf       = ir->adress->const_wf;
2093         fr->adress_ex_width       = ir->adress->ex_width;
2094         fr->adress_hy_width       = ir->adress->hy_width;
2095         fr->adress_icor           = ir->adress->icor;
2096         fr->adress_site           = ir->adress->site;
2097         fr->adress_ex_forcecap    = ir->adress->ex_forcecap;
2098         fr->adress_do_hybridpairs = ir->adress->do_hybridpairs;
2099
2100
2101         snew(fr->adress_group_explicit, ir->adress->n_energy_grps);
2102         for (i = 0; i < ir->adress->n_energy_grps; i++)
2103         {
2104             fr->adress_group_explicit[i] = ir->adress->group_explicit[i];
2105         }
2106
2107         fr->n_adress_tf_grps = ir->adress->n_tf_grps;
2108         snew(fr->adress_tf_table_index, fr->n_adress_tf_grps);
2109         for (i = 0; i < fr->n_adress_tf_grps; i++)
2110         {
2111             fr->adress_tf_table_index[i] = ir->adress->tf_table_index[i];
2112         }
2113         copy_rvec(ir->adress->refs, fr->adress_refs);
2114     }
2115     else
2116     {
2117         fr->adress_type           = eAdressOff;
2118         fr->adress_do_hybridpairs = FALSE;
2119     }
2120
2121     /* Copy the user determined parameters */
2122     fr->userint1  = ir->userint1;
2123     fr->userint2  = ir->userint2;
2124     fr->userint3  = ir->userint3;
2125     fr->userint4  = ir->userint4;
2126     fr->userreal1 = ir->userreal1;
2127     fr->userreal2 = ir->userreal2;
2128     fr->userreal3 = ir->userreal3;
2129     fr->userreal4 = ir->userreal4;
2130
2131     /* Shell stuff */
2132     fr->fc_stepsize = ir->fc_stepsize;
2133
2134     /* Free energy */
2135     fr->efep        = ir->efep;
2136     fr->sc_alphavdw = ir->fepvals->sc_alpha;
2137     if (ir->fepvals->bScCoul)
2138     {
2139         fr->sc_alphacoul  = ir->fepvals->sc_alpha;
2140         fr->sc_sigma6_min = pow(ir->fepvals->sc_sigma_min, 6);
2141     }
2142     else
2143     {
2144         fr->sc_alphacoul  = 0;
2145         fr->sc_sigma6_min = 0; /* only needed when bScCoul is on */
2146     }
2147     fr->sc_power      = ir->fepvals->sc_power;
2148     fr->sc_r_power    = ir->fepvals->sc_r_power;
2149     fr->sc_sigma6_def = pow(ir->fepvals->sc_sigma, 6);
2150
2151     env = getenv("GMX_SCSIGMA_MIN");
2152     if (env != NULL)
2153     {
2154         dbl = 0;
2155         sscanf(env, "%lf", &dbl);
2156         fr->sc_sigma6_min = pow(dbl, 6);
2157         if (fp)
2158         {
2159             fprintf(fp, "Setting the minimum soft core sigma to %g nm\n", dbl);
2160         }
2161     }
2162
2163     fr->bNonbonded = TRUE;
2164     if (getenv("GMX_NO_NONBONDED") != NULL)
2165     {
2166         /* turn off non-bonded calculations */
2167         fr->bNonbonded = FALSE;
2168         md_print_warn(cr, fp,
2169                       "Found environment variable GMX_NO_NONBONDED.\n"
2170                       "Disabling nonbonded calculations.\n");
2171     }
2172
2173     bGenericKernelOnly = FALSE;
2174
2175     /* We now check in the NS code whether a particular combination of interactions
2176      * can be used with water optimization, and disable it if that is not the case.
2177      */
2178
2179     if (getenv("GMX_NB_GENERIC") != NULL)
2180     {
2181         if (fp != NULL)
2182         {
2183             fprintf(fp,
2184                     "Found environment variable GMX_NB_GENERIC.\n"
2185                     "Disabling all interaction-specific nonbonded kernels, will only\n"
2186                     "use the slow generic ones in src/gmxlib/nonbonded/nb_generic.c\n\n");
2187         }
2188         bGenericKernelOnly = TRUE;
2189     }
2190
2191     if (bGenericKernelOnly == TRUE)
2192     {
2193         bNoSolvOpt         = TRUE;
2194     }
2195
2196     if ( (getenv("GMX_DISABLE_CPU_ACCELERATION") != NULL) || (getenv("GMX_NOOPTIMIZEDKERNELS") != NULL) )
2197     {
2198         fr->use_cpu_acceleration = FALSE;
2199         if (fp != NULL)
2200         {
2201             fprintf(fp,
2202                     "\nFound environment variable GMX_DISABLE_CPU_ACCELERATION.\n"
2203                     "Disabling all CPU architecture-specific (e.g. SSE2/SSE4/AVX) routines.\n\n");
2204         }
2205     }
2206
2207     fr->bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
2208
2209     /* Check if we can/should do all-vs-all kernels */
2210     fr->bAllvsAll       = can_use_allvsall(ir, mtop, FALSE, NULL, NULL);
2211     fr->AllvsAll_work   = NULL;
2212     fr->AllvsAll_workgb = NULL;
2213
2214     /* All-vs-all kernels have not been implemented in 4.6, and
2215      * the SIMD group kernels are also buggy in this case. Non-accelerated
2216      * group kernels are OK. See Redmine #1249. */
2217     if (fr->bAllvsAll)
2218     {
2219         fr->bAllvsAll = FALSE;
2220         fr->use_cpu_acceleration = FALSE;
2221         if (fp != NULL)
2222         {
2223             fprintf(fp,
2224                     "\nYour simulation settings would have triggered the efficient all-vs-all\n"
2225                     "kernels in GROMACS 4.5, but these have not been implemented in GROMACS\n"
2226                     "4.6. Also, we can't use the accelerated SIMD kernels here because\n"
2227                     "of an unfixed bug. The reference C kernels are correct, though, so\n"
2228                     "we are proceeding by disabling all CPU architecture-specific\n"
2229                     "(e.g. SSE2/SSE4/AVX) routines. If performance is important, please\n"
2230                     "use GROMACS 4.5.7 or try cutoff-scheme = Verlet.\n\n");
2231         }
2232     }
2233
2234     /* Neighbour searching stuff */
2235     fr->cutoff_scheme = ir->cutoff_scheme;
2236     fr->bGrid         = (ir->ns_type == ensGRID);
2237     fr->ePBC          = ir->ePBC;
2238
2239     /* Determine if we will do PBC for distances in bonded interactions */
2240     if (fr->ePBC == epbcNONE)
2241     {
2242         fr->bMolPBC = FALSE;
2243     }
2244     else
2245     {
2246         if (!DOMAINDECOMP(cr))
2247         {
2248             /* The group cut-off scheme and SHAKE assume charge groups
2249              * are whole, but not using molpbc is faster in most cases.
2250              */
2251             if (fr->cutoff_scheme == ecutsGROUP ||
2252                 (ir->eConstrAlg == econtSHAKE &&
2253                  (gmx_mtop_ftype_count(mtop, F_CONSTR) > 0 ||
2254                   gmx_mtop_ftype_count(mtop, F_CONSTRNC) > 0)))
2255             {
2256                 fr->bMolPBC = ir->bPeriodicMols;
2257             }
2258             else
2259             {
2260                 fr->bMolPBC = TRUE;
2261                 if (getenv("GMX_USE_GRAPH") != NULL)
2262                 {
2263                     fr->bMolPBC = FALSE;
2264                     if (fp)
2265                     {
2266                         fprintf(fp, "\nGMX_MOLPBC is set, using the graph for bonded interactions\n\n");
2267                     }
2268                 }
2269             }
2270         }
2271         else
2272         {
2273             fr->bMolPBC = dd_bonded_molpbc(cr->dd, fr->ePBC);
2274         }
2275     }
2276     fr->bGB = (ir->implicit_solvent == eisGBSA);
2277
2278     fr->rc_scaling = ir->refcoord_scaling;
2279     copy_rvec(ir->posres_com, fr->posres_com);
2280     copy_rvec(ir->posres_comB, fr->posres_comB);
2281     fr->rlist      = cutoff_inf(ir->rlist);
2282     fr->rlistlong  = cutoff_inf(ir->rlistlong);
2283     fr->eeltype    = ir->coulombtype;
2284     fr->vdwtype    = ir->vdwtype;
2285
2286     fr->coulomb_modifier = ir->coulomb_modifier;
2287     fr->vdw_modifier     = ir->vdw_modifier;
2288
2289     /* Electrostatics: Translate from interaction-setting-in-mdp-file to kernel interaction format */
2290     switch (fr->eeltype)
2291     {
2292         case eelCUT:
2293             fr->nbkernel_elec_interaction = (fr->bGB) ? GMX_NBKERNEL_ELEC_GENERALIZEDBORN : GMX_NBKERNEL_ELEC_COULOMB;
2294             break;
2295
2296         case eelRF:
2297         case eelGRF:
2298         case eelRF_NEC:
2299             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2300             break;
2301
2302         case eelRF_ZERO:
2303             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_REACTIONFIELD;
2304             fr->coulomb_modifier          = eintmodEXACTCUTOFF;
2305             break;
2306
2307         case eelSWITCH:
2308         case eelSHIFT:
2309         case eelUSER:
2310         case eelENCADSHIFT:
2311         case eelPMESWITCH:
2312         case eelPMEUSER:
2313         case eelPMEUSERSWITCH:
2314             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2315             break;
2316
2317         case eelPME:
2318         case eelEWALD:
2319             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_EWALD;
2320             break;
2321
2322         default:
2323             gmx_fatal(FARGS, "Unsupported electrostatic interaction: %s", eel_names[fr->eeltype]);
2324             break;
2325     }
2326
2327     /* Vdw: Translate from mdp settings to kernel format */
2328     switch (fr->vdwtype)
2329     {
2330         case evdwCUT:
2331             if (fr->bBHAM)
2332             {
2333                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_BUCKINGHAM;
2334             }
2335             else
2336             {
2337                 fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_LENNARDJONES;
2338             }
2339             break;
2340
2341         case evdwSWITCH:
2342         case evdwSHIFT:
2343         case evdwUSER:
2344         case evdwENCADSHIFT:
2345             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2346             break;
2347
2348         default:
2349             gmx_fatal(FARGS, "Unsupported vdw interaction: %s", evdw_names[fr->vdwtype]);
2350             break;
2351     }
2352
2353     /* These start out identical to ir, but might be altered if we e.g. tabulate the interaction in the kernel */
2354     fr->nbkernel_elec_modifier    = fr->coulomb_modifier;
2355     fr->nbkernel_vdw_modifier     = fr->vdw_modifier;
2356
2357     fr->bTwinRange = fr->rlistlong > fr->rlist;
2358     fr->bEwald     = (EEL_PME(fr->eeltype) || fr->eeltype == eelEWALD);
2359
2360     fr->reppow     = mtop->ffparams.reppow;
2361
2362     if (ir->cutoff_scheme == ecutsGROUP)
2363     {
2364         fr->bvdwtab    = (fr->vdwtype != evdwCUT ||
2365                           !gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS));
2366         /* We have special kernels for standard Ewald and PME, but the pme-switch ones are tabulated above */
2367         fr->bcoultab   = !(fr->eeltype == eelCUT ||
2368                            fr->eeltype == eelEWALD ||
2369                            fr->eeltype == eelPME ||
2370                            fr->eeltype == eelRF ||
2371                            fr->eeltype == eelRF_ZERO);
2372
2373         /* If the user absolutely wants different switch/shift settings for coul/vdw, it is likely
2374          * going to be faster to tabulate the interaction than calling the generic kernel.
2375          */
2376         if (fr->nbkernel_elec_modifier == eintmodPOTSWITCH && fr->nbkernel_vdw_modifier == eintmodPOTSWITCH)
2377         {
2378             if ((fr->rcoulomb_switch != fr->rvdw_switch) || (fr->rcoulomb != fr->rvdw))
2379             {
2380                 fr->bcoultab = TRUE;
2381             }
2382         }
2383         else if ((fr->nbkernel_elec_modifier == eintmodPOTSHIFT && fr->nbkernel_vdw_modifier == eintmodPOTSHIFT) ||
2384                  ((fr->nbkernel_elec_interaction == GMX_NBKERNEL_ELEC_REACTIONFIELD &&
2385                    fr->nbkernel_elec_modifier == eintmodEXACTCUTOFF &&
2386                    (fr->nbkernel_vdw_modifier == eintmodPOTSWITCH || fr->nbkernel_vdw_modifier == eintmodPOTSHIFT))))
2387         {
2388             if (fr->rcoulomb != fr->rvdw)
2389             {
2390                 fr->bcoultab = TRUE;
2391             }
2392         }
2393
2394         if (getenv("GMX_REQUIRE_TABLES"))
2395         {
2396             fr->bvdwtab  = TRUE;
2397             fr->bcoultab = TRUE;
2398         }
2399
2400         if (fp)
2401         {
2402             fprintf(fp, "Table routines are used for coulomb: %s\n", bool_names[fr->bcoultab]);
2403             fprintf(fp, "Table routines are used for vdw:     %s\n", bool_names[fr->bvdwtab ]);
2404         }
2405
2406         if (fr->bvdwtab == TRUE)
2407         {
2408             fr->nbkernel_vdw_interaction = GMX_NBKERNEL_VDW_CUBICSPLINETABLE;
2409             fr->nbkernel_vdw_modifier    = eintmodNONE;
2410         }
2411         if (fr->bcoultab == TRUE)
2412         {
2413             fr->nbkernel_elec_interaction = GMX_NBKERNEL_ELEC_CUBICSPLINETABLE;
2414             fr->nbkernel_elec_modifier    = eintmodNONE;
2415         }
2416     }
2417
2418     if (ir->cutoff_scheme == ecutsVERLET)
2419     {
2420         if (!gmx_within_tol(fr->reppow, 12.0, 10*GMX_DOUBLE_EPS))
2421         {
2422             gmx_fatal(FARGS, "Cut-off scheme %S only supports LJ repulsion power 12", ecutscheme_names[ir->cutoff_scheme]);
2423         }
2424         fr->bvdwtab  = FALSE;
2425         fr->bcoultab = FALSE;
2426     }
2427
2428     /* Tables are used for direct ewald sum */
2429     if (fr->bEwald)
2430     {
2431         if (EEL_PME(ir->coulombtype))
2432         {
2433             if (fp)
2434             {
2435                 fprintf(fp, "Will do PME sum in reciprocal space.\n");
2436             }
2437             if (ir->coulombtype == eelP3M_AD)
2438             {
2439                 please_cite(fp, "Hockney1988");
2440                 please_cite(fp, "Ballenegger2012");
2441             }
2442             else
2443             {
2444                 please_cite(fp, "Essmann95a");
2445             }
2446
2447             if (ir->ewald_geometry == eewg3DC)
2448             {
2449                 if (fp)
2450                 {
2451                     fprintf(fp, "Using the Ewald3DC correction for systems with a slab geometry.\n");
2452                 }
2453                 please_cite(fp, "In-Chul99a");
2454             }
2455         }
2456         fr->ewaldcoeff = calc_ewaldcoeff(ir->rcoulomb, ir->ewald_rtol);
2457         init_ewald_tab(&(fr->ewald_table), ir, fp);
2458         if (fp)
2459         {
2460             fprintf(fp, "Using a Gaussian width (1/beta) of %g nm for Ewald\n",
2461                     1/fr->ewaldcoeff);
2462         }
2463     }
2464
2465     /* Electrostatics */
2466     fr->epsilon_r       = ir->epsilon_r;
2467     fr->epsilon_rf      = ir->epsilon_rf;
2468     fr->fudgeQQ         = mtop->ffparams.fudgeQQ;
2469     fr->rcoulomb_switch = ir->rcoulomb_switch;
2470     fr->rcoulomb        = cutoff_inf(ir->rcoulomb);
2471
2472     /* Parameters for generalized RF */
2473     fr->zsquare = 0.0;
2474     fr->temp    = 0.0;
2475
2476     if (fr->eeltype == eelGRF)
2477     {
2478         init_generalized_rf(fp, mtop, ir, fr);
2479     }
2480     else if (fr->eeltype == eelSHIFT)
2481     {
2482         for (m = 0; (m < DIM); m++)
2483         {
2484             box_size[m] = box[m][m];
2485         }
2486
2487         if ((fr->eeltype == eelSHIFT && fr->rcoulomb > fr->rcoulomb_switch))
2488         {
2489             set_shift_consts(fr->rcoulomb_switch, fr->rcoulomb, box_size);
2490         }
2491     }
2492
2493     fr->bF_NoVirSum = (EEL_FULL(fr->eeltype) ||
2494                        gmx_mtop_ftype_count(mtop, F_POSRES) > 0 ||
2495                        gmx_mtop_ftype_count(mtop, F_FBPOSRES) > 0 ||
2496                        IR_ELEC_FIELD(*ir) ||
2497                        (fr->adress_icor != eAdressICOff)
2498                        );
2499
2500     if (fr->cutoff_scheme == ecutsGROUP &&
2501         ncg_mtop(mtop) > fr->cg_nalloc && !DOMAINDECOMP(cr))
2502     {
2503         /* Count the total number of charge groups */
2504         fr->cg_nalloc = ncg_mtop(mtop);
2505         srenew(fr->cg_cm, fr->cg_nalloc);
2506     }
2507     if (fr->shift_vec == NULL)
2508     {
2509         snew(fr->shift_vec, SHIFTS);
2510     }
2511
2512     if (fr->fshift == NULL)
2513     {
2514         snew(fr->fshift, SHIFTS);
2515     }
2516
2517     if (fr->nbfp == NULL)
2518     {
2519         fr->ntype = mtop->ffparams.atnr;
2520         fr->nbfp  = mk_nbfp(&mtop->ffparams, fr->bBHAM);
2521     }
2522
2523     /* Copy the energy group exclusions */
2524     fr->egp_flags = ir->opts.egp_flags;
2525
2526     /* Van der Waals stuff */
2527     fr->rvdw        = cutoff_inf(ir->rvdw);
2528     fr->rvdw_switch = ir->rvdw_switch;
2529     if ((fr->vdwtype != evdwCUT) && (fr->vdwtype != evdwUSER) && !fr->bBHAM)
2530     {
2531         if (fr->rvdw_switch >= fr->rvdw)
2532         {
2533             gmx_fatal(FARGS, "rvdw_switch (%f) must be < rvdw (%f)",
2534                       fr->rvdw_switch, fr->rvdw);
2535         }
2536         if (fp)
2537         {
2538             fprintf(fp, "Using %s Lennard-Jones, switch between %g and %g nm\n",
2539                     (fr->eeltype == eelSWITCH) ? "switched" : "shifted",
2540                     fr->rvdw_switch, fr->rvdw);
2541         }
2542     }
2543
2544     if (fr->bBHAM && (fr->vdwtype == evdwSHIFT || fr->vdwtype == evdwSWITCH))
2545     {
2546         gmx_fatal(FARGS, "Switch/shift interaction not supported with Buckingham");
2547     }
2548
2549     if (fp)
2550     {
2551         fprintf(fp, "Cut-off's:   NS: %g   Coulomb: %g   %s: %g\n",
2552                 fr->rlist, fr->rcoulomb, fr->bBHAM ? "BHAM" : "LJ", fr->rvdw);
2553     }
2554
2555     fr->eDispCorr = ir->eDispCorr;
2556     if (ir->eDispCorr != edispcNO)
2557     {
2558         set_avcsixtwelve(fp, fr, mtop);
2559     }
2560
2561     if (fr->bBHAM)
2562     {
2563         set_bham_b_max(fp, fr, mtop);
2564     }
2565
2566     fr->gb_epsilon_solvent = ir->gb_epsilon_solvent;
2567
2568     /* Copy the GBSA data (radius, volume and surftens for each
2569      * atomtype) from the topology atomtype section to forcerec.
2570      */
2571     snew(fr->atype_radius, fr->ntype);
2572     snew(fr->atype_vol, fr->ntype);
2573     snew(fr->atype_surftens, fr->ntype);
2574     snew(fr->atype_gb_radius, fr->ntype);
2575     snew(fr->atype_S_hct, fr->ntype);
2576
2577     if (mtop->atomtypes.nr > 0)
2578     {
2579         for (i = 0; i < fr->ntype; i++)
2580         {
2581             fr->atype_radius[i] = mtop->atomtypes.radius[i];
2582         }
2583         for (i = 0; i < fr->ntype; i++)
2584         {
2585             fr->atype_vol[i] = mtop->atomtypes.vol[i];
2586         }
2587         for (i = 0; i < fr->ntype; i++)
2588         {
2589             fr->atype_surftens[i] = mtop->atomtypes.surftens[i];
2590         }
2591         for (i = 0; i < fr->ntype; i++)
2592         {
2593             fr->atype_gb_radius[i] = mtop->atomtypes.gb_radius[i];
2594         }
2595         for (i = 0; i < fr->ntype; i++)
2596         {
2597             fr->atype_S_hct[i] = mtop->atomtypes.S_hct[i];
2598         }
2599     }
2600
2601     /* Generate the GB table if needed */
2602     if (fr->bGB)
2603     {
2604 #ifdef GMX_DOUBLE
2605         fr->gbtabscale = 2000;
2606 #else
2607         fr->gbtabscale = 500;
2608 #endif
2609
2610         fr->gbtabr = 100;
2611         fr->gbtab  = make_gb_table(fp, oenv, fr, tabpfn, fr->gbtabscale);
2612
2613         init_gb(&fr->born, cr, fr, ir, mtop, ir->rgbradii, ir->gb_algorithm);
2614
2615         /* Copy local gb data (for dd, this is done in dd_partition_system) */
2616         if (!DOMAINDECOMP(cr))
2617         {
2618             make_local_gb(cr, fr->born, ir->gb_algorithm);
2619         }
2620     }
2621
2622     /* Set the charge scaling */
2623     if (fr->epsilon_r != 0)
2624     {
2625         fr->epsfac = ONE_4PI_EPS0/fr->epsilon_r;
2626     }
2627     else
2628     {
2629         /* eps = 0 is infinite dieletric: no coulomb interactions */
2630         fr->epsfac = 0;
2631     }
2632
2633     /* Reaction field constants */
2634     if (EEL_RF(fr->eeltype))
2635     {
2636         calc_rffac(fp, fr->eeltype, fr->epsilon_r, fr->epsilon_rf,
2637                    fr->rcoulomb, fr->temp, fr->zsquare, box,
2638                    &fr->kappa, &fr->k_rf, &fr->c_rf);
2639     }
2640
2641     set_chargesum(fp, fr, mtop);
2642
2643     /* if we are using LR electrostatics, and they are tabulated,
2644      * the tables will contain modified coulomb interactions.
2645      * Since we want to use the non-shifted ones for 1-4
2646      * coulombic interactions, we must have an extra set of tables.
2647      */
2648
2649     /* Construct tables.
2650      * A little unnecessary to make both vdw and coul tables sometimes,
2651      * but what the heck... */
2652
2653     bTab = fr->bcoultab || fr->bvdwtab || fr->bEwald;
2654
2655     bSep14tab = ((!bTab || fr->eeltype != eelCUT || fr->vdwtype != evdwCUT ||
2656                   fr->bBHAM || fr->bEwald) &&
2657                  (gmx_mtop_ftype_count(mtop, F_LJ14) > 0 ||
2658                   gmx_mtop_ftype_count(mtop, F_LJC14_Q) > 0 ||
2659                   gmx_mtop_ftype_count(mtop, F_LJC_PAIRS_NB) > 0));
2660
2661     negp_pp   = ir->opts.ngener - ir->nwall;
2662     negptable = 0;
2663     if (!bTab)
2664     {
2665         bNormalnblists = TRUE;
2666         fr->nnblists   = 1;
2667     }
2668     else
2669     {
2670         bNormalnblists = (ir->eDispCorr != edispcNO);
2671         for (egi = 0; egi < negp_pp; egi++)
2672         {
2673             for (egj = egi; egj < negp_pp; egj++)
2674             {
2675                 egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2676                 if (!(egp_flags & EGP_EXCL))
2677                 {
2678                     if (egp_flags & EGP_TABLE)
2679                     {
2680                         negptable++;
2681                     }
2682                     else
2683                     {
2684                         bNormalnblists = TRUE;
2685                     }
2686                 }
2687             }
2688         }
2689         if (bNormalnblists)
2690         {
2691             fr->nnblists = negptable + 1;
2692         }
2693         else
2694         {
2695             fr->nnblists = negptable;
2696         }
2697         if (fr->nnblists > 1)
2698         {
2699             snew(fr->gid2nblists, ir->opts.ngener*ir->opts.ngener);
2700         }
2701     }
2702
2703     if (ir->adress)
2704     {
2705         fr->nnblists *= 2;
2706     }
2707
2708     snew(fr->nblists, fr->nnblists);
2709
2710     /* This code automatically gives table length tabext without cut-off's,
2711      * in that case grompp should already have checked that we do not need
2712      * normal tables and we only generate tables for 1-4 interactions.
2713      */
2714     rtab = ir->rlistlong + ir->tabext;
2715
2716     if (bTab)
2717     {
2718         /* make tables for ordinary interactions */
2719         if (bNormalnblists)
2720         {
2721             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[0]);
2722             if (ir->adress)
2723             {
2724                 make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn, NULL, NULL, &fr->nblists[fr->nnblists/2]);
2725             }
2726             if (!bSep14tab)
2727             {
2728                 fr->tab14 = fr->nblists[0].table_elec_vdw;
2729             }
2730             m = 1;
2731         }
2732         else
2733         {
2734             m = 0;
2735         }
2736         if (negptable > 0)
2737         {
2738             /* Read the special tables for certain energy group pairs */
2739             nm_ind = mtop->groups.grps[egcENER].nm_ind;
2740             for (egi = 0; egi < negp_pp; egi++)
2741             {
2742                 for (egj = egi; egj < negp_pp; egj++)
2743                 {
2744                     egp_flags = ir->opts.egp_flags[GID(egi, egj, ir->opts.ngener)];
2745                     if ((egp_flags & EGP_TABLE) && !(egp_flags & EGP_EXCL))
2746                     {
2747                         nbl = &(fr->nblists[m]);
2748                         if (fr->nnblists > 1)
2749                         {
2750                             fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = m;
2751                         }
2752                         /* Read the table file with the two energy groups names appended */
2753                         make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2754                                         *mtop->groups.grpname[nm_ind[egi]],
2755                                         *mtop->groups.grpname[nm_ind[egj]],
2756                                         &fr->nblists[m]);
2757                         if (ir->adress)
2758                         {
2759                             make_nbf_tables(fp, oenv, fr, rtab, cr, tabfn,
2760                                             *mtop->groups.grpname[nm_ind[egi]],
2761                                             *mtop->groups.grpname[nm_ind[egj]],
2762                                             &fr->nblists[fr->nnblists/2+m]);
2763                         }
2764                         m++;
2765                     }
2766                     else if (fr->nnblists > 1)
2767                     {
2768                         fr->gid2nblists[GID(egi, egj, ir->opts.ngener)] = 0;
2769                     }
2770                 }
2771             }
2772         }
2773     }
2774     if (bSep14tab)
2775     {
2776         /* generate extra tables with plain Coulomb for 1-4 interactions only */
2777         fr->tab14 = make_tables(fp, oenv, fr, MASTER(cr), tabpfn, rtab,
2778                                 GMX_MAKETABLES_14ONLY);
2779     }
2780
2781     /* Read AdResS Thermo Force table if needed */
2782     if (fr->adress_icor == eAdressICThermoForce)
2783     {
2784         /* old todo replace */
2785
2786         if (ir->adress->n_tf_grps > 0)
2787         {
2788             make_adress_tf_tables(fp, oenv, fr, ir, tabfn, mtop, box);
2789
2790         }
2791         else
2792         {
2793             /* load the default table */
2794             snew(fr->atf_tabs, 1);
2795             fr->atf_tabs[DEFAULT_TF_TABLE] = make_atf_table(fp, oenv, fr, tabafn, box);
2796         }
2797     }
2798
2799     /* Wall stuff */
2800     fr->nwall = ir->nwall;
2801     if (ir->nwall && ir->wall_type == ewtTABLE)
2802     {
2803         make_wall_tables(fp, oenv, ir, tabfn, &mtop->groups, fr);
2804     }
2805
2806     if (fcd && tabbfn)
2807     {
2808         fcd->bondtab  = make_bonded_tables(fp,
2809                                            F_TABBONDS, F_TABBONDSNC,
2810                                            mtop, tabbfn, "b");
2811         fcd->angletab = make_bonded_tables(fp,
2812                                            F_TABANGLES, -1,
2813                                            mtop, tabbfn, "a");
2814         fcd->dihtab   = make_bonded_tables(fp,
2815                                            F_TABDIHS, -1,
2816                                            mtop, tabbfn, "d");
2817     }
2818     else
2819     {
2820         if (debug)
2821         {
2822             fprintf(debug, "No fcdata or table file name passed, can not read table, can not do bonded interactions\n");
2823         }
2824     }
2825
2826     /* QM/MM initialization if requested
2827      */
2828     if (ir->bQMMM)
2829     {
2830         fprintf(stderr, "QM/MM calculation requested.\n");
2831     }
2832
2833     fr->bQMMM      = ir->bQMMM;
2834     fr->qr         = mk_QMMMrec();
2835
2836     /* Set all the static charge group info */
2837     fr->cginfo_mb = init_cginfo_mb(fp, mtop, fr, bNoSolvOpt,
2838                                    &fr->bExcl_IntraCGAll_InterCGNone);
2839     if (DOMAINDECOMP(cr))
2840     {
2841         fr->cginfo = NULL;
2842     }
2843     else
2844     {
2845         fr->cginfo = cginfo_expand(mtop->nmolblock, fr->cginfo_mb);
2846     }
2847
2848     if (!DOMAINDECOMP(cr))
2849     {
2850         /* When using particle decomposition, the effect of the second argument,
2851          * which sets fr->hcg, is corrected later in do_md and init_em.
2852          */
2853         forcerec_set_ranges(fr, ncg_mtop(mtop), ncg_mtop(mtop),
2854                             mtop->natoms, mtop->natoms, mtop->natoms);
2855     }
2856
2857     fr->print_force = print_force;
2858
2859
2860     /* coarse load balancing vars */
2861     fr->t_fnbf    = 0.;
2862     fr->t_wait    = 0.;
2863     fr->timesteps = 0;
2864
2865     /* Initialize neighbor search */
2866     init_ns(fp, cr, &fr->ns, fr, mtop, box);
2867
2868     if (cr->duty & DUTY_PP)
2869     {
2870         gmx_nonbonded_setup(fp, fr, bGenericKernelOnly);
2871         /*
2872            if (ir->bAdress)
2873             {
2874                 gmx_setup_adress_kernels(fp,bGenericKernelOnly);
2875             }
2876          */
2877     }
2878
2879     /* Initialize the thread working data for bonded interactions */
2880     init_forcerec_f_threads(fr, mtop->groups.grps[egcENER].nr);
2881
2882     snew(fr->excl_load, fr->nthreads+1);
2883
2884     if (fr->cutoff_scheme == ecutsVERLET)
2885     {
2886         if (ir->rcoulomb != ir->rvdw)
2887         {
2888             gmx_fatal(FARGS, "With Verlet lists rcoulomb and rvdw should be identical");
2889         }
2890
2891         init_nb_verlet(fp, &fr->nbv, ir, fr, cr, nbpu_opt);
2892     }
2893
2894     /* fr->ic is used both by verlet and group kernels (to some extent) now */
2895     init_interaction_const(fp, &fr->ic, fr, rtab);
2896     if (ir->eDispCorr != edispcNO)
2897     {
2898         calc_enervirdiff(fp, ir->eDispCorr, fr);
2899     }
2900 }
2901
2902 #define pr_real(fp, r) fprintf(fp, "%s: %e\n",#r, r)
2903 #define pr_int(fp, i)  fprintf((fp), "%s: %d\n",#i, i)
2904 #define pr_bool(fp, b) fprintf((fp), "%s: %s\n",#b, bool_names[b])
2905
2906 void pr_forcerec(FILE *fp, t_forcerec *fr, t_commrec *cr)
2907 {
2908     int i;
2909
2910     pr_real(fp, fr->rlist);
2911     pr_real(fp, fr->rcoulomb);
2912     pr_real(fp, fr->fudgeQQ);
2913     pr_bool(fp, fr->bGrid);
2914     pr_bool(fp, fr->bTwinRange);
2915     /*pr_int(fp,fr->cg0);
2916        pr_int(fp,fr->hcg);*/
2917     for (i = 0; i < fr->nnblists; i++)
2918     {
2919         pr_int(fp, fr->nblists[i].table_elec_vdw.n);
2920     }
2921     pr_real(fp, fr->rcoulomb_switch);
2922     pr_real(fp, fr->rcoulomb);
2923
2924     fflush(fp);
2925 }
2926
2927 void forcerec_set_excl_load(t_forcerec *fr,
2928                             const gmx_localtop_t *top, const t_commrec *cr)
2929 {
2930     const int *ind, *a;
2931     int        t, i, j, ntot, n, ntarget;
2932
2933     if (cr != NULL && PARTDECOMP(cr))
2934     {
2935         /* No OpenMP with particle decomposition */
2936         pd_at_range(cr,
2937                     &fr->excl_load[0],
2938                     &fr->excl_load[1]);
2939
2940         return;
2941     }
2942
2943     ind = top->excls.index;
2944     a   = top->excls.a;
2945
2946     ntot = 0;
2947     for (i = 0; i < top->excls.nr; i++)
2948     {
2949         for (j = ind[i]; j < ind[i+1]; j++)
2950         {
2951             if (a[j] > i)
2952             {
2953                 ntot++;
2954             }
2955         }
2956     }
2957
2958     fr->excl_load[0] = 0;
2959     n                = 0;
2960     i                = 0;
2961     for (t = 1; t <= fr->nthreads; t++)
2962     {
2963         ntarget = (ntot*t)/fr->nthreads;
2964         while (i < top->excls.nr && n < ntarget)
2965         {
2966             for (j = ind[i]; j < ind[i+1]; j++)
2967             {
2968                 if (a[j] > i)
2969                 {
2970                     n++;
2971                 }
2972             }
2973             i++;
2974         }
2975         fr->excl_load[t] = i;
2976     }
2977 }