1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
 * GROningen Mixture of Alchemy and Childrens' Stories
 */
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
76 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
78 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
79 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
81 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
82 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
84 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
85 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
87 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
88 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
90 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
91 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
93 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
94 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
96 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
97 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
101 #ifdef GMX_THREAD_MPI
102 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
104 static gmx_bool nonbonded_setup_done = FALSE;
/* One-time registration of all compiled-in nonbonded kernel lists.
 *
 * Adds the generic C kernels and, unless CPU acceleration is disabled in the
 * force record, every architecture-specific kernel list selected by the
 * GMX_CPU_ACCELERATION_* / GMX_DOUBLE preprocessor configuration; finally
 * builds the lookup hash. Guarded by nonbonded_setup_mutex under thread-MPI
 * so only the first caller performs the work.
 *
 * fplog              - log file handle (may be NULL)
 * bGenericKernelOnly - when FALSE, register the optimized kernels too
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, matching
 * #endif directives, and a t_forcerec* parameter 'fr' that the body reads) -
 * verify against the complete file.
 */
gmx_nonbonded_setup(FILE * fplog,
                    gmx_bool bGenericKernelOnly)
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
    /* Here we are guaranteed only one thread made it. */
    if (nonbonded_setup_done == FALSE)
        if (bGenericKernelOnly == FALSE)
            /* Add the generic kernels to the structure stored statically in nb_kernel.c */
            nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
            /* Register accelerated kernels unless the force record explicitly
             * disables CPU acceleration.
             */
            if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
                /* Add interaction-specific kernels for different architectures */
                /* Single precision */
#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
                /* Double precision */
#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
                ; /* empty statement to avoid a completely empty block */
        /* Create a hash for faster lookups */
        nb_kernel_list_hash_init();
        nonbonded_setup_done = TRUE;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
/* Install kernel function pointers and the SIMD neighbor-list padding width
 * on neighborlist 'nl'.
 *
 * Special list types (AdResS, free-energy, charge-group geometry) get their
 * dedicated generic kernels with padding 1. Otherwise the optimized kernel
 * lists are searched per architecture (in the priority order of the
 * arch_and_padding table) for "PotentialAndForce" and "Force" variants, and
 * as a last resort the slow generic kernel is installed with a warning
 * printed to 'log'.
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, the
 * struct declaration heading the arch_and_padding table, matching #endif
 * directives, and the fprintf opening for the warning) - verify against the
 * complete file.
 */
gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
    const char * elec_mod;
    const char * vdw_mod;
    /* Neighborlist padding (in elements) required by each SIMD kernel set. */
    int simd_padding_width;
    /* Single precision */
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
    { "avx_256_single", 8 },
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
    { "avx_128_fma_single", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
    { "sse4_1_single", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
    { "sse2_single", 4 },
    /* Double precision */
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
    { "avx_256_double", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
    /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
     * since the kernels execute a loop unrolled a factor 2, followed by
     * a possible single odd-element epilogue.
     */
    { "avx_128_fma_double", 1 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
    /* No padding - see comment above */
    { "sse2_double", 1 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
    /* No padding - see comment above */
    { "sse4_1_double", 1 },
    int narch = asize(arch_and_padding);

    if (nonbonded_setup_done == FALSE)
        /* We typically call this setup routine before starting timers,
         * but if that has not been done for whatever reason we do it now.
         */
        gmx_nonbonded_setup(NULL, NULL, FALSE);

    /* Clear any previously installed kernel pointers. */
    nl->kernelptr_vf = NULL;
    nl->kernelptr_v = NULL;
    nl->kernelptr_f = NULL;

    /* Human-readable names used as lookup keys into the kernel lists. */
    elec = gmx_nbkernel_elec_names[nl->ielec];
    elec_mod = eintmod_names[nl->ielecmod];
    vdw = gmx_nbkernel_vdw_names[nl->ivdw];
    vdw_mod = eintmod_names[nl->ivdwmod];
    geom = gmx_nblist_geometry_names[nl->igeometry];

    if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
        /* AdResS lists always use the dedicated generic AdResS kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
        nl->simd_padding_width = 1;
    if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
        /* Perturbed interactions go through the free-energy kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
        nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
        nl->simd_padding_width = 1;
    else if (!gmx_strcasecmp_min(geom, "CG-CG"))
        /* Charge-group - charge-group geometry uses the generic CG kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
        nl->simd_padding_width = 1;
    /* Try to find a specific kernel first */
    for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
        nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
        nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
    for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
        nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
        nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
        /* If there is not force-only optimized kernel, is there a potential & force one? */
        if (nl->kernelptr_f == NULL)
            nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
            nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
    /* Give up, pick a generic one instead */
    if (nl->kernelptr_vf == NULL)
        nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
        nl->simd_padding_width = 1;
        /* Warn so use of the slow generic kernel is visible in the log
         * (continuation of an fprintf whose opening line is not visible
         * in this excerpt).
         */
            "WARNING - Slow generic NB kernel used for neighborlist with\n"
            "    Elec: '%s', Modifier: '%s'\n"
            "    Vdw: '%s', Modifier: '%s'\n"
            "    Geom: '%s', Other: '%s'\n\n",
            elec, elec_mod, vdw, vdw_mod, geom, other);
/* Dispatch the nonbonded inner-loop kernels over the neighborlist sets.
 *
 * For each fr->nblists[n] the interpolation tables are installed into
 * kernel_data, then the short-range (SR) and long-range (LR) lists are
 * processed in turn: the appropriate per-list kernel pointer
 * (potential+force or force-only) is selected from 'flags' and invoked for
 * every non-empty list.
 *
 * NOTE(review): this excerpt omits lines (braces, the computation of the
 * n0/n1 and i0/i1 loop bounds, the selection of the force array 'f', and
 * declarations of locals such as 'nblists' and 'nlist') - verify against
 * the complete file.
 */
void do_nonbonded(t_commrec *cr, t_forcerec *fr,
                  rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
                  gmx_grppairener_t *grppener, rvec box_size,
                  t_nrnb *nrnb, real *lambda, real *dvdl,
                  int nls, int eNL, int flags)
    int n, n0, n1, i, i0, i1, sz, range;
    nb_kernel_data_t kernel_data;
    nb_kernel_t * kernelptr = NULL;

    /* Common data handed to every kernel invocation below. */
    kernel_data.flags = flags;
    kernel_data.exclusions = excl;
    kernel_data.lambda = lambda;
    kernel_data.dvdl = dvdl;

    for (n = n0; (n < n1); n++)
        nblists = &fr->nblists[n];
        /* Interpolation tables belonging to this neighborlist set. */
        kernel_data.table_elec = &nblists->table_elec;
        kernel_data.table_vdw = &nblists->table_vdw;
        kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
        for (range = 0; range < 2; range++)
            /* Are we doing short/long-range? */
            if (!(flags & GMX_NONBONDED_DO_SR))
            /* Short-range pass: accumulate into the SR energy groups. */
            kernel_data.energygrp_elec = grppener->ener[egCOULSR];
            kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
            kernel_data.energygrp_polarization = grppener->ener[egGB];
            nlist = nblists->nlist_sr;
            if (!(flags & GMX_NONBONDED_DO_LR))
            /* Long-range pass: accumulate into the LR energy groups. */
            kernel_data.energygrp_elec = grppener->ener[egCOULLR];
            kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
            kernel_data.energygrp_polarization = grppener->ener[egGB];
            nlist = nblists->nlist_lr;
        for (i = i0; (i < i1); i++)
            /* Skip empty lists. */
            if (nlist[i].nri > 0)
                if (flags & GMX_NONBONDED_DO_POTENTIAL)
                    /* Potential and force */
                    kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
                    /* Force only, no potential */
                    kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
                if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
                    /* We don't need the non-perturbed interactions */
                (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
/* Emit the one-time user warning for a listed (pair/1-4) nonbonded
 * interaction whose distance 'r' exceeds the table limit 'rlimit', then dump
 * the two particle coordinates for debugging.
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, and the
 * opening of the coordinate-dump fprintf call) - verify against the
 * complete file.
 */
nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
                "a smaller molecule you are decoupling during a free energy calculation.\n"
                "Since interactions at distances beyond the table cannot be computed,\n"
                "they are skipped until they are inside the table limit again. You will\n"
                "only see this message once, even if it occurs for several interactions.\n\n"
                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
                "probably something wrong with your system. Only change the table-extension\n"
                "distance in the mdp file if you are really sure that is the reason.\n",
                glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);

    /* Coordinate dump (continuation of an fprintf whose opening line is not
     * visible in this excerpt).
     */
            "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
            x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
            glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
/* This might logically belong better in the nb_generic.c module, but it is only
 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 * extra functional call for every single pair listed in the topology.
 *
 * Evaluates a single tabulated pair interaction: Coulomb (charge product qq),
 * dispersion (c6) and repulsion (c12) from the spline table 'vftab' at
 * squared distance r2, writing the energies to *velec / *vvdw and returning
 * the scalar force factor via 'fscal'.
 */
nb_evaluate_single(real r2, real tabscale, real *vftab,
                   real qq, real c6, real c12, real *velec, real *vvdw)
    real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;

    /* Do the tabulated interactions - first table lookup */
    rinv = gmx_invsqrt(r2);
    /* Cubic-spline interpolation: each table point stores four coefficients
     * (Y, F, G, H). The offsets used below - +2/+3 for the Coulomb (FFe)
     * block, +6/+7 for dispersion (FFd), +10/+11 for repulsion (FFr) -
     * imply a stride of 12 reals per point. The base index 'ntab' and the
     * fractional offsets eps/eps2 are computed on lines not visible in this
     * excerpt - confirm against the complete file.
     */
    Geps = eps*vftab[ntab+2];
    Heps2 = eps2*vftab[ntab+3];
    /* Spline derivative for the Coulomb part: Fp + G*eps + 2*H*eps^2. */
    FFe = Fp+Geps+2.0*Heps2;
    Geps = eps*vftab[ntab+6];
    Heps2 = eps2*vftab[ntab+7];
    FFd = Fp+Geps+2.0*Heps2;
    Geps = eps*vftab[ntab+10];
    Heps2 = eps2*vftab[ntab+11];
    FFr = Fp+Geps+2.0*Heps2;

    /* Total VdW energy: dispersion plus repulsion contributions. */
    *vvdw = c6*VVd+c12*VVr;

    /* Scalar force: negative derivative of the tabulated potentials,
     * rescaled from table units and divided by r (via rinv).
     */
    fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
/* Compute listed ("bonded-style") nonbonded pair interactions: 1-4 pairs
 * (lj14), pairs with individual parameters (ljc14) and ljcnb pairs,
 * including a softcore free-energy path for perturbed 1-4 pairs.
 *
 * NOTE(review): this excerpt omits many lines (braces, switch/case labels,
 * a t_mdatoms* parameter 'md' that the body reads, several locals such as
 * qq/c6/c12/r2/dx/fshift_index, and the function tail beyond the
 * shift-force update) - verify against the complete file.
 */
do_nonbonded_listed(int ftype, int nbonds,
                    const t_iatom iatoms[], const t_iparams iparams[],
                    const rvec x[], rvec f[], rvec fshift[],
                    const t_pbc *pbc, const t_graph *g,
                    real *lambda, real *dvdl,
                    const t_forcerec *fr, gmx_grppairener_t *grppener,
                    int *global_atom_index)
    int i, j, itype, ai, aj, gid;
    real fscal, velec, vvdw;
    real * energygrp_elec;
    real * energygrp_vdw;
    /* Static latch: the over-table-limit warning is printed at most once. */
    static gmx_bool warned_rlimit = FALSE;
    /* Free energy stuff */
    gmx_bool bFreeEnergy;
    real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
    real qqB, c6B, c12B, sigma2_def, sigma2_min;

    /* Select the energy-group accumulators by function type. The following
     * assignments are alternative branches of a switch whose case labels are
     * not visible in this excerpt.
     */
    energygrp_elec = grppener->ener[egCOUL14];
    energygrp_vdw = grppener->ener[egLJ14];
    energygrp_elec = grppener->ener[egCOULSR];
    energygrp_vdw = grppener->ener[egLJSR];
    energygrp_elec = NULL; /* Keep compiler happy */
    energygrp_vdw = NULL; /* Keep compiler happy */
    gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);

    if (fr->efep != efepNO)
        /* Lambda factor for state A=1-lambda and B=lambda */
        LFC[0] = 1.0 - lambda[efptCOUL];
        LFV[0] = 1.0 - lambda[efptVDW];
        LFC[1] = lambda[efptCOUL];
        LFV[1] = lambda[efptVDW];

        /* derivative of the lambda factor for state A and B */
        /* sigma^2 from the default/minimum softcore sigma^6 parameters. */
        sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
        sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);

        for (i = 0; i < 2; i++)
            /* Softcore lambda factors and their lambda-derivatives for both
             * end states; squared when sc_power == 2.
             */
            lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
            lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
            dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
        sigma2_min = sigma2_def = 0;

    /* Main pair loop; 'i' is advanced inside the body (increment not
     * visible in this excerpt).
     */
    for (i = 0; (i < nbonds); )
        gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
        /* A pair is treated as perturbed when either atom is perturbed or
         * its A/B LJ parameters differ (continuation of a bFreeEnergy
         * assignment whose opening line is not visible in this excerpt).
         */
        (fr->efep != efepNO &&
         ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
          iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
          iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
        /* Per-type parameter extraction; the following groups are
         * alternative switch branches (case labels not visible here).
         */
        qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
        c6 = iparams[itype].lj14.c6A;
        c12 = iparams[itype].lj14.c12A;
        qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
        c6 = iparams[itype].ljc14.c6;
        c12 = iparams[itype].ljc14.c12;
        qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
        c6 = iparams[itype].ljcnb.c6;
        c12 = iparams[itype].ljcnb.c12;
        /* Cannot happen since we called gmx_fatal() above in this case */
        qq = c6 = c12 = 0; /* Keep compiler happy */

        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
         * included in the general nfbp array now. This means the tables are scaled down by the
         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
         * [comment truncated in this excerpt]
         */

        /* Do we need to apply full periodic boundary conditions? */
        if (fr->bMolPBC == TRUE)
            fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
            fshift_index = CENTRAL;
            rvec_sub(x[ai], x[aj], dx);

        /* Pairs beyond the 1-4 table range are skipped, with a one-time warning. */
        if (r2 >= fr->tab14.r*fr->tab14.r)
            if (warned_rlimit == FALSE)
                nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
                warned_rlimit = TRUE;

        /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
        qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
        /* B-state LJ parameters carry the 6.0/12.0 derivative prefactors
         * discussed in the comment above.
         */
        c6B = iparams[itype].lj14.c6B*6.0;
        c12B = iparams[itype].lj14.c12B*12.0;

        fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);

        /* Evaluate tabulated interaction without free energy */
        fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);

        /* Accumulate energies; turn the scalar force into a vector along dx. */
        energygrp_elec[gid] += velec;
        energygrp_vdw[gid] += vvdw;
        svmul(fscal, dx, dx);

        /* Correct the shift forces using the graph */
        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
        fshift_index = IVEC2IS(dt);
        if (fshift_index != CENTRAL)
            rvec_inc(fshift[fshift_index], dx);
            rvec_dec(fshift[CENTRAL], dx);