src/gromacs/gmxlib/nonbonded/nonbonded.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "gromacs/legacyheaders/nonbonded.h"
  40
  41 #include "config.h"
  42
  43 #include <stdio.h>
  44 #include <stdlib.h>
  45
  46 #include "thread_mpi/threads.h"
  47
  48 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
  49 #include "gromacs/gmxlib/nonbonded/nb_generic.h"
  50 #include "gromacs/gmxlib/nonbonded/nb_generic_adress.h"
  51 #include "gromacs/gmxlib/nonbonded/nb_generic_cg.h"
  52 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  53 #include "gromacs/legacyheaders/force.h"
  54 #include "gromacs/legacyheaders/macros.h"
  55 #include "gromacs/legacyheaders/names.h"
  56 #include "gromacs/legacyheaders/nrnb.h"
  57 #include "gromacs/legacyheaders/ns.h"
  58 #include "gromacs/legacyheaders/txtdump.h"
  59 #include "gromacs/legacyheaders/typedefs.h"
  60 #include "gromacs/listed-forces/bonded.h"
  61 #include "gromacs/math/utilities.h"
  62 #include "gromacs/math/vec.h"
  63 #include "gromacs/pbcutil/ishift.h"
  64 #include "gromacs/pbcutil/mshift.h"
  65 #include "gromacs/pbcutil/pbc.h"
  66 #include "gromacs/simd/simd.h"
  67 #include "gromacs/utility/cstringutil.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/smalloc.h"
  70
  71 /* Different default (c) and SIMD instructions interaction-specific kernels */
  72 #include "gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h"
  73
  74 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
  75 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h"
  76 #endif
  77 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
  78 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
  79 #endif
  80 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
  81 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
  82 #endif
  83 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
  84 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
  85 #endif
  86 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
  87 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h"
  88 #endif
  89 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
  90 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
  91 #endif
  92 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
  93 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
  94 #endif
  95 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
  96 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
  97 #endif
  98 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
  99 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
 100 #endif
 101
 102
 103 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 104 static gmx_bool            nonbonded_setup_done  = FALSE;
 105
 106
 107 void
 108 gmx_nonbonded_setup(t_forcerec *   fr,
 109                     gmx_bool       bGenericKernelOnly)
 110 {
 111     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 112     /* Here we are guaranteed only one thread made it. */
 113     if (nonbonded_setup_done == FALSE)
 114     {
 115         if (bGenericKernelOnly == FALSE)
 116         {
 117             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 118             nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 119
 120             if (!(fr != NULL && fr->use_simd_kernels == FALSE))
 121             {
 122                 /* Add interaction-specific kernels for different architectures */
 123                 /* Single precision */
 124 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
 125                 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 126 #endif
 127 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
 128                 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 129 #endif
 130 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 131                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 132 #endif
 133 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
 134                 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 135 #endif
 136                 /* Double precision */
 137 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
 138                 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 139 #endif
 140 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
 141                 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 142 #endif
 143 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
 144                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 145 #endif
 146 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
 147                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 148 #endif
 149 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 150                 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
 151 #endif
 152                 ; /* empty statement to avoid a completely empty block */
 153             }
 154         }
 155         /* Create a hash for faster lookups */
 156         nb_kernel_list_hash_init();
 157
 158         nonbonded_setup_done = TRUE;
 159     }
 160     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 161 }
 162
 163
 164
 165 void
 166 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
 167 {
 168     const char *     elec;
 169     const char *     elec_mod;
 170     const char *     vdw;
 171     const char *     vdw_mod;
 172     const char *     geom;
 173     const char *     other;
 174     const char *     vf;
 175
 176     struct
 177     {
 178         const char *  arch;
 179         int           simd_padding_width;
 180     }
 181     arch_and_padding[] =
 182     {
 183         /* Single precision */
 184 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
 185         { "avx_256_single", 8 },
 186 #endif
 187 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 188         { "avx_128_fma_single", 4 },
 189 #endif
 190 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
 191         { "sse4_1_single", 4 },
 192 #endif
 193 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
 194         { "sse2_single", 4 },
 195 #endif
 196         /* Double precision */
 197 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
 198         { "avx_256_double", 4 },
 199 #endif
 200 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
 201         /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 202          * since the kernels execute a loop unrolled a factor 2, followed by
 203          * a possible single odd-element epilogue.
 204          */
 205         { "avx_128_fma_double", 1 },
 206 #endif
 207 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
 208         /* No padding - see comment above */
 209         { "sse2_double", 1 },
 210 #endif
 211 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
 212         /* No padding - see comment above */
 213         { "sse4_1_double", 1 },
 214 #endif
 215 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 216         /* No padding - see comment above */
 217         { "sparc64_hpc_ace_double", 1 },
 218 #endif
 219         { "c", 1 },
 220     };
 221     int              narch = asize(arch_and_padding);
 222     int              i;
 223
 224     if (nonbonded_setup_done == FALSE)
 225     {
 226         /* We typically call this setup routine before starting timers,
 227          * but if that has not been done for whatever reason we do it now.
 228          */
 229         gmx_nonbonded_setup(NULL, FALSE);
 230     }
 231
 232     /* Not used yet */
 233     other = "";
 234
 235     nl->kernelptr_vf = NULL;
 236     nl->kernelptr_v  = NULL;
 237     nl->kernelptr_f  = NULL;
 238
 239     elec     = gmx_nbkernel_elec_names[nl->ielec];
 240     elec_mod = eintmod_names[nl->ielecmod];
 241     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 242     vdw_mod  = eintmod_names[nl->ivdwmod];
 243     geom     = gmx_nblist_geometry_names[nl->igeometry];
 244
 245     if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
 246     {
 247         nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
 248         nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
 249         nl->simd_padding_width = 1;
 250         return;
 251     }
 252
 253     if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 254     {
 255         nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
 256         nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 257         nl->simd_padding_width = 1;
 258     }
 259     else if (!gmx_strcasecmp_min(geom, "CG-CG"))
 260     {
 261         nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
 262         nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 263         nl->simd_padding_width = 1;
 264     }
 265     else
 266     {
 267         /* Try to find a specific kernel first */
 268
 269         for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
 270         {
 271             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 272             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 273         }
 274         for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
 275         {
 276             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 277             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 278
 279             /* If there is not force-only optimized kernel, is there a potential & force one? */
 280             if (nl->kernelptr_f == NULL)
 281             {
 282                 nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 283                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 284             }
 285         }
 286
 287         /* For now, the accelerated kernels cannot handle the combination of switch functions for both
 288          * electrostatics and VdW that use different switch radius or switch cutoff distances
 289          * (both of them enter in the switch function calculation). This would require
 290          * us to evaluate two completely separate switch functions for every interaction.
 291          * Instead, we disable such kernels by setting the pointer to NULL.
 292          * This will cause the generic kernel (which can handle it) to be called instead.
 293          *
 294          * Note that we typically already enable tabulated coulomb interactions for this case,
 295          * so this is mostly a safe-guard to make sure we call the generic kernel if the
 296          * tables are disabled.
 297          */
 298         if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
 299             (nl->ivdw  != GMX_NBKERNEL_VDW_NONE)  && (nl->ivdwmod  == eintmodPOTSWITCH) &&
 300             bElecAndVdwSwitchDiffers)
 301         {
 302             nl->kernelptr_vf = NULL;
 303             nl->kernelptr_f  = NULL;
 304         }
 305
 306         /* Give up, pick a generic one instead.
 307          * We only do this for particle-particle kernels; by leaving the water-optimized kernel
 308          * pointers to NULL, the water optimization will automatically be disabled for this interaction.
 309          */
 310         if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
 311         {
 312             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
 313             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 314             nl->simd_padding_width = 1;
 315             if (debug)
 316             {
 317                 fprintf(debug,
 318                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
 319                         "    Elec: '%s', Modifier: '%s'\n"
 320                         "    Vdw:  '%s', Modifier: '%s'\n",
 321                         elec, elec_mod, vdw, vdw_mod);
 322             }
 323         }
 324     }
 325     return;
 326 }
 327
 328 void do_nonbonded(t_forcerec *fr,
 329                   rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
 330                   gmx_grppairener_t *grppener,
 331                   t_nrnb *nrnb, real *lambda, real *dvdl,
 332                   int nls, int eNL, int flags)
 333 {
 334     t_nblist *        nlist;
 335     int               n, n0, n1, i, i0, i1, sz, range;
 336     t_nblists *       nblists;
 337     nb_kernel_data_t  kernel_data;
 338     nb_kernel_t *     kernelptr = NULL;
 339     rvec *            f;
 340
 341     kernel_data.flags                   = flags;
 342     kernel_data.exclusions              = excl;
 343     kernel_data.lambda                  = lambda;
 344     kernel_data.dvdl                    = dvdl;
 345
 346     if (fr->bAllvsAll)
 347     {
 348         gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
 349         return;
 350     }
 351
 352     if (eNL >= 0)
 353     {
 354         i0 = eNL;
 355         i1 = i0+1;
 356     }
 357     else
 358     {
 359         i0 = 0;
 360         i1 = eNL_NR;
 361     }
 362
 363     if (nls >= 0)
 364     {
 365         n0 = nls;
 366         n1 = nls+1;
 367     }
 368     else
 369     {
 370         n0 = 0;
 371         n1 = fr->nnblists;
 372     }
 373
 374     for (n = n0; (n < n1); n++)
 375     {
 376         nblists = &fr->nblists[n];
 377
 378         kernel_data.table_elec              = &nblists->table_elec;
 379         kernel_data.table_vdw               = &nblists->table_vdw;
 380         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 381
 382         for (range = 0; range < 2; range++)
 383         {
 384             /* Are we doing short/long-range? */
 385             if (range == 0)
 386             {
 387                 /* Short-range */
 388                 if (!(flags & GMX_NONBONDED_DO_SR))
 389                 {
 390                     continue;
 391                 }
 392                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
 393                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 394                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 395                 nlist = nblists->nlist_sr;
 396                 f                                   = f_shortrange;
 397             }
 398             else
 399             {
 400                 /* Long-range */
 401                 if (!(flags & GMX_NONBONDED_DO_LR))
 402                 {
 403                     continue;
 404                 }
 405                 kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
 406                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
 407                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 408                 nlist = nblists->nlist_lr;
 409                 f                                   = f_longrange;
 410             }
 411
 412             for (i = i0; (i < i1); i++)
 413             {
 414                 if (nlist[i].nri > 0)
 415                 {
 416                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
 417                     {
 418                         /* Potential and force */
 419                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
 420                     }
 421                     else
 422                     {
 423                         /* Force only, no potential */
 424                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
 425                     }
 426
 427                     if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
 428                     {
 429                         /* We don't need the non-perturbed interactions */
 430                         continue;
 431                     }
 432                     /* Neighborlists whose kernelptr==NULL will always be empty */
 433                     if (kernelptr != NULL)
 434                     {
 435                         (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 436                     }
 437                     else
 438                     {
 439                         gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
 440                     }
 441                 }
 442             }
 443         }
 444     }
 445 }
 446
 447 static void
 448 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
 449 {
 450     gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
 451                 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
 452                 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
 453                 "a smaller molecule you are decoupling during a free energy calculation.\n"
 454                 "Since interactions at distances beyond the table cannot be computed,\n"
 455                 "they are skipped until they are inside the table limit again. You will\n"
 456                 "only see this message once, even if it occurs for several interactions.\n\n"
 457                 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
 458                 "probably something wrong with your system. Only change the table-extension\n"
 459                 "distance in the mdp file if you are really sure that is the reason.\n",
 460                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
 461
 462     if (debug)
 463     {
 464         fprintf(debug,
 465                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
 466                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
 467                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
 468     }
 469 }
 470
 471
 472
 473 /* This might logically belong better in the nb_generic.c module, but it is only
 474  * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 475  * extra functional call for every single pair listed in the topology.
 476  */
 477 static real
 478 nb_evaluate_single(real r2, real tabscale, real *vftab,
 479                    real qq, real c6, real c12, real *velec, real *vvdw)
 480 {
 481     real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 482     int        ntab;
 483
 484     /* Do the tabulated interactions - first table lookup */
 485     rinv             = gmx_invsqrt(r2);
 486     r                = r2*rinv;
 487     rtab             = r*tabscale;
 488     ntab             = rtab;
 489     eps              = rtab-ntab;
 490     eps2             = eps*eps;
 491     ntab             = 12*ntab;
 492     /* Electrostatics */
 493     Y                = vftab[ntab];
 494     F                = vftab[ntab+1];
 495     Geps             = eps*vftab[ntab+2];
 496     Heps2            = eps2*vftab[ntab+3];
 497     Fp               = F+Geps+Heps2;
 498     VVe              = Y+eps*Fp;
 499     FFe              = Fp+Geps+2.0*Heps2;
 500     /* Dispersion */
 501     Y                = vftab[ntab+4];
 502     F                = vftab[ntab+5];
 503     Geps             = eps*vftab[ntab+6];
 504     Heps2            = eps2*vftab[ntab+7];
 505     Fp               = F+Geps+Heps2;
 506     VVd              = Y+eps*Fp;
 507     FFd              = Fp+Geps+2.0*Heps2;
 508     /* Repulsion */
 509     Y                = vftab[ntab+8];
 510     F                = vftab[ntab+9];
 511     Geps             = eps*vftab[ntab+10];
 512     Heps2            = eps2*vftab[ntab+11];
 513     Fp               = F+Geps+Heps2;
 514     VVr              = Y+eps*Fp;
 515     FFr              = Fp+Geps+2.0*Heps2;
 516
 517     *velec           = qq*VVe;
 518     *vvdw            = c6*VVd+c12*VVr;
 519
 520     fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
 521
 522     return fscal;
 523 }
 524
 525
 526 real
 527 do_nonbonded_listed(int ftype, int nbonds,
 528                     const t_iatom iatoms[], const t_iparams iparams[],
 529                     const rvec x[], rvec f[], rvec fshift[],
 530                     const t_pbc *pbc, const t_graph *g,
 531                     real *lambda, real *dvdl,
 532                     const t_mdatoms *md,
 533                     const t_forcerec *fr, gmx_grppairener_t *grppener,
 534                     int *global_atom_index)
 535 {
 536     int              ielec, ivdw;
 537     real             qq, c6, c12;
 538     rvec             dx;
 539     ivec             dt;
 540     int              i, j, itype, ai, aj, gid;
 541     int              fshift_index;
 542     real             r2, rinv;
 543     real             fscal, velec, vvdw;
 544     real *           energygrp_elec;
 545     real *           energygrp_vdw;
 546     static gmx_bool  warned_rlimit = FALSE;
 547     /* Free energy stuff */
 548     gmx_bool         bFreeEnergy;
 549     real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 550     real             qqB, c6B, c12B, sigma2_def, sigma2_min;
 551
 552
 553     switch (ftype)
 554     {
 555         case F_LJ14:
 556         case F_LJC14_Q:
 557             energygrp_elec = grppener->ener[egCOUL14];
 558             energygrp_vdw  = grppener->ener[egLJ14];
 559             break;
 560         case F_LJC_PAIRS_NB:
 561             energygrp_elec = grppener->ener[egCOULSR];
 562             energygrp_vdw  = grppener->ener[egLJSR];
 563             break;
 564         default:
 565             energygrp_elec = NULL; /* Keep compiler happy */
 566             energygrp_vdw  = NULL; /* Keep compiler happy */
 567             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 568             break;
 569     }
 570
 571     if (fr->efep != efepNO)
 572     {
 573         /* Lambda factor for state A=1-lambda and B=lambda */
 574         LFC[0] = 1.0 - lambda[efptCOUL];
 575         LFV[0] = 1.0 - lambda[efptVDW];
 576         LFC[1] = lambda[efptCOUL];
 577         LFV[1] = lambda[efptVDW];
 578
 579         /*derivative of the lambda factor for state A and B */
 580         DLF[0] = -1;
 581         DLF[1] = 1;
 582
 583         /* precalculate */
 584         sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
 585         sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
 586
 587         for (i = 0; i < 2; i++)
 588         {
 589             lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 590             dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
 591             lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 592             dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
 593         }
 594     }
 595     else
 596     {
 597         sigma2_min = sigma2_def = 0;
 598     }
 599
 600     bFreeEnergy = FALSE;
 601     for (i = 0; (i < nbonds); )
 602     {
 603         itype = iatoms[i++];
 604         ai    = iatoms[i++];
 605         aj    = iatoms[i++];
 606         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 607
 608         /* Get parameters */
 609         switch (ftype)
 610         {
 611             case F_LJ14:
 612                 bFreeEnergy =
 613                     (fr->efep != efepNO &&
 614                      ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
 615                       iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
 616                       iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 617                 qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
 618                 c6               = iparams[itype].lj14.c6A;
 619                 c12              = iparams[itype].lj14.c12A;
 620                 break;
 621             case F_LJC14_Q:
 622                 qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
 623                 c6               = iparams[itype].ljc14.c6;
 624                 c12              = iparams[itype].ljc14.c12;
 625                 break;
 626             case F_LJC_PAIRS_NB:
 627                 qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
 628                 c6               = iparams[itype].ljcnb.c6;
 629                 c12              = iparams[itype].ljcnb.c12;
 630                 break;
 631             default:
 632                 /* Cannot happen since we called gmx_fatal() above in this case */
 633                 qq = c6 = c12 = 0; /* Keep compiler happy */
 634                 break;
 635         }
 636
 637         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 638          * included in the general nfbp array now. This means the tables are scaled down by the
 639          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 640          * be scaled up.
 641          */
 642         c6  *= 6.0;
 643         c12 *= 12.0;
 644
 645         /* Do we need to apply full periodic boundary conditions? */
 646         if (fr->bMolPBC == TRUE)
 647         {
 648             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 649         }
 650         else
 651         {
 652             fshift_index = CENTRAL;
 653             rvec_sub(x[ai], x[aj], dx);
 654         }
 655         r2           = norm2(dx);
 656
 657         if (r2 >= fr->tab14.r*fr->tab14.r)
 658         {
 659             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 660              * disadvantage is that the warning is printed twice */
 661             if (warned_rlimit == FALSE)
 662             {
 663                 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
 664                 warned_rlimit = TRUE;
 665             }
 666             continue;
 667         }
 668
 669         if (bFreeEnergy)
 670         {
 671             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 672             qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
 673             c6B              = iparams[itype].lj14.c6B*6.0;
 674             c12B             = iparams[itype].lj14.c12B*12.0;
 675
 676             fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
 677                                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
 678                                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
 679                                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 680         }
 681         else
 682         {
 683             /* Evaluate tabulated interaction without free energy */
 684             fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
 685         }
 686
 687         energygrp_elec[gid]  += velec;
 688         energygrp_vdw[gid]   += vvdw;
 689         svmul(fscal, dx, dx);
 690
 691         /* Add the forces */
 692         rvec_inc(f[ai], dx);
 693         rvec_dec(f[aj], dx);
 694
 695         if (g)
 696         {
 697             /* Correct the shift forces using the graph */
 698             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 699             fshift_index = IVEC2IS(dt);
 700         }
 701         if (fshift_index != CENTRAL)
 702         {
 703             rvec_inc(fshift[fshift_index], dx);
 704             rvec_dec(fshift[CENTRAL], dx);
 705         }
 706     }
 707     return 0.0;
 708 }