src/gmxlib/nonbonded/nonbonded.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #ifdef GMX_THREAD_MPI
  43 #include <thread_mpi.h>
  44 #endif
  45
  46 #include <stdio.h>
  47 #include <stdlib.h>
  48 #include "typedefs.h"
  49 #include "txtdump.h"
  50 #include "smalloc.h"
  51 #include "ns.h"
  52 #include "vec.h"
  53 #include "maths.h"
  54 #include "macros.h"
  55 #include "string2.h"
  56 #include "force.h"
  57 #include "names.h"
  58 #include "main.h"
  59 #include "xvgr.h"
  60 #include "gmx_fatal.h"
  61 #include "physics.h"
  62 #include "force.h"
  63 #include "bondf.h"
  64 #include "nrnb.h"
  65 #include "smalloc.h"
  66 #include "nonbonded.h"
  67
  68 #include "nb_kernel.h"
  69 #include "nb_free_energy.h"
  70 #include "nb_generic.h"
  71 #include "nb_generic_cg.h"
  72 #include "nb_generic_adress.h"
  73
  74 /* Different default (c) and accelerated interaction-specific kernels */
  75 #include "nb_kernel_c/nb_kernel_c.h"
  76
  77 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
  78 #    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
  79 #endif
  80 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
  81 #    include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
  82 #endif
  83 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
  84 #    include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
  85 #endif
  86 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
  87 #    include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
  88 #endif
  89 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
  90 #    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
  91 #endif
  92 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
  93 #    include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
  94 #endif
  95 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
  96 #    include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
  97 #endif
  98 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
  99 #    include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
 100 #endif
 101 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 102 #    include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
 103 #endif
 104
 105
 106 #ifdef GMX_THREAD_MPI
 107 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 108 #endif
 109 static gmx_bool            nonbonded_setup_done  = FALSE;
 110
 111
 112 void
 113 gmx_nonbonded_setup(FILE *         fplog,
 114                     t_forcerec *   fr,
 115                     gmx_bool       bGenericKernelOnly)
 116 {
 117 #ifdef GMX_THREAD_MPI
 118     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 119 #endif
 120     /* Here we are guaranteed only one thread made it. */
 121     if (nonbonded_setup_done == FALSE)
 122     {
 123         if (bGenericKernelOnly == FALSE)
 124         {
 125             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 126             nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 127
 128             if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
 129             {
 130                 /* Add interaction-specific kernels for different architectures */
 131                 /* Single precision */
 132 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 133                 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 134 #endif
 135 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 136                 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 137 #endif
 138 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 139                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 140 #endif
 141 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 142                 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 143 #endif
 144                 /* Double precision */
 145 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 146                 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 147 #endif
 148 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 149                 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 150 #endif
 151 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 152                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 153 #endif
 154 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 155                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 156 #endif
 157 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 158                 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
 159 #endif
 160                 ; /* empty statement to avoid a completely empty block */
 161             }
 162         }
 163         /* Create a hash for faster lookups */
 164         nb_kernel_list_hash_init();
 165
 166         nonbonded_setup_done = TRUE;
 167     }
 168 #ifdef GMX_THREAD_MPI
 169     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 170 #endif
 171 }
 172
 173
 174
 175 void
 176 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
 177 {
 178     const char *     elec;
 179     const char *     elec_mod;
 180     const char *     vdw;
 181     const char *     vdw_mod;
 182     const char *     geom;
 183     const char *     other;
 184     const char *     vf;
 185
 186     struct
 187     {
 188         const char *  arch;
 189         int           simd_padding_width;
 190     }
 191     arch_and_padding[] =
 192     {
 193         /* Single precision */
 194 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
 195         { "avx_256_single", 8 },
 196 #endif
 197 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 198         { "avx_128_fma_single", 4 },
 199 #endif
 200 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
 201         { "sse4_1_single", 4 },
 202 #endif
 203 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
 204         { "sse2_single", 4 },
 205 #endif
 206         /* Double precision */
 207 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
 208         { "avx_256_double", 4 },
 209 #endif
 210 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
 211         /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 212          * since the kernels execute a loop unrolled a factor 2, followed by
 213          * a possible single odd-element epilogue.
 214          */
 215         { "avx_128_fma_double", 1 },
 216 #endif
 217 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
 218         /* No padding - see comment above */
 219         { "sse2_double", 1 },
 220 #endif
 221 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
 222         /* No padding - see comment above */
 223         { "sse4_1_double", 1 },
 224 #endif
 225 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 226         /* No padding - see comment above */
 227         { "sparc64_hpc_ace_double", 1 },
 228 #endif
 229         { "c", 1 },
 230     };
 231     int              narch = asize(arch_and_padding);
 232     int              i;
 233
 234     if (nonbonded_setup_done == FALSE)
 235     {
 236         /* We typically call this setup routine before starting timers,
 237          * but if that has not been done for whatever reason we do it now.
 238          */
 239         gmx_nonbonded_setup(NULL, NULL, FALSE);
 240     }
 241
 242     /* Not used yet */
 243     other = "";
 244
 245     nl->kernelptr_vf = NULL;
 246     nl->kernelptr_v  = NULL;
 247     nl->kernelptr_f  = NULL;
 248
 249     elec     = gmx_nbkernel_elec_names[nl->ielec];
 250     elec_mod = eintmod_names[nl->ielecmod];
 251     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 252     vdw_mod  = eintmod_names[nl->ivdwmod];
 253     geom     = gmx_nblist_geometry_names[nl->igeometry];
 254
 255     if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
 256     {
 257         nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
 258         nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
 259         nl->simd_padding_width = 1;
 260         return;
 261     }
 262
 263     if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 264     {
 265         nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
 266         nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 267         nl->simd_padding_width = 1;
 268     }
 269     else if (!gmx_strcasecmp_min(geom, "CG-CG"))
 270     {
 271         nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
 272         nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 273         nl->simd_padding_width = 1;
 274     }
 275     else
 276     {
 277         /* Try to find a specific kernel first */
 278
 279         for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
 280         {
 281             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 282             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 283         }
 284         for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
 285         {
 286             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 287             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 288
 289             /* If there is not force-only optimized kernel, is there a potential & force one? */
 290             if (nl->kernelptr_f == NULL)
 291             {
 292                 nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 293                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 294             }
 295         }
 296
 297         /* For now, the accelerated kernels cannot handle the combination of switch functions for both
 298          * electrostatics and VdW that use different switch radius or switch cutoff distances
 299          * (both of them enter in the switch function calculation). This would require
 300          * us to evaluate two completely separate switch functions for every interaction.
 301          * Instead, we disable such kernels by setting the pointer to NULL.
 302          * This will cause the generic kernel (which can handle it) to be called instead.
 303          *
 304          * Note that we typically already enable tabulated coulomb interactions for this case,
 305          * so this is mostly a safe-guard to make sure we call the generic kernel if the
 306          * tables are disabled.
 307          */
 308         if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
 309             (nl->ivdw  != GMX_NBKERNEL_VDW_NONE)  && (nl->ivdwmod  == eintmodPOTSWITCH) &&
 310             bElecAndVdwSwitchDiffers)
 311         {
 312             nl->kernelptr_vf = NULL;
 313             nl->kernelptr_f  = NULL;
 314         }
 315
 316         /* Give up, pick a generic one instead.
 317          * We only do this for particle-particle kernels; by leaving the water-optimized kernel
 318          * pointers to NULL, the water optimization will automatically be disabled for this interaction.
 319          */
 320         if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom,"Particle-Particle"))
 321         {
 322             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
 323             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 324             nl->simd_padding_width = 1;
 325             if (debug)
 326             {
 327                 fprintf(debug,
 328                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
 329                         "    Elec: '%s', Modifier: '%s'\n"
 330                         "    Vdw:  '%s', Modifier: '%s'\n",
 331                         elec, elec_mod, vdw, vdw_mod);
 332             }
 333         }
 334     }
 335     return;
 336 }
 337
 338 void do_nonbonded(t_commrec *cr, t_forcerec *fr,
 339                   rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
 340                   gmx_grppairener_t *grppener, rvec box_size,
 341                   t_nrnb *nrnb, real *lambda, real *dvdl,
 342                   int nls, int eNL, int flags)
 343 {
 344     t_nblist *        nlist;
 345     int               n, n0, n1, i, i0, i1, sz, range;
 346     t_nblists *       nblists;
 347     nb_kernel_data_t  kernel_data;
 348     nb_kernel_t *     kernelptr = NULL;
 349     rvec *            f;
 350
 351     kernel_data.flags                   = flags;
 352     kernel_data.exclusions              = excl;
 353     kernel_data.lambda                  = lambda;
 354     kernel_data.dvdl                    = dvdl;
 355
 356     if (fr->bAllvsAll)
 357     {
 358         gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
 359         return;
 360     }
 361
 362     if (eNL >= 0)
 363     {
 364         i0 = eNL;
 365         i1 = i0+1;
 366     }
 367     else
 368     {
 369         i0 = 0;
 370         i1 = eNL_NR;
 371     }
 372
 373     if (nls >= 0)
 374     {
 375         n0 = nls;
 376         n1 = nls+1;
 377     }
 378     else
 379     {
 380         n0 = 0;
 381         n1 = fr->nnblists;
 382     }
 383
 384     for (n = n0; (n < n1); n++)
 385     {
 386         nblists = &fr->nblists[n];
 387
 388         kernel_data.table_elec              = &nblists->table_elec;
 389         kernel_data.table_vdw               = &nblists->table_vdw;
 390         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 391
 392         for (range = 0; range < 2; range++)
 393         {
 394             /* Are we doing short/long-range? */
 395             if (range == 0)
 396             {
 397                 /* Short-range */
 398                 if (!(flags & GMX_NONBONDED_DO_SR))
 399                 {
 400                     continue;
 401                 }
 402                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
 403                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 404                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 405                 nlist = nblists->nlist_sr;
 406                 f                                   = f_shortrange;
 407             }
 408             else if (range == 1)
 409             {
 410                 /* Long-range */
 411                 if (!(flags & GMX_NONBONDED_DO_LR))
 412                 {
 413                     continue;
 414                 }
 415                 kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
 416                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
 417                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 418                 nlist = nblists->nlist_lr;
 419                 f                                   = f_longrange;
 420             }
 421
 422             for (i = i0; (i < i1); i++)
 423             {
 424                 if (nlist[i].nri > 0)
 425                 {
 426                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
 427                     {
 428                         /* Potential and force */
 429                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
 430                     }
 431                     else
 432                     {
 433                         /* Force only, no potential */
 434                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
 435                     }
 436
 437                     if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
 438                     {
 439                         /* We don't need the non-perturbed interactions */
 440                         continue;
 441                     }
 442
 443                     if(kernelptr != NULL)
 444                     {
 445                         (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 446                     }
 447                     else
 448                     {
 449                         gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
 450                     }
 451                 }
 452             }
 453         }
 454     }
 455 }
 456
 457 static void
 458 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
 459 {
 460     gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
 461                 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
 462                 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
 463                 "a smaller molecule you are decoupling during a free energy calculation.\n"
 464                 "Since interactions at distances beyond the table cannot be computed,\n"
 465                 "they are skipped until they are inside the table limit again. You will\n"
 466                 "only see this message once, even if it occurs for several interactions.\n\n"
 467                 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
 468                 "probably something wrong with your system. Only change the table-extension\n"
 469                 "distance in the mdp file if you are really sure that is the reason.\n",
 470                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
 471
 472     if (debug)
 473     {
 474         fprintf(debug,
 475                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
 476                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
 477                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
 478     }
 479 }
 480
 481
 482
 483 /* This might logically belong better in the nb_generic.c module, but it is only
 484  * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 485  * extra functional call for every single pair listed in the topology.
 486  */
 487 static real
 488 nb_evaluate_single(real r2, real tabscale, real *vftab,
 489                    real qq, real c6, real c12, real *velec, real *vvdw)
 490 {
 491     real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 492     int        ntab;
 493
 494     /* Do the tabulated interactions - first table lookup */
 495     rinv             = gmx_invsqrt(r2);
 496     r                = r2*rinv;
 497     rtab             = r*tabscale;
 498     ntab             = rtab;
 499     eps              = rtab-ntab;
 500     eps2             = eps*eps;
 501     ntab             = 12*ntab;
 502     /* Electrostatics */
 503     Y                = vftab[ntab];
 504     F                = vftab[ntab+1];
 505     Geps             = eps*vftab[ntab+2];
 506     Heps2            = eps2*vftab[ntab+3];
 507     Fp               = F+Geps+Heps2;
 508     VVe              = Y+eps*Fp;
 509     FFe              = Fp+Geps+2.0*Heps2;
 510     /* Dispersion */
 511     Y                = vftab[ntab+4];
 512     F                = vftab[ntab+5];
 513     Geps             = eps*vftab[ntab+6];
 514     Heps2            = eps2*vftab[ntab+7];
 515     Fp               = F+Geps+Heps2;
 516     VVd              = Y+eps*Fp;
 517     FFd              = Fp+Geps+2.0*Heps2;
 518     /* Repulsion */
 519     Y                = vftab[ntab+8];
 520     F                = vftab[ntab+9];
 521     Geps             = eps*vftab[ntab+10];
 522     Heps2            = eps2*vftab[ntab+11];
 523     Fp               = F+Geps+Heps2;
 524     VVr              = Y+eps*Fp;
 525     FFr              = Fp+Geps+2.0*Heps2;
 526
 527     *velec           = qq*VVe;
 528     *vvdw            = c6*VVd+c12*VVr;
 529
 530     fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
 531
 532     return fscal;
 533 }
 534
 535
 536 real
 537 do_nonbonded_listed(int ftype, int nbonds,
 538                     const t_iatom iatoms[], const t_iparams iparams[],
 539                     const rvec x[], rvec f[], rvec fshift[],
 540                     const t_pbc *pbc, const t_graph *g,
 541                     real *lambda, real *dvdl,
 542                     const t_mdatoms *md,
 543                     const t_forcerec *fr, gmx_grppairener_t *grppener,
 544                     int *global_atom_index)
 545 {
 546     int              ielec, ivdw;
 547     real             qq, c6, c12;
 548     rvec             dx;
 549     ivec             dt;
 550     int              i, j, itype, ai, aj, gid;
 551     int              fshift_index;
 552     real             r2, rinv;
 553     real             fscal, velec, vvdw;
 554     real *           energygrp_elec;
 555     real *           energygrp_vdw;
 556     static gmx_bool  warned_rlimit = FALSE;
 557     /* Free energy stuff */
 558     gmx_bool         bFreeEnergy;
 559     real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 560     real             qqB, c6B, c12B, sigma2_def, sigma2_min;
 561
 562
 563     switch (ftype)
 564     {
 565         case F_LJ14:
 566         case F_LJC14_Q:
 567             energygrp_elec = grppener->ener[egCOUL14];
 568             energygrp_vdw  = grppener->ener[egLJ14];
 569             break;
 570         case F_LJC_PAIRS_NB:
 571             energygrp_elec = grppener->ener[egCOULSR];
 572             energygrp_vdw  = grppener->ener[egLJSR];
 573             break;
 574         default:
 575             energygrp_elec = NULL; /* Keep compiler happy */
 576             energygrp_vdw  = NULL; /* Keep compiler happy */
 577             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 578             break;
 579     }
 580
 581     if (fr->efep != efepNO)
 582     {
 583         /* Lambda factor for state A=1-lambda and B=lambda */
 584         LFC[0] = 1.0 - lambda[efptCOUL];
 585         LFV[0] = 1.0 - lambda[efptVDW];
 586         LFC[1] = lambda[efptCOUL];
 587         LFV[1] = lambda[efptVDW];
 588
 589         /*derivative of the lambda factor for state A and B */
 590         DLF[0] = -1;
 591         DLF[1] = 1;
 592
 593         /* precalculate */
 594         sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
 595         sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
 596
 597         for (i = 0; i < 2; i++)
 598         {
 599             lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 600             dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
 601             lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 602             dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
 603         }
 604     }
 605     else
 606     {
 607         sigma2_min = sigma2_def = 0;
 608     }
 609
 610     bFreeEnergy = FALSE;
 611     for (i = 0; (i < nbonds); )
 612     {
 613         itype = iatoms[i++];
 614         ai    = iatoms[i++];
 615         aj    = iatoms[i++];
 616         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 617
 618         /* Get parameters */
 619         switch (ftype)
 620         {
 621             case F_LJ14:
 622                 bFreeEnergy =
 623                     (fr->efep != efepNO &&
 624                      ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
 625                       iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
 626                       iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 627                 qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
 628                 c6               = iparams[itype].lj14.c6A;
 629                 c12              = iparams[itype].lj14.c12A;
 630                 break;
 631             case F_LJC14_Q:
 632                 qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
 633                 c6               = iparams[itype].ljc14.c6;
 634                 c12              = iparams[itype].ljc14.c12;
 635                 break;
 636             case F_LJC_PAIRS_NB:
 637                 qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
 638                 c6               = iparams[itype].ljcnb.c6;
 639                 c12              = iparams[itype].ljcnb.c12;
 640                 break;
 641             default:
 642                 /* Cannot happen since we called gmx_fatal() above in this case */
 643                 qq = c6 = c12 = 0; /* Keep compiler happy */
 644                 break;
 645         }
 646
 647         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 648          * included in the general nfbp array now. This means the tables are scaled down by the
 649          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 650          * be scaled up.
 651          */
 652         c6  *= 6.0;
 653         c12 *= 12.0;
 654
 655         /* Do we need to apply full periodic boundary conditions? */
 656         if (fr->bMolPBC == TRUE)
 657         {
 658             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 659         }
 660         else
 661         {
 662             fshift_index = CENTRAL;
 663             rvec_sub(x[ai], x[aj], dx);
 664         }
 665         r2           = norm2(dx);
 666
 667         if (r2 >= fr->tab14.r*fr->tab14.r)
 668         {
 669             if (warned_rlimit == FALSE)
 670             {
 671                 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
 672                 warned_rlimit = TRUE;
 673             }
 674             continue;
 675         }
 676
 677         if (bFreeEnergy)
 678         {
 679             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 680             qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
 681             c6B              = iparams[itype].lj14.c6B*6.0;
 682             c12B             = iparams[itype].lj14.c12B*12.0;
 683
 684             fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
 685                                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
 686                                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
 687                                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 688         }
 689         else
 690         {
 691             /* Evaluate tabulated interaction without free energy */
 692             fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
 693         }
 694
 695         energygrp_elec[gid]  += velec;
 696         energygrp_vdw[gid]   += vvdw;
 697         svmul(fscal, dx, dx);
 698
 699         /* Add the forces */
 700         rvec_inc(f[ai], dx);
 701         rvec_dec(f[aj], dx);
 702
 703         if (g)
 704         {
 705             /* Correct the shift forces using the graph */
 706             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 707             fshift_index = IVEC2IS(dt);
 708         }
 709         if (fshift_index != CENTRAL)
 710         {
 711             rvec_inc(fshift[fshift_index], dx);
 712             rvec_dec(fshift[CENTRAL], dx);
 713         }
 714     }
 715     return 0.0;
 716 }