src/gromacs/gmxlib/nonbonded/nonbonded.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "config.h"
  40
  41 #include <stdio.h>
  42 #include <stdlib.h>
  43
  44 #include "thread_mpi/threads.h"
  45
  46 #include "gromacs/bonded/bonded.h"
  47 #include "gromacs/legacyheaders/typedefs.h"
  48 #include "gromacs/legacyheaders/txtdump.h"
  49 #include "gromacs/legacyheaders/ns.h"
  50 #include "gromacs/math/vec.h"
  51 #include "gromacs/math/utilities.h"
  52 #include "gromacs/legacyheaders/macros.h"
  53 #include "gromacs/legacyheaders/force.h"
  54 #include "gromacs/legacyheaders/names.h"
  55 #include "gromacs/legacyheaders/force.h"
  56 #include "gromacs/legacyheaders/nrnb.h"
  57 #include "gromacs/legacyheaders/nonbonded.h"
  58 #include "gromacs/simd/simd.h"
  59
  60 #include "gromacs/pbcutil/ishift.h"
  61 #include "gromacs/pbcutil/mshift.h"
  62 #include "gromacs/pbcutil/pbc.h"
  63 #include "gromacs/utility/cstringutil.h"
  64 #include "gromacs/utility/fatalerror.h"
  65 #include "gromacs/utility/smalloc.h"
  66
  67 #include "nb_kernel.h"
  68 #include "nb_free_energy.h"
  69 #include "nb_generic.h"
  70 #include "nb_generic_cg.h"
  71 #include "nb_generic_adress.h"
  72
  73 /* Different default (c) and SIMD instructions interaction-specific kernels */
  74 #include "nb_kernel_c/nb_kernel_c.h"
  75
  76 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
  77 #    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
  78 #endif
  79 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
  80 #    include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
  81 #endif
  82 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
  83 #    include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
  84 #endif
  85 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
  86 #    include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
  87 #endif
  88 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
  89 #    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
  90 #endif
  91 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
  92 #    include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
  93 #endif
  94 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
  95 #    include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
  96 #endif
  97 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
  98 #    include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
  99 #endif
 100 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 101 #    include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
 102 #endif
 103
 104
 105 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 106 static gmx_bool            nonbonded_setup_done  = FALSE;
 107
 108
 109 void
 110 gmx_nonbonded_setup(t_forcerec *   fr,
 111                     gmx_bool       bGenericKernelOnly)
 112 {
 113     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 114     /* Here we are guaranteed only one thread made it. */
 115     if (nonbonded_setup_done == FALSE)
 116     {
 117         if (bGenericKernelOnly == FALSE)
 118         {
 119             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 120             nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 121
 122             if (!(fr != NULL && fr->use_simd_kernels == FALSE))
 123             {
 124                 /* Add interaction-specific kernels for different architectures */
 125                 /* Single precision */
 126 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
 127                 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 128 #endif
 129 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
 130                 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 131 #endif
 132 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 133                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 134 #endif
 135 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
 136                 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 137 #endif
 138                 /* Double precision */
 139 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
 140                 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 141 #endif
 142 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
 143                 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 144 #endif
 145 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
 146                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 147 #endif
 148 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
 149                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 150 #endif
 151 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 152                 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
 153 #endif
 154                 ; /* empty statement to avoid a completely empty block */
 155             }
 156         }
 157         /* Create a hash for faster lookups */
 158         nb_kernel_list_hash_init();
 159
 160         nonbonded_setup_done = TRUE;
 161     }
 162     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 163 }
 164
 165
 166
 167 void
 168 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
 169 {
 170     const char *     elec;
 171     const char *     elec_mod;
 172     const char *     vdw;
 173     const char *     vdw_mod;
 174     const char *     geom;
 175     const char *     other;
 176     const char *     vf;
 177
 178     struct
 179     {
 180         const char *  arch;
 181         int           simd_padding_width;
 182     }
 183     arch_and_padding[] =
 184     {
 185         /* Single precision */
 186 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
 187         { "avx_256_single", 8 },
 188 #endif
 189 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
 190         { "avx_128_fma_single", 4 },
 191 #endif
 192 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
 193         { "sse4_1_single", 4 },
 194 #endif
 195 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
 196         { "sse2_single", 4 },
 197 #endif
 198         /* Double precision */
 199 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
 200         { "avx_256_double", 4 },
 201 #endif
 202 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
 203         /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 204          * since the kernels execute a loop unrolled a factor 2, followed by
 205          * a possible single odd-element epilogue.
 206          */
 207         { "avx_128_fma_double", 1 },
 208 #endif
 209 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
 210         /* No padding - see comment above */
 211         { "sse2_double", 1 },
 212 #endif
 213 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
 214         /* No padding - see comment above */
 215         { "sse4_1_double", 1 },
 216 #endif
 217 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
 218         /* No padding - see comment above */
 219         { "sparc64_hpc_ace_double", 1 },
 220 #endif
 221         { "c", 1 },
 222     };
 223     int              narch = asize(arch_and_padding);
 224     int              i;
 225
 226     if (nonbonded_setup_done == FALSE)
 227     {
 228         /* We typically call this setup routine before starting timers,
 229          * but if that has not been done for whatever reason we do it now.
 230          */
 231         gmx_nonbonded_setup(NULL, FALSE);
 232     }
 233
 234     /* Not used yet */
 235     other = "";
 236
 237     nl->kernelptr_vf = NULL;
 238     nl->kernelptr_v  = NULL;
 239     nl->kernelptr_f  = NULL;
 240
 241     elec     = gmx_nbkernel_elec_names[nl->ielec];
 242     elec_mod = eintmod_names[nl->ielecmod];
 243     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 244     vdw_mod  = eintmod_names[nl->ivdwmod];
 245     geom     = gmx_nblist_geometry_names[nl->igeometry];
 246
 247     if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
 248     {
 249         nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
 250         nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
 251         nl->simd_padding_width = 1;
 252         return;
 253     }
 254
 255     if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 256     {
 257         nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
 258         nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 259         nl->simd_padding_width = 1;
 260     }
 261     else if (!gmx_strcasecmp_min(geom, "CG-CG"))
 262     {
 263         nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
 264         nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 265         nl->simd_padding_width = 1;
 266     }
 267     else
 268     {
 269         /* Try to find a specific kernel first */
 270
 271         for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
 272         {
 273             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 274             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 275         }
 276         for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
 277         {
 278             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 279             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 280
 281             /* If there is not force-only optimized kernel, is there a potential & force one? */
 282             if (nl->kernelptr_f == NULL)
 283             {
 284                 nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 285                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 286             }
 287         }
 288
 289         /* For now, the accelerated kernels cannot handle the combination of switch functions for both
 290          * electrostatics and VdW that use different switch radius or switch cutoff distances
 291          * (both of them enter in the switch function calculation). This would require
 292          * us to evaluate two completely separate switch functions for every interaction.
 293          * Instead, we disable such kernels by setting the pointer to NULL.
 294          * This will cause the generic kernel (which can handle it) to be called instead.
 295          *
 296          * Note that we typically already enable tabulated coulomb interactions for this case,
 297          * so this is mostly a safe-guard to make sure we call the generic kernel if the
 298          * tables are disabled.
 299          */
 300         if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
 301             (nl->ivdw  != GMX_NBKERNEL_VDW_NONE)  && (nl->ivdwmod  == eintmodPOTSWITCH) &&
 302             bElecAndVdwSwitchDiffers)
 303         {
 304             nl->kernelptr_vf = NULL;
 305             nl->kernelptr_f  = NULL;
 306         }
 307
 308         /* Give up, pick a generic one instead.
 309          * We only do this for particle-particle kernels; by leaving the water-optimized kernel
 310          * pointers to NULL, the water optimization will automatically be disabled for this interaction.
 311          */
 312         if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
 313         {
 314             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
 315             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 316             nl->simd_padding_width = 1;
 317             if (debug)
 318             {
 319                 fprintf(debug,
 320                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
 321                         "    Elec: '%s', Modifier: '%s'\n"
 322                         "    Vdw:  '%s', Modifier: '%s'\n",
 323                         elec, elec_mod, vdw, vdw_mod);
 324             }
 325         }
 326     }
 327     return;
 328 }
 329
 330 void do_nonbonded(t_forcerec *fr,
 331                   rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
 332                   gmx_grppairener_t *grppener,
 333                   t_nrnb *nrnb, real *lambda, real *dvdl,
 334                   int nls, int eNL, int flags)
 335 {
 336     t_nblist *        nlist;
 337     int               n, n0, n1, i, i0, i1, sz, range;
 338     t_nblists *       nblists;
 339     nb_kernel_data_t  kernel_data;
 340     nb_kernel_t *     kernelptr = NULL;
 341     rvec *            f;
 342
 343     kernel_data.flags                   = flags;
 344     kernel_data.exclusions              = excl;
 345     kernel_data.lambda                  = lambda;
 346     kernel_data.dvdl                    = dvdl;
 347
 348     if (fr->bAllvsAll)
 349     {
 350         gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
 351         return;
 352     }
 353
 354     if (eNL >= 0)
 355     {
 356         i0 = eNL;
 357         i1 = i0+1;
 358     }
 359     else
 360     {
 361         i0 = 0;
 362         i1 = eNL_NR;
 363     }
 364
 365     if (nls >= 0)
 366     {
 367         n0 = nls;
 368         n1 = nls+1;
 369     }
 370     else
 371     {
 372         n0 = 0;
 373         n1 = fr->nnblists;
 374     }
 375
 376     for (n = n0; (n < n1); n++)
 377     {
 378         nblists = &fr->nblists[n];
 379
 380         kernel_data.table_elec              = &nblists->table_elec;
 381         kernel_data.table_vdw               = &nblists->table_vdw;
 382         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
 383
 384         for (range = 0; range < 2; range++)
 385         {
 386             /* Are we doing short/long-range? */
 387             if (range == 0)
 388             {
 389                 /* Short-range */
 390                 if (!(flags & GMX_NONBONDED_DO_SR))
 391                 {
 392                     continue;
 393                 }
 394                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
 395                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 396                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 397                 nlist = nblists->nlist_sr;
 398                 f                                   = f_shortrange;
 399             }
 400             else
 401             {
 402                 /* Long-range */
 403                 if (!(flags & GMX_NONBONDED_DO_LR))
 404                 {
 405                     continue;
 406                 }
 407                 kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
 408                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
 409                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
 410                 nlist = nblists->nlist_lr;
 411                 f                                   = f_longrange;
 412             }
 413
 414             for (i = i0; (i < i1); i++)
 415             {
 416                 if (nlist[i].nri > 0)
 417                 {
 418                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
 419                     {
 420                         /* Potential and force */
 421                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
 422                     }
 423                     else
 424                     {
 425                         /* Force only, no potential */
 426                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
 427                     }
 428
 429                     if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
 430                     {
 431                         /* We don't need the non-perturbed interactions */
 432                         continue;
 433                     }
 434                     /* Neighborlists whose kernelptr==NULL will always be empty */
 435                     if (kernelptr != NULL)
 436                     {
 437                         (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 438                     }
 439                     else
 440                     {
 441                         gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
 442                     }
 443                 }
 444             }
 445         }
 446     }
 447 }
 448
 449 static void
 450 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
 451 {
 452     gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
 453                 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
 454                 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
 455                 "a smaller molecule you are decoupling during a free energy calculation.\n"
 456                 "Since interactions at distances beyond the table cannot be computed,\n"
 457                 "they are skipped until they are inside the table limit again. You will\n"
 458                 "only see this message once, even if it occurs for several interactions.\n\n"
 459                 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
 460                 "probably something wrong with your system. Only change the table-extension\n"
 461                 "distance in the mdp file if you are really sure that is the reason.\n",
 462                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
 463
 464     if (debug)
 465     {
 466         fprintf(debug,
 467                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
 468                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
 469                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
 470     }
 471 }
 472
 473
 474
 475 /* This might logically belong better in the nb_generic.c module, but it is only
 476  * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 477  * extra functional call for every single pair listed in the topology.
 478  */
 479 static real
 480 nb_evaluate_single(real r2, real tabscale, real *vftab,
 481                    real qq, real c6, real c12, real *velec, real *vvdw)
 482 {
 483     real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
 484     int        ntab;
 485
 486     /* Do the tabulated interactions - first table lookup */
 487     rinv             = gmx_invsqrt(r2);
 488     r                = r2*rinv;
 489     rtab             = r*tabscale;
 490     ntab             = rtab;
 491     eps              = rtab-ntab;
 492     eps2             = eps*eps;
 493     ntab             = 12*ntab;
 494     /* Electrostatics */
 495     Y                = vftab[ntab];
 496     F                = vftab[ntab+1];
 497     Geps             = eps*vftab[ntab+2];
 498     Heps2            = eps2*vftab[ntab+3];
 499     Fp               = F+Geps+Heps2;
 500     VVe              = Y+eps*Fp;
 501     FFe              = Fp+Geps+2.0*Heps2;
 502     /* Dispersion */
 503     Y                = vftab[ntab+4];
 504     F                = vftab[ntab+5];
 505     Geps             = eps*vftab[ntab+6];
 506     Heps2            = eps2*vftab[ntab+7];
 507     Fp               = F+Geps+Heps2;
 508     VVd              = Y+eps*Fp;
 509     FFd              = Fp+Geps+2.0*Heps2;
 510     /* Repulsion */
 511     Y                = vftab[ntab+8];
 512     F                = vftab[ntab+9];
 513     Geps             = eps*vftab[ntab+10];
 514     Heps2            = eps2*vftab[ntab+11];
 515     Fp               = F+Geps+Heps2;
 516     VVr              = Y+eps*Fp;
 517     FFr              = Fp+Geps+2.0*Heps2;
 518
 519     *velec           = qq*VVe;
 520     *vvdw            = c6*VVd+c12*VVr;
 521
 522     fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
 523
 524     return fscal;
 525 }
 526
 527
 528 real
 529 do_nonbonded_listed(int ftype, int nbonds,
 530                     const t_iatom iatoms[], const t_iparams iparams[],
 531                     const rvec x[], rvec f[], rvec fshift[],
 532                     const t_pbc *pbc, const t_graph *g,
 533                     real *lambda, real *dvdl,
 534                     const t_mdatoms *md,
 535                     const t_forcerec *fr, gmx_grppairener_t *grppener,
 536                     int *global_atom_index)
 537 {
 538     int              ielec, ivdw;
 539     real             qq, c6, c12;
 540     rvec             dx;
 541     ivec             dt;
 542     int              i, j, itype, ai, aj, gid;
 543     int              fshift_index;
 544     real             r2, rinv;
 545     real             fscal, velec, vvdw;
 546     real *           energygrp_elec;
 547     real *           energygrp_vdw;
 548     static gmx_bool  warned_rlimit = FALSE;
 549     /* Free energy stuff */
 550     gmx_bool         bFreeEnergy;
 551     real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
 552     real             qqB, c6B, c12B, sigma2_def, sigma2_min;
 553
 554
 555     switch (ftype)
 556     {
 557         case F_LJ14:
 558         case F_LJC14_Q:
 559             energygrp_elec = grppener->ener[egCOUL14];
 560             energygrp_vdw  = grppener->ener[egLJ14];
 561             break;
 562         case F_LJC_PAIRS_NB:
 563             energygrp_elec = grppener->ener[egCOULSR];
 564             energygrp_vdw  = grppener->ener[egLJSR];
 565             break;
 566         default:
 567             energygrp_elec = NULL; /* Keep compiler happy */
 568             energygrp_vdw  = NULL; /* Keep compiler happy */
 569             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
 570             break;
 571     }
 572
 573     if (fr->efep != efepNO)
 574     {
 575         /* Lambda factor for state A=1-lambda and B=lambda */
 576         LFC[0] = 1.0 - lambda[efptCOUL];
 577         LFV[0] = 1.0 - lambda[efptVDW];
 578         LFC[1] = lambda[efptCOUL];
 579         LFV[1] = lambda[efptVDW];
 580
 581         /*derivative of the lambda factor for state A and B */
 582         DLF[0] = -1;
 583         DLF[1] = 1;
 584
 585         /* precalculate */
 586         sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
 587         sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
 588
 589         for (i = 0; i < 2; i++)
 590         {
 591             lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
 592             dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
 593             lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
 594             dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
 595         }
 596     }
 597     else
 598     {
 599         sigma2_min = sigma2_def = 0;
 600     }
 601
 602     bFreeEnergy = FALSE;
 603     for (i = 0; (i < nbonds); )
 604     {
 605         itype = iatoms[i++];
 606         ai    = iatoms[i++];
 607         aj    = iatoms[i++];
 608         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
 609
 610         /* Get parameters */
 611         switch (ftype)
 612         {
 613             case F_LJ14:
 614                 bFreeEnergy =
 615                     (fr->efep != efepNO &&
 616                      ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
 617                       iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
 618                       iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
 619                 qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
 620                 c6               = iparams[itype].lj14.c6A;
 621                 c12              = iparams[itype].lj14.c12A;
 622                 break;
 623             case F_LJC14_Q:
 624                 qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
 625                 c6               = iparams[itype].ljc14.c6;
 626                 c12              = iparams[itype].ljc14.c12;
 627                 break;
 628             case F_LJC_PAIRS_NB:
 629                 qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
 630                 c6               = iparams[itype].ljcnb.c6;
 631                 c12              = iparams[itype].ljcnb.c12;
 632                 break;
 633             default:
 634                 /* Cannot happen since we called gmx_fatal() above in this case */
 635                 qq = c6 = c12 = 0; /* Keep compiler happy */
 636                 break;
 637         }
 638
 639         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
 640          * included in the general nfbp array now. This means the tables are scaled down by the
 641          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
 642          * be scaled up.
 643          */
 644         c6  *= 6.0;
 645         c12 *= 12.0;
 646
 647         /* Do we need to apply full periodic boundary conditions? */
 648         if (fr->bMolPBC == TRUE)
 649         {
 650             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
 651         }
 652         else
 653         {
 654             fshift_index = CENTRAL;
 655             rvec_sub(x[ai], x[aj], dx);
 656         }
 657         r2           = norm2(dx);
 658
 659         if (r2 >= fr->tab14.r*fr->tab14.r)
 660         {
 661             /* This check isn't race free. But it doesn't matter because if a race occurs the only
 662              * disadvantage is that the warning is printed twice */
 663             if (warned_rlimit == FALSE)
 664             {
 665                 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
 666                 warned_rlimit = TRUE;
 667             }
 668             continue;
 669         }
 670
 671         if (bFreeEnergy)
 672         {
 673             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
 674             qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
 675             c6B              = iparams[itype].lj14.c6B*6.0;
 676             c12B             = iparams[itype].lj14.c12B*12.0;
 677
 678             fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
 679                                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
 680                                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
 681                                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
 682         }
 683         else
 684         {
 685             /* Evaluate tabulated interaction without free energy */
 686             fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
 687         }
 688
 689         energygrp_elec[gid]  += velec;
 690         energygrp_vdw[gid]   += vvdw;
 691         svmul(fscal, dx, dx);
 692
 693         /* Add the forces */
 694         rvec_inc(f[ai], dx);
 695         rvec_dec(f[aj], dx);
 696
 697         if (g)
 698         {
 699             /* Correct the shift forces using the graph */
 700             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
 701             fshift_index = IVEC2IS(dt);
 702         }
 703         if (fshift_index != CENTRAL)
 704         {
 705             rvec_inc(fshift[fshift_index], dx);
 706             rvec_dec(fshift[CENTRAL], dx);
 707         }
 708     }
 709     return 0.0;
 710 }