Fujitsu Sparc64 acceleration and general fixes for non-x86 builds
[alexxy/gromacs.git] / src / gmxlib / nonbonded / nonbonded.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team,
6  * check out http://www.gromacs.org for more information.
7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9  * others, as listed in the AUTHORS file in the top-level source
10  * directory and at http://www.gromacs.org.
11  *
12  * GROMACS is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public License
14  * as published by the Free Software Foundation; either version 2.1
15  * of the License, or (at your option) any later version.
16  *
17  * GROMACS is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with GROMACS; if not, see
24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
26  *
27  * If you want to redistribute modifications to GROMACS, please
28  * consider that scientific software is very special. Version
29  * control is crucial - bugs must be traceable. We will be happy to
30  * consider code for inclusion in the official distribution, but
31  * derived work must not be called official GROMACS. Details are found
32  * in the README & COPYING files - if they are missing, get the
33  * official version at http://www.gromacs.org.
34  *
35  * To help us fund GROMACS development, we humbly ask that you cite
36  * the research papers on the package. Check out http://www.gromacs.org.
37  */
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41
42 #ifdef GMX_THREAD_MPI
43 #include <thread_mpi.h>
44 #endif
45
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include "typedefs.h"
49 #include "txtdump.h"
50 #include "smalloc.h"
51 #include "ns.h"
52 #include "vec.h"
53 #include "maths.h"
54 #include "macros.h"
55 #include "string2.h"
56 #include "force.h"
57 #include "names.h"
58 #include "main.h"
59 #include "xvgr.h"
60 #include "gmx_fatal.h"
61 #include "physics.h"
62 #include "force.h"
63 #include "bondf.h"
64 #include "nrnb.h"
65 #include "smalloc.h"
66 #include "nonbonded.h"
67
68 #include "nb_kernel.h"
69 #include "nb_free_energy.h"
70 #include "nb_generic.h"
71 #include "nb_generic_cg.h"
72 #include "nb_generic_adress.h"
73
74 /* Different default (c) and accelerated interaction-specific kernels */
75 #include "nb_kernel_c/nb_kernel_c.h"
76
77 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
78 #    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
79 #endif
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
81 #    include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
82 #endif
83 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
84 #    include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
85 #endif
86 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
87 #    include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
88 #endif
89 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
90 #    include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
91 #endif
92 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
93 #    include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
94 #endif
95 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
96 #    include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
97 #endif
98 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
99 #    include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
100 #endif
101 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
102 #    include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
103 #endif
104
105
106 #ifdef GMX_THREAD_MPI
107 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
108 #endif
109 static gmx_bool            nonbonded_setup_done  = FALSE;
110
111
112 void
113 gmx_nonbonded_setup(FILE *         fplog,
114                     t_forcerec *   fr,
115                     gmx_bool       bGenericKernelOnly)
116 {
117 #ifdef GMX_THREAD_MPI
118     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
119 #endif
120     /* Here we are guaranteed only one thread made it. */
121     if (nonbonded_setup_done == FALSE)
122     {
123         if (bGenericKernelOnly == FALSE)
124         {
125             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
126             nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
127
128             if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
129             {
130                 /* Add interaction-specific kernels for different architectures */
131                 /* Single precision */
132 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
133                 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
134 #endif
135 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
136                 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
137 #endif
138 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
139                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
140 #endif
141 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
142                 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
143 #endif
144                 /* Double precision */
145 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
146                 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
147 #endif
148 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
149                 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
150 #endif
151 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
152                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
153 #endif
154 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
155                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
156 #endif
157 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
158                 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double,kernellist_sparc64_hpc_ace_double_size);
159 #endif
160                 ; /* empty statement to avoid a completely empty block */
161             }
162         }
163         /* Create a hash for faster lookups */
164         nb_kernel_list_hash_init();
165
166         nonbonded_setup_done = TRUE;
167     }
168 #ifdef GMX_THREAD_MPI
169     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
170 #endif
171 }
172
173
174
175 void
176 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
177 {
178     const char *     elec;
179     const char *     elec_mod;
180     const char *     vdw;
181     const char *     vdw_mod;
182     const char *     geom;
183     const char *     other;
184     const char *     vf;
185
186     struct
187     {
188         const char *  arch;
189         int           simd_padding_width;
190     }
191     arch_and_padding[] =
192     {
193         /* Single precision */
194 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
195         { "avx_256_single", 8 },
196 #endif
197 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
198         { "avx_128_fma_single", 4 },
199 #endif
200 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
201         { "sse4_1_single", 4 },
202 #endif
203 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
204         { "sse2_single", 4 },
205 #endif
206         /* Double precision */
207 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
208         { "avx_256_double", 4 },
209 #endif
210 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
211         /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
212          * since the kernels execute a loop unrolled a factor 2, followed by
213          * a possible single odd-element epilogue.
214          */
215         { "avx_128_fma_double", 1 },
216 #endif
217 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
218         /* No padding - see comment above */
219         { "sse2_double", 1 },
220 #endif
221 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
222         /* No padding - see comment above */
223         { "sse4_1_double", 1 },
224 #endif
225 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
226         /* No padding - see comment above */
227         { "sparc64_hpc_ace_double", 1 },
228 #endif
229         { "c", 1 },
230     };
231     int              narch = asize(arch_and_padding);
232     int              i;
233
234     if (nonbonded_setup_done == FALSE)
235     {
236         /* We typically call this setup routine before starting timers,
237          * but if that has not been done for whatever reason we do it now.
238          */
239         gmx_nonbonded_setup(NULL, NULL, FALSE);
240     }
241
242     /* Not used yet */
243     other = "";
244
245     nl->kernelptr_vf = NULL;
246     nl->kernelptr_v  = NULL;
247     nl->kernelptr_f  = NULL;
248
249     elec     = gmx_nbkernel_elec_names[nl->ielec];
250     elec_mod = eintmod_names[nl->ielecmod];
251     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
252     vdw_mod  = eintmod_names[nl->ivdwmod];
253     geom     = gmx_nblist_geometry_names[nl->igeometry];
254
255     if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
256     {
257         nl->kernelptr_vf       = (void *) gmx_nb_generic_adress_kernel;
258         nl->kernelptr_f        = (void *) gmx_nb_generic_adress_kernel;
259         nl->simd_padding_width = 1;
260         return;
261     }
262
263     if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
264     {
265         nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
266         nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
267         nl->simd_padding_width = 1;
268     }
269     else if (!gmx_strcasecmp_min(geom, "CG-CG"))
270     {
271         nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
272         nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
273         nl->simd_padding_width = 1;
274     }
275     else
276     {
277         /* Try to find a specific kernel first */
278
279         for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
280         {
281             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
282             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
283         }
284         for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
285         {
286             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
287             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
288
289             /* If there is not force-only optimized kernel, is there a potential & force one? */
290             if (nl->kernelptr_f == NULL)
291             {
292                 nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
293                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
294             }
295         }
296
297         /* Give up, pick a generic one instead */
298         if (nl->kernelptr_vf == NULL)
299         {
300             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
301             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
302             nl->simd_padding_width = 1;
303             if (debug)
304             {
305                 fprintf(debug,
306                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
307                         "    Elec: '%s', Modifier: '%s'\n"
308                         "    Vdw:  '%s', Modifier: '%s'\n"
309                         "    Geom: '%s', Other: '%s'\n\n",
310                         elec, elec_mod, vdw, vdw_mod, geom, other);
311             }
312         }
313     }
314
315     return;
316 }
317
318 void do_nonbonded(t_commrec *cr, t_forcerec *fr,
319                   rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
320                   gmx_grppairener_t *grppener, rvec box_size,
321                   t_nrnb *nrnb, real *lambda, real *dvdl,
322                   int nls, int eNL, int flags)
323 {
324     t_nblist *        nlist;
325     int               n, n0, n1, i, i0, i1, sz, range;
326     t_nblists *       nblists;
327     nb_kernel_data_t  kernel_data;
328     nb_kernel_t *     kernelptr = NULL;
329     rvec *            f;
330
331     kernel_data.flags                   = flags;
332     kernel_data.exclusions              = excl;
333     kernel_data.lambda                  = lambda;
334     kernel_data.dvdl                    = dvdl;
335
336     if (fr->bAllvsAll)
337     {
338         return;
339     }
340
341     if (eNL >= 0)
342     {
343         i0 = eNL;
344         i1 = i0+1;
345     }
346     else
347     {
348         i0 = 0;
349         i1 = eNL_NR;
350     }
351
352     if (nls >= 0)
353     {
354         n0 = nls;
355         n1 = nls+1;
356     }
357     else
358     {
359         n0 = 0;
360         n1 = fr->nnblists;
361     }
362
363     for (n = n0; (n < n1); n++)
364     {
365         nblists = &fr->nblists[n];
366
367         kernel_data.table_elec              = &nblists->table_elec;
368         kernel_data.table_vdw               = &nblists->table_vdw;
369         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
370
371         for (range = 0; range < 2; range++)
372         {
373             /* Are we doing short/long-range? */
374             if (range == 0)
375             {
376                 /* Short-range */
377                 if (!(flags & GMX_NONBONDED_DO_SR))
378                 {
379                     continue;
380                 }
381                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
382                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
383                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
384                 nlist = nblists->nlist_sr;
385                 f                                   = f_shortrange;
386             }
387             else if (range == 1)
388             {
389                 /* Long-range */
390                 if (!(flags & GMX_NONBONDED_DO_LR))
391                 {
392                     continue;
393                 }
394                 kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
395                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
396                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
397                 nlist = nblists->nlist_lr;
398                 f                                   = f_longrange;
399             }
400
401             for (i = i0; (i < i1); i++)
402             {
403                 if (nlist[i].nri > 0)
404                 {
405                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
406                     {
407                         /* Potential and force */
408                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
409                     }
410                     else
411                     {
412                         /* Force only, no potential */
413                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
414                     }
415
416                     if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
417                     {
418                         /* We don't need the non-perturbed interactions */
419                         continue;
420                     }
421                     (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
422                 }
423             }
424         }
425     }
426 }
427
428 static void
429 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
430 {
431     gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
432                 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
433                 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
434                 "a smaller molecule you are decoupling during a free energy calculation.\n"
435                 "Since interactions at distances beyond the table cannot be computed,\n"
436                 "they are skipped until they are inside the table limit again. You will\n"
437                 "only see this message once, even if it occurs for several interactions.\n\n"
438                 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
439                 "probably something wrong with your system. Only change the table-extension\n"
440                 "distance in the mdp file if you are really sure that is the reason.\n",
441                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
442
443     if (debug)
444     {
445         fprintf(debug,
446                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
447                 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
448                 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
449     }
450 }
451
452
453
454 /* This might logically belong better in the nb_generic.c module, but it is only
455  * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
456  * extra functional call for every single pair listed in the topology.
457  */
458 static real
459 nb_evaluate_single(real r2, real tabscale, real *vftab,
460                    real qq, real c6, real c12, real *velec, real *vvdw)
461 {
462     real       rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
463     int        ntab;
464
465     /* Do the tabulated interactions - first table lookup */
466     rinv             = gmx_invsqrt(r2);
467     r                = r2*rinv;
468     rtab             = r*tabscale;
469     ntab             = rtab;
470     eps              = rtab-ntab;
471     eps2             = eps*eps;
472     ntab             = 12*ntab;
473     /* Electrostatics */
474     Y                = vftab[ntab];
475     F                = vftab[ntab+1];
476     Geps             = eps*vftab[ntab+2];
477     Heps2            = eps2*vftab[ntab+3];
478     Fp               = F+Geps+Heps2;
479     VVe              = Y+eps*Fp;
480     FFe              = Fp+Geps+2.0*Heps2;
481     /* Dispersion */
482     Y                = vftab[ntab+4];
483     F                = vftab[ntab+5];
484     Geps             = eps*vftab[ntab+6];
485     Heps2            = eps2*vftab[ntab+7];
486     Fp               = F+Geps+Heps2;
487     VVd              = Y+eps*Fp;
488     FFd              = Fp+Geps+2.0*Heps2;
489     /* Repulsion */
490     Y                = vftab[ntab+8];
491     F                = vftab[ntab+9];
492     Geps             = eps*vftab[ntab+10];
493     Heps2            = eps2*vftab[ntab+11];
494     Fp               = F+Geps+Heps2;
495     VVr              = Y+eps*Fp;
496     FFr              = Fp+Geps+2.0*Heps2;
497
498     *velec           = qq*VVe;
499     *vvdw            = c6*VVd+c12*VVr;
500
501     fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
502
503     return fscal;
504 }
505
506
507 real
508 do_nonbonded_listed(int ftype, int nbonds,
509                     const t_iatom iatoms[], const t_iparams iparams[],
510                     const rvec x[], rvec f[], rvec fshift[],
511                     const t_pbc *pbc, const t_graph *g,
512                     real *lambda, real *dvdl,
513                     const t_mdatoms *md,
514                     const t_forcerec *fr, gmx_grppairener_t *grppener,
515                     int *global_atom_index)
516 {
517     int              ielec, ivdw;
518     real             qq, c6, c12;
519     rvec             dx;
520     ivec             dt;
521     int              i, j, itype, ai, aj, gid;
522     int              fshift_index;
523     real             r2, rinv;
524     real             fscal, velec, vvdw;
525     real *           energygrp_elec;
526     real *           energygrp_vdw;
527     static gmx_bool  warned_rlimit = FALSE;
528     /* Free energy stuff */
529     gmx_bool         bFreeEnergy;
530     real             LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
531     real             qqB, c6B, c12B, sigma2_def, sigma2_min;
532
533
534     switch (ftype)
535     {
536         case F_LJ14:
537         case F_LJC14_Q:
538             energygrp_elec = grppener->ener[egCOUL14];
539             energygrp_vdw  = grppener->ener[egLJ14];
540             break;
541         case F_LJC_PAIRS_NB:
542             energygrp_elec = grppener->ener[egCOULSR];
543             energygrp_vdw  = grppener->ener[egLJSR];
544             break;
545         default:
546             energygrp_elec = NULL; /* Keep compiler happy */
547             energygrp_vdw  = NULL; /* Keep compiler happy */
548             gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
549             break;
550     }
551
552     if (fr->efep != efepNO)
553     {
554         /* Lambda factor for state A=1-lambda and B=lambda */
555         LFC[0] = 1.0 - lambda[efptCOUL];
556         LFV[0] = 1.0 - lambda[efptVDW];
557         LFC[1] = lambda[efptCOUL];
558         LFV[1] = lambda[efptVDW];
559
560         /*derivative of the lambda factor for state A and B */
561         DLF[0] = -1;
562         DLF[1] = 1;
563
564         /* precalculate */
565         sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
566         sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
567
568         for (i = 0; i < 2; i++)
569         {
570             lfac_coul[i]  = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
571             dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
572             lfac_vdw[i]   = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
573             dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
574         }
575     }
576     else
577     {
578         sigma2_min = sigma2_def = 0;
579     }
580
581     bFreeEnergy = FALSE;
582     for (i = 0; (i < nbonds); )
583     {
584         itype = iatoms[i++];
585         ai    = iatoms[i++];
586         aj    = iatoms[i++];
587         gid   = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
588
589         /* Get parameters */
590         switch (ftype)
591         {
592             case F_LJ14:
593                 bFreeEnergy =
594                     (fr->efep != efepNO &&
595                      ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
596                       iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
597                       iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
598                 qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
599                 c6               = iparams[itype].lj14.c6A;
600                 c12              = iparams[itype].lj14.c12A;
601                 break;
602             case F_LJC14_Q:
603                 qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
604                 c6               = iparams[itype].ljc14.c6;
605                 c12              = iparams[itype].ljc14.c12;
606                 break;
607             case F_LJC_PAIRS_NB:
608                 qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
609                 c6               = iparams[itype].ljcnb.c6;
610                 c12              = iparams[itype].ljcnb.c12;
611                 break;
612             default:
613                 /* Cannot happen since we called gmx_fatal() above in this case */
614                 qq = c6 = c12 = 0; /* Keep compiler happy */
615                 break;
616         }
617
618         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
619          * included in the general nfbp array now. This means the tables are scaled down by the
620          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
621          * be scaled up.
622          */
623         c6  *= 6.0;
624         c12 *= 12.0;
625
626         /* Do we need to apply full periodic boundary conditions? */
627         if (fr->bMolPBC == TRUE)
628         {
629             fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
630         }
631         else
632         {
633             fshift_index = CENTRAL;
634             rvec_sub(x[ai], x[aj], dx);
635         }
636         r2           = norm2(dx);
637
638         if (r2 >= fr->tab14.r*fr->tab14.r)
639         {
640             if (warned_rlimit == FALSE)
641             {
642                 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
643                 warned_rlimit = TRUE;
644             }
645             continue;
646         }
647
648         if (bFreeEnergy)
649         {
650             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
651             qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
652             c6B              = iparams[itype].lj14.c6B*6.0;
653             c12B             = iparams[itype].lj14.c12B*12.0;
654
655             fscal            = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
656                                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
657                                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
658                                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
659         }
660         else
661         {
662             /* Evaluate tabulated interaction without free energy */
663             fscal            = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
664         }
665
666         energygrp_elec[gid]  += velec;
667         energygrp_vdw[gid]   += vvdw;
668         svmul(fscal, dx, dx);
669
670         /* Add the forces */
671         rvec_inc(f[ai], dx);
672         rvec_dec(f[aj], dx);
673
674         if (g)
675         {
676             /* Correct the shift forces using the graph */
677             ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
678             fshift_index = IVEC2IS(dt);
679         }
680         if (fshift_index != CENTRAL)
681         {
682             rvec_inc(fshift[fshift_index], dx);
683             rvec_dec(fshift[CENTRAL], dx);
684         }
685     }
686     return 0.0;
687 }