2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "thread_mpi/threads.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/math/utilities.h"
57 #include "nonbonded.h"
58 #include "gromacs/simd/simd.h"
60 #include "gromacs/pbcutil/ishift.h"
61 #include "gromacs/pbcutil/mshift.h"
62 #include "gromacs/pbcutil/pbc.h"
63 #include "gromacs/utility/cstringutil.h"
64 #include "gromacs/utility/fatalerror.h"
65 #include "gromacs/utility/smalloc.h"
67 #include "nb_kernel.h"
68 #include "nb_free_energy.h"
69 #include "nb_generic.h"
70 #include "nb_generic_cg.h"
71 #include "nb_generic_adress.h"
73 /* Different default (c) and SIMD instructions interaction-specific kernels */
74 #include "nb_kernel_c/nb_kernel_c.h"
76 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
77 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
79 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
80 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
82 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
83 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
85 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
86 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
88 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
89 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
91 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
92 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
94 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
95 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
97 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
98 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
100 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
101 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
105 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
106 static gmx_bool nonbonded_setup_done = FALSE;
110 gmx_nonbonded_setup(t_forcerec * fr,
111 gmx_bool bGenericKernelOnly)
113 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
114 /* Here we are guaranteed only one thread made it. */
115 if (nonbonded_setup_done == FALSE)
117 if (bGenericKernelOnly == FALSE)
119 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
120 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
122 if (!(fr != NULL && fr->use_simd_kernels == FALSE))
124 /* Add interaction-specific kernels for different architectures */
125 /* Single precision */
126 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
127 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
129 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
130 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
132 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
133 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
135 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
136 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
138 /* Double precision */
139 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
140 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
142 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
143 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
145 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
146 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
148 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
149 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
151 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
152 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
154 ; /* empty statement to avoid a completely empty block */
157 /* Create a hash for faster lookups */
158 nb_kernel_list_hash_init();
160 nonbonded_setup_done = TRUE;
162 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
168 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
171 const char * elec_mod;
173 const char * vdw_mod;
181 int simd_padding_width;
185 /* Single precision */
186 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
187 { "avx_256_single", 8 },
189 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
190 { "avx_128_fma_single", 4 },
192 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
193 { "sse4_1_single", 4 },
195 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
196 { "sse2_single", 4 },
198 /* Double precision */
199 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
200 { "avx_256_double", 4 },
202 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
203 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
204 * since the kernels execute a loop unrolled a factor 2, followed by
205 * a possible single odd-element epilogue.
207 { "avx_128_fma_double", 1 },
209 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
210 /* No padding - see comment above */
211 { "sse2_double", 1 },
213 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
214 /* No padding - see comment above */
215 { "sse4_1_double", 1 },
217 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
218 /* No padding - see comment above */
219 { "sparc64_hpc_ace_double", 1 },
223 int narch = asize(arch_and_padding);
226 if (nonbonded_setup_done == FALSE)
228 /* We typically call this setup routine before starting timers,
229 * but if that has not been done for whatever reason we do it now.
231 gmx_nonbonded_setup(NULL, FALSE);
237 nl->kernelptr_vf = NULL;
238 nl->kernelptr_v = NULL;
239 nl->kernelptr_f = NULL;
241 elec = gmx_nbkernel_elec_names[nl->ielec];
242 elec_mod = eintmod_names[nl->ielecmod];
243 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
244 vdw_mod = eintmod_names[nl->ivdwmod];
245 geom = gmx_nblist_geometry_names[nl->igeometry];
247 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
249 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
250 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
251 nl->simd_padding_width = 1;
255 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
257 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
258 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
259 nl->simd_padding_width = 1;
261 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
263 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
264 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
265 nl->simd_padding_width = 1;
269 /* Try to find a specific kernel first */
271 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
273 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
274 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
276 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
278 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
279 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
281 /* If there is not force-only optimized kernel, is there a potential & force one? */
282 if (nl->kernelptr_f == NULL)
284 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
285 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
289 /* Give up. If this was a water kernel, leave the pointer as NULL, which
290 * will disable water optimization in NS. If it is a particle kernel, set
291 * the pointer to the generic NB kernel.
293 if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
295 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
296 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
297 nl->simd_padding_width = 1;
301 "WARNING - Slow generic NB kernel used for neighborlist with\n"
302 " Elec: '%s', Modifier: '%s'\n"
303 " Vdw: '%s', Modifier: '%s'\n"
304 " Geom: '%s', Other: '%s'\n\n",
305 elec, elec_mod, vdw, vdw_mod, geom, other);
313 void do_nonbonded(t_forcerec *fr,
314 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
315 gmx_grppairener_t *grppener,
316 t_nrnb *nrnb, real *lambda, real *dvdl,
317 int nls, int eNL, int flags)
320 int n, n0, n1, i, i0, i1, sz, range;
322 nb_kernel_data_t kernel_data;
323 nb_kernel_t * kernelptr = NULL;
326 kernel_data.flags = flags;
327 kernel_data.exclusions = excl;
328 kernel_data.lambda = lambda;
329 kernel_data.dvdl = dvdl;
333 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
359 for (n = n0; (n < n1); n++)
361 nblists = &fr->nblists[n];
363 kernel_data.table_elec = &nblists->table_elec;
364 kernel_data.table_vdw = &nblists->table_vdw;
365 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
367 for (range = 0; range < 2; range++)
369 /* Are we doing short/long-range? */
373 if (!(flags & GMX_NONBONDED_DO_SR))
377 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
378 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
379 kernel_data.energygrp_polarization = grppener->ener[egGB];
380 nlist = nblists->nlist_sr;
386 if (!(flags & GMX_NONBONDED_DO_LR))
390 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
391 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
392 kernel_data.energygrp_polarization = grppener->ener[egGB];
393 nlist = nblists->nlist_lr;
397 for (i = i0; (i < i1); i++)
399 if (nlist[i].nri > 0)
401 if (flags & GMX_NONBONDED_DO_POTENTIAL)
403 /* Potential and force */
404 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
408 /* Force only, no potential */
409 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
412 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
414 /* We don't need the non-perturbed interactions */
417 /* Neighborlists whose kernelptr==NULL will always be empty */
418 if (kernelptr != NULL)
420 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
429 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
431 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
432 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
433 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
434 "a smaller molecule you are decoupling during a free energy calculation.\n"
435 "Since interactions at distances beyond the table cannot be computed,\n"
436 "they are skipped until they are inside the table limit again. You will\n"
437 "only see this message once, even if it occurs for several interactions.\n\n"
438 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
439 "probably something wrong with your system. Only change the table-extension\n"
440 "distance in the mdp file if you are really sure that is the reason.\n",
441 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
446 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
447 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
448 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
454 /* This might logically belong better in the nb_generic.c module, but it is only
455 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
456 * extra functional call for every single pair listed in the topology.
459 nb_evaluate_single(real r2, real tabscale, real *vftab,
460 real qq, real c6, real c12, real *velec, real *vvdw)
462 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
465 /* Do the tabulated interactions - first table lookup */
466 rinv = gmx_invsqrt(r2);
476 Geps = eps*vftab[ntab+2];
477 Heps2 = eps2*vftab[ntab+3];
480 FFe = Fp+Geps+2.0*Heps2;
484 Geps = eps*vftab[ntab+6];
485 Heps2 = eps2*vftab[ntab+7];
488 FFd = Fp+Geps+2.0*Heps2;
492 Geps = eps*vftab[ntab+10];
493 Heps2 = eps2*vftab[ntab+11];
496 FFr = Fp+Geps+2.0*Heps2;
499 *vvdw = c6*VVd+c12*VVr;
501 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
508 do_nonbonded_listed(int ftype, int nbonds,
509 const t_iatom iatoms[], const t_iparams iparams[],
510 const rvec x[], rvec f[], rvec fshift[],
511 const t_pbc *pbc, const t_graph *g,
512 real *lambda, real *dvdl,
514 const t_forcerec *fr, gmx_grppairener_t *grppener,
515 int *global_atom_index)
521 int i, j, itype, ai, aj, gid;
524 real fscal, velec, vvdw;
525 real * energygrp_elec;
526 real * energygrp_vdw;
527 static gmx_bool warned_rlimit = FALSE;
528 /* Free energy stuff */
529 gmx_bool bFreeEnergy;
530 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
531 real qqB, c6B, c12B, sigma2_def, sigma2_min;
538 energygrp_elec = grppener->ener[egCOUL14];
539 energygrp_vdw = grppener->ener[egLJ14];
542 energygrp_elec = grppener->ener[egCOULSR];
543 energygrp_vdw = grppener->ener[egLJSR];
546 energygrp_elec = NULL; /* Keep compiler happy */
547 energygrp_vdw = NULL; /* Keep compiler happy */
548 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
552 if (fr->efep != efepNO)
554 /* Lambda factor for state A=1-lambda and B=lambda */
555 LFC[0] = 1.0 - lambda[efptCOUL];
556 LFV[0] = 1.0 - lambda[efptVDW];
557 LFC[1] = lambda[efptCOUL];
558 LFV[1] = lambda[efptVDW];
560 /*derivative of the lambda factor for state A and B */
565 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
566 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
568 for (i = 0; i < 2; i++)
570 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
571 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
572 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
573 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
578 sigma2_min = sigma2_def = 0;
582 for (i = 0; (i < nbonds); )
587 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
594 (fr->efep != efepNO &&
595 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
596 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
597 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
598 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
599 c6 = iparams[itype].lj14.c6A;
600 c12 = iparams[itype].lj14.c12A;
603 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
604 c6 = iparams[itype].ljc14.c6;
605 c12 = iparams[itype].ljc14.c12;
608 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
609 c6 = iparams[itype].ljcnb.c6;
610 c12 = iparams[itype].ljcnb.c12;
613 /* Cannot happen since we called gmx_fatal() above in this case */
614 qq = c6 = c12 = 0; /* Keep compiler happy */
618 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
619 * included in the general nfbp array now. This means the tables are scaled down by the
620 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
626 /* Do we need to apply full periodic boundary conditions? */
627 if (fr->bMolPBC == TRUE)
629 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
633 fshift_index = CENTRAL;
634 rvec_sub(x[ai], x[aj], dx);
638 if (r2 >= fr->tab14.r*fr->tab14.r)
640 if (warned_rlimit == FALSE)
642 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
643 warned_rlimit = TRUE;
650 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
651 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
652 c6B = iparams[itype].lj14.c6B*6.0;
653 c12B = iparams[itype].lj14.c12B*12.0;
655 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
656 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
657 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
658 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
662 /* Evaluate tabulated interaction without free energy */
663 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
666 energygrp_elec[gid] += velec;
667 energygrp_vdw[gid] += vvdw;
668 svmul(fscal, dx, dx);
676 /* Correct the shift forces using the graph */
677 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
678 fshift_index = IVEC2IS(dt);
680 if (fshift_index != CENTRAL)
682 rvec_inc(fshift[fshift_index], dx);
683 rvec_dec(fshift[CENTRAL], dx);