2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "thread_mpi/threads.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/math/utilities.h"
52 #include "gromacs/utility/cstringutil.h"
58 #include "nonbonded.h"
59 #include "gromacs/simd/simd.h"
61 #include "gromacs/pbcutil/mshift.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/smalloc.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and SIMD instructions interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
76 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
78 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
79 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
81 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
82 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
84 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
85 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
87 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
88 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
90 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
91 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
93 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
94 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
96 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
97 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
99 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
100 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
104 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
105 static gmx_bool nonbonded_setup_done = FALSE;
109 gmx_nonbonded_setup(t_forcerec * fr,
110 gmx_bool bGenericKernelOnly)
112 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
113 /* Here we are guaranteed only one thread made it. */
114 if (nonbonded_setup_done == FALSE)
116 if (bGenericKernelOnly == FALSE)
118 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
119 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
121 if (!(fr != NULL && fr->use_simd_kernels == FALSE))
123 /* Add interaction-specific kernels for different architectures */
124 /* Single precision */
125 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
126 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
128 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
129 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
131 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
132 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
134 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
135 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
137 /* Double precision */
138 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
139 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
141 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
142 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
144 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
145 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
147 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
148 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
150 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
151 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
153 ; /* empty statement to avoid a completely empty block */
156 /* Create a hash for faster lookups */
157 nb_kernel_list_hash_init();
159 nonbonded_setup_done = TRUE;
161 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
167 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
170 const char * elec_mod;
172 const char * vdw_mod;
180 int simd_padding_width;
184 /* Single precision */
185 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
186 { "avx_256_single", 8 },
188 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
189 { "avx_128_fma_single", 4 },
191 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
192 { "sse4_1_single", 4 },
194 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
195 { "sse2_single", 4 },
197 /* Double precision */
198 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
199 { "avx_256_double", 4 },
201 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
202 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
203 * since the kernels execute a loop unrolled a factor 2, followed by
204 * a possible single odd-element epilogue.
206 { "avx_128_fma_double", 1 },
208 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
209 /* No padding - see comment above */
210 { "sse2_double", 1 },
212 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
213 /* No padding - see comment above */
214 { "sse4_1_double", 1 },
216 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
217 /* No padding - see comment above */
218 { "sparc64_hpc_ace_double", 1 },
222 int narch = asize(arch_and_padding);
225 if (nonbonded_setup_done == FALSE)
227 /* We typically call this setup routine before starting timers,
228 * but if that has not been done for whatever reason we do it now.
230 gmx_nonbonded_setup(NULL, FALSE);
236 nl->kernelptr_vf = NULL;
237 nl->kernelptr_v = NULL;
238 nl->kernelptr_f = NULL;
240 elec = gmx_nbkernel_elec_names[nl->ielec];
241 elec_mod = eintmod_names[nl->ielecmod];
242 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
243 vdw_mod = eintmod_names[nl->ivdwmod];
244 geom = gmx_nblist_geometry_names[nl->igeometry];
246 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
248 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
249 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
250 nl->simd_padding_width = 1;
254 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
256 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
257 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
258 nl->simd_padding_width = 1;
260 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
262 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
263 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
264 nl->simd_padding_width = 1;
268 /* Try to find a specific kernel first */
270 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
272 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
273 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
275 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
277 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
278 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
280 /* If there is not force-only optimized kernel, is there a potential & force one? */
281 if (nl->kernelptr_f == NULL)
283 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
284 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
288 /* Give up. If this was a water kernel, leave the pointer as NULL, which
289 * will disable water optimization in NS. If it is a particle kernel, set
290 * the pointer to the generic NB kernel.
292 if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
294 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
295 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
296 nl->simd_padding_width = 1;
300 "WARNING - Slow generic NB kernel used for neighborlist with\n"
301 " Elec: '%s', Modifier: '%s'\n"
302 " Vdw: '%s', Modifier: '%s'\n"
303 " Geom: '%s', Other: '%s'\n\n",
304 elec, elec_mod, vdw, vdw_mod, geom, other);
312 void do_nonbonded(t_forcerec *fr,
313 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
314 gmx_grppairener_t *grppener,
315 t_nrnb *nrnb, real *lambda, real *dvdl,
316 int nls, int eNL, int flags)
319 int n, n0, n1, i, i0, i1, sz, range;
321 nb_kernel_data_t kernel_data;
322 nb_kernel_t * kernelptr = NULL;
325 kernel_data.flags = flags;
326 kernel_data.exclusions = excl;
327 kernel_data.lambda = lambda;
328 kernel_data.dvdl = dvdl;
332 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
358 for (n = n0; (n < n1); n++)
360 nblists = &fr->nblists[n];
362 kernel_data.table_elec = &nblists->table_elec;
363 kernel_data.table_vdw = &nblists->table_vdw;
364 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
366 for (range = 0; range < 2; range++)
368 /* Are we doing short/long-range? */
372 if (!(flags & GMX_NONBONDED_DO_SR))
376 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
377 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
378 kernel_data.energygrp_polarization = grppener->ener[egGB];
379 nlist = nblists->nlist_sr;
385 if (!(flags & GMX_NONBONDED_DO_LR))
389 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
390 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
391 kernel_data.energygrp_polarization = grppener->ener[egGB];
392 nlist = nblists->nlist_lr;
396 for (i = i0; (i < i1); i++)
398 if (nlist[i].nri > 0)
400 if (flags & GMX_NONBONDED_DO_POTENTIAL)
402 /* Potential and force */
403 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
407 /* Force only, no potential */
408 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
411 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
413 /* We don't need the non-perturbed interactions */
416 /* Neighborlists whose kernelptr==NULL will always be empty */
417 if (kernelptr != NULL)
419 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
428 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
430 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
431 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
432 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
433 "a smaller molecule you are decoupling during a free energy calculation.\n"
434 "Since interactions at distances beyond the table cannot be computed,\n"
435 "they are skipped until they are inside the table limit again. You will\n"
436 "only see this message once, even if it occurs for several interactions.\n\n"
437 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
438 "probably something wrong with your system. Only change the table-extension\n"
439 "distance in the mdp file if you are really sure that is the reason.\n",
440 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
445 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
446 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
447 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
453 /* This might logically belong better in the nb_generic.c module, but it is only
454 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
455 * extra functional call for every single pair listed in the topology.
458 nb_evaluate_single(real r2, real tabscale, real *vftab,
459 real qq, real c6, real c12, real *velec, real *vvdw)
461 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
464 /* Do the tabulated interactions - first table lookup */
465 rinv = gmx_invsqrt(r2);
475 Geps = eps*vftab[ntab+2];
476 Heps2 = eps2*vftab[ntab+3];
479 FFe = Fp+Geps+2.0*Heps2;
483 Geps = eps*vftab[ntab+6];
484 Heps2 = eps2*vftab[ntab+7];
487 FFd = Fp+Geps+2.0*Heps2;
491 Geps = eps*vftab[ntab+10];
492 Heps2 = eps2*vftab[ntab+11];
495 FFr = Fp+Geps+2.0*Heps2;
498 *vvdw = c6*VVd+c12*VVr;
500 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
507 do_nonbonded_listed(int ftype, int nbonds,
508 const t_iatom iatoms[], const t_iparams iparams[],
509 const rvec x[], rvec f[], rvec fshift[],
510 const t_pbc *pbc, const t_graph *g,
511 real *lambda, real *dvdl,
513 const t_forcerec *fr, gmx_grppairener_t *grppener,
514 int *global_atom_index)
520 int i, j, itype, ai, aj, gid;
523 real fscal, velec, vvdw;
524 real * energygrp_elec;
525 real * energygrp_vdw;
526 static gmx_bool warned_rlimit = FALSE;
527 /* Free energy stuff */
528 gmx_bool bFreeEnergy;
529 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
530 real qqB, c6B, c12B, sigma2_def, sigma2_min;
537 energygrp_elec = grppener->ener[egCOUL14];
538 energygrp_vdw = grppener->ener[egLJ14];
541 energygrp_elec = grppener->ener[egCOULSR];
542 energygrp_vdw = grppener->ener[egLJSR];
545 energygrp_elec = NULL; /* Keep compiler happy */
546 energygrp_vdw = NULL; /* Keep compiler happy */
547 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
551 if (fr->efep != efepNO)
553 /* Lambda factor for state A=1-lambda and B=lambda */
554 LFC[0] = 1.0 - lambda[efptCOUL];
555 LFV[0] = 1.0 - lambda[efptVDW];
556 LFC[1] = lambda[efptCOUL];
557 LFV[1] = lambda[efptVDW];
559 /*derivative of the lambda factor for state A and B */
564 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
565 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
567 for (i = 0; i < 2; i++)
569 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
570 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
571 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
572 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
577 sigma2_min = sigma2_def = 0;
581 for (i = 0; (i < nbonds); )
586 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
593 (fr->efep != efepNO &&
594 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
595 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
596 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
597 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
598 c6 = iparams[itype].lj14.c6A;
599 c12 = iparams[itype].lj14.c12A;
602 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
603 c6 = iparams[itype].ljc14.c6;
604 c12 = iparams[itype].ljc14.c12;
607 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
608 c6 = iparams[itype].ljcnb.c6;
609 c12 = iparams[itype].ljcnb.c12;
612 /* Cannot happen since we called gmx_fatal() above in this case */
613 qq = c6 = c12 = 0; /* Keep compiler happy */
617 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
618 * included in the general nfbp array now. This means the tables are scaled down by the
619 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
625 /* Do we need to apply full periodic boundary conditions? */
626 if (fr->bMolPBC == TRUE)
628 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
632 fshift_index = CENTRAL;
633 rvec_sub(x[ai], x[aj], dx);
637 if (r2 >= fr->tab14.r*fr->tab14.r)
639 if (warned_rlimit == FALSE)
641 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
642 warned_rlimit = TRUE;
649 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
650 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
651 c6B = iparams[itype].lj14.c6B*6.0;
652 c12B = iparams[itype].lj14.c12B*12.0;
654 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
655 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
656 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
657 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
661 /* Evaluate tabulated interaction without free energy */
662 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
665 energygrp_elec[gid] += velec;
666 energygrp_vdw[gid] += vvdw;
667 svmul(fscal, dx, dx);
675 /* Correct the shift forces using the graph */
676 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
677 fshift_index = IVEC2IS(dt);
679 if (fshift_index != CENTRAL)
681 rvec_inc(fshift[fshift_index], dx);
682 rvec_dec(fshift[CENTRAL], dx);