2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "thread_mpi/threads.h"
48 #include "gromacs/utility/smalloc.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/math/utilities.h"
53 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
60 #include "nonbonded.h"
61 #include "gromacs/simd/simd.h"
63 #include "nb_kernel.h"
64 #include "nb_free_energy.h"
65 #include "nb_generic.h"
66 #include "nb_generic_cg.h"
67 #include "nb_generic_adress.h"
69 /* Different default (c) and SIMD instructions interaction-specific kernels */
70 #include "nb_kernel_c/nb_kernel_c.h"
72 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
73 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
75 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
76 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
78 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
79 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
81 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
82 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
84 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
85 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
87 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
88 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
90 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
91 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
93 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
94 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
96 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
97 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
101 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
102 static gmx_bool nonbonded_setup_done = FALSE;
106 gmx_nonbonded_setup(t_forcerec * fr,
107 gmx_bool bGenericKernelOnly)
109 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
110 /* Here we are guaranteed only one thread made it. */
111 if (nonbonded_setup_done == FALSE)
113 if (bGenericKernelOnly == FALSE)
115 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
116 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
118 if (!(fr != NULL && fr->use_simd_kernels == FALSE))
120 /* Add interaction-specific kernels for different architectures */
121 /* Single precision */
122 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
123 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
125 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
126 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
128 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
129 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
131 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
132 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
134 /* Double precision */
135 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
136 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
138 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
139 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
141 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
142 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
144 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
145 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
147 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
148 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
150 ; /* empty statement to avoid a completely empty block */
153 /* Create a hash for faster lookups */
154 nb_kernel_list_hash_init();
156 nonbonded_setup_done = TRUE;
158 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
164 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
167 const char * elec_mod;
169 const char * vdw_mod;
177 int simd_padding_width;
181 /* Single precision */
182 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
183 { "avx_256_single", 8 },
185 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
186 { "avx_128_fma_single", 4 },
188 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
189 { "sse4_1_single", 4 },
191 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
192 { "sse2_single", 4 },
194 /* Double precision */
195 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
196 { "avx_256_double", 4 },
198 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
199 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
200 * since the kernels execute a loop unrolled a factor 2, followed by
201 * a possible single odd-element epilogue.
203 { "avx_128_fma_double", 1 },
205 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
206 /* No padding - see comment above */
207 { "sse2_double", 1 },
209 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
210 /* No padding - see comment above */
211 { "sse4_1_double", 1 },
213 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
214 /* No padding - see comment above */
215 { "sparc64_hpc_ace_double", 1 },
219 int narch = asize(arch_and_padding);
222 if (nonbonded_setup_done == FALSE)
224 /* We typically call this setup routine before starting timers,
225 * but if that has not been done for whatever reason we do it now.
227 gmx_nonbonded_setup(NULL, FALSE);
233 nl->kernelptr_vf = NULL;
234 nl->kernelptr_v = NULL;
235 nl->kernelptr_f = NULL;
237 elec = gmx_nbkernel_elec_names[nl->ielec];
238 elec_mod = eintmod_names[nl->ielecmod];
239 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
240 vdw_mod = eintmod_names[nl->ivdwmod];
241 geom = gmx_nblist_geometry_names[nl->igeometry];
243 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
245 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
246 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
247 nl->simd_padding_width = 1;
251 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
253 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
254 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
255 nl->simd_padding_width = 1;
257 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
259 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
260 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
261 nl->simd_padding_width = 1;
265 /* Try to find a specific kernel first */
267 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
269 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
270 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
272 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
274 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
275 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
277 /* If there is not force-only optimized kernel, is there a potential & force one? */
278 if (nl->kernelptr_f == NULL)
280 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
281 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
285 /* Give up. If this was a water kernel, leave the pointer as NULL, which
286 * will disable water optimization in NS. If it is a particle kernel, set
287 * the pointer to the generic NB kernel.
289 if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
291 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
292 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
293 nl->simd_padding_width = 1;
297 "WARNING - Slow generic NB kernel used for neighborlist with\n"
298 " Elec: '%s', Modifier: '%s'\n"
299 " Vdw: '%s', Modifier: '%s'\n"
300 " Geom: '%s', Other: '%s'\n\n",
301 elec, elec_mod, vdw, vdw_mod, geom, other);
309 void do_nonbonded(t_forcerec *fr,
310 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
311 gmx_grppairener_t *grppener,
312 t_nrnb *nrnb, real *lambda, real *dvdl,
313 int nls, int eNL, int flags)
316 int n, n0, n1, i, i0, i1, sz, range;
318 nb_kernel_data_t kernel_data;
319 nb_kernel_t * kernelptr = NULL;
322 kernel_data.flags = flags;
323 kernel_data.exclusions = excl;
324 kernel_data.lambda = lambda;
325 kernel_data.dvdl = dvdl;
329 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
355 for (n = n0; (n < n1); n++)
357 nblists = &fr->nblists[n];
359 kernel_data.table_elec = &nblists->table_elec;
360 kernel_data.table_vdw = &nblists->table_vdw;
361 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
363 for (range = 0; range < 2; range++)
365 /* Are we doing short/long-range? */
369 if (!(flags & GMX_NONBONDED_DO_SR))
373 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
374 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
375 kernel_data.energygrp_polarization = grppener->ener[egGB];
376 nlist = nblists->nlist_sr;
382 if (!(flags & GMX_NONBONDED_DO_LR))
386 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
387 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
388 kernel_data.energygrp_polarization = grppener->ener[egGB];
389 nlist = nblists->nlist_lr;
393 for (i = i0; (i < i1); i++)
395 if (nlist[i].nri > 0)
397 if (flags & GMX_NONBONDED_DO_POTENTIAL)
399 /* Potential and force */
400 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
404 /* Force only, no potential */
405 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
408 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
410 /* We don't need the non-perturbed interactions */
413 /* Neighborlists whose kernelptr==NULL will always be empty */
414 if (kernelptr != NULL)
416 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
425 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
427 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
428 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
429 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
430 "a smaller molecule you are decoupling during a free energy calculation.\n"
431 "Since interactions at distances beyond the table cannot be computed,\n"
432 "they are skipped until they are inside the table limit again. You will\n"
433 "only see this message once, even if it occurs for several interactions.\n\n"
434 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
435 "probably something wrong with your system. Only change the table-extension\n"
436 "distance in the mdp file if you are really sure that is the reason.\n",
437 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
442 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
443 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
444 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
450 /* This might logically belong better in the nb_generic.c module, but it is only
451 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
452 * extra functional call for every single pair listed in the topology.
455 nb_evaluate_single(real r2, real tabscale, real *vftab,
456 real qq, real c6, real c12, real *velec, real *vvdw)
458 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
461 /* Do the tabulated interactions - first table lookup */
462 rinv = gmx_invsqrt(r2);
472 Geps = eps*vftab[ntab+2];
473 Heps2 = eps2*vftab[ntab+3];
476 FFe = Fp+Geps+2.0*Heps2;
480 Geps = eps*vftab[ntab+6];
481 Heps2 = eps2*vftab[ntab+7];
484 FFd = Fp+Geps+2.0*Heps2;
488 Geps = eps*vftab[ntab+10];
489 Heps2 = eps2*vftab[ntab+11];
492 FFr = Fp+Geps+2.0*Heps2;
495 *vvdw = c6*VVd+c12*VVr;
497 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
504 do_nonbonded_listed(int ftype, int nbonds,
505 const t_iatom iatoms[], const t_iparams iparams[],
506 const rvec x[], rvec f[], rvec fshift[],
507 const t_pbc *pbc, const t_graph *g,
508 real *lambda, real *dvdl,
510 const t_forcerec *fr, gmx_grppairener_t *grppener,
511 int *global_atom_index)
517 int i, j, itype, ai, aj, gid;
520 real fscal, velec, vvdw;
521 real * energygrp_elec;
522 real * energygrp_vdw;
523 static gmx_bool warned_rlimit = FALSE;
524 /* Free energy stuff */
525 gmx_bool bFreeEnergy;
526 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
527 real qqB, c6B, c12B, sigma2_def, sigma2_min;
534 energygrp_elec = grppener->ener[egCOUL14];
535 energygrp_vdw = grppener->ener[egLJ14];
538 energygrp_elec = grppener->ener[egCOULSR];
539 energygrp_vdw = grppener->ener[egLJSR];
542 energygrp_elec = NULL; /* Keep compiler happy */
543 energygrp_vdw = NULL; /* Keep compiler happy */
544 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
548 if (fr->efep != efepNO)
550 /* Lambda factor for state A=1-lambda and B=lambda */
551 LFC[0] = 1.0 - lambda[efptCOUL];
552 LFV[0] = 1.0 - lambda[efptVDW];
553 LFC[1] = lambda[efptCOUL];
554 LFV[1] = lambda[efptVDW];
556 /*derivative of the lambda factor for state A and B */
561 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
562 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
564 for (i = 0; i < 2; i++)
566 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
567 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
568 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
569 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
574 sigma2_min = sigma2_def = 0;
578 for (i = 0; (i < nbonds); )
583 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
590 (fr->efep != efepNO &&
591 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
592 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
593 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
594 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
595 c6 = iparams[itype].lj14.c6A;
596 c12 = iparams[itype].lj14.c12A;
599 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
600 c6 = iparams[itype].ljc14.c6;
601 c12 = iparams[itype].ljc14.c12;
604 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
605 c6 = iparams[itype].ljcnb.c6;
606 c12 = iparams[itype].ljcnb.c12;
609 /* Cannot happen since we called gmx_fatal() above in this case */
610 qq = c6 = c12 = 0; /* Keep compiler happy */
614 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
615 * included in the general nfbp array now. This means the tables are scaled down by the
616 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
622 /* Do we need to apply full periodic boundary conditions? */
623 if (fr->bMolPBC == TRUE)
625 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
629 fshift_index = CENTRAL;
630 rvec_sub(x[ai], x[aj], dx);
634 if (r2 >= fr->tab14.r*fr->tab14.r)
636 if (warned_rlimit == FALSE)
638 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
639 warned_rlimit = TRUE;
646 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
647 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
648 c6B = iparams[itype].lj14.c6B*6.0;
649 c12B = iparams[itype].lj14.c12B*12.0;
651 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
652 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
653 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
654 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
658 /* Evaluate tabulated interaction without free energy */
659 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
662 energygrp_elec[gid] += velec;
663 energygrp_vdw[gid] += vvdw;
664 svmul(fscal, dx, dx);
672 /* Correct the shift forces using the graph */
673 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
674 fshift_index = IVEC2IS(dt);
676 if (fshift_index != CENTRAL)
678 rvec_inc(fshift[fshift_index], dx);
679 rvec_dec(fshift[CENTRAL], dx);