2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "thread_mpi/threads.h"
48 #include "gromacs/utility/smalloc.h"
50 #include "gromacs/math/vec.h"
51 #include "gromacs/math/utilities.h"
53 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
61 #include "nonbonded.h"
62 #include "gromacs/simd/simd.h"
64 #include "nb_kernel.h"
65 #include "nb_free_energy.h"
66 #include "nb_generic.h"
67 #include "nb_generic_cg.h"
68 #include "nb_generic_adress.h"
70 /* Different default (c) and SIMD instructions interaction-specific kernels */
71 #include "nb_kernel_c/nb_kernel_c.h"
73 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
74 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
76 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
77 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
79 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
80 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
82 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
83 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
85 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
86 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
88 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
89 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
91 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
92 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
94 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
95 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
97 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
98 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
102 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
103 static gmx_bool nonbonded_setup_done = FALSE;
107 gmx_nonbonded_setup(t_forcerec * fr,
108 gmx_bool bGenericKernelOnly)
110 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
111 /* Here we are guaranteed only one thread made it. */
112 if (nonbonded_setup_done == FALSE)
114 if (bGenericKernelOnly == FALSE)
116 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
117 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
119 if (!(fr != NULL && fr->use_simd_kernels == FALSE))
121 /* Add interaction-specific kernels for different architectures */
122 /* Single precision */
123 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
124 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
126 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
127 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
129 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
130 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
132 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
133 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
135 /* Double precision */
136 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
137 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
139 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
140 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
142 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
143 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
145 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
146 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
148 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
149 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
151 ; /* empty statement to avoid a completely empty block */
154 /* Create a hash for faster lookups */
155 nb_kernel_list_hash_init();
157 nonbonded_setup_done = TRUE;
159 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
165 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
168 const char * elec_mod;
170 const char * vdw_mod;
178 int simd_padding_width;
182 /* Single precision */
183 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
184 { "avx_256_single", 8 },
186 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
187 { "avx_128_fma_single", 4 },
189 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
190 { "sse4_1_single", 4 },
192 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
193 { "sse2_single", 4 },
195 /* Double precision */
196 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
197 { "avx_256_double", 4 },
199 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
200 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
201 * since the kernels execute a loop unrolled a factor 2, followed by
202 * a possible single odd-element epilogue.
204 { "avx_128_fma_double", 1 },
206 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
207 /* No padding - see comment above */
208 { "sse2_double", 1 },
210 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
211 /* No padding - see comment above */
212 { "sse4_1_double", 1 },
214 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
215 /* No padding - see comment above */
216 { "sparc64_hpc_ace_double", 1 },
220 int narch = asize(arch_and_padding);
223 if (nonbonded_setup_done == FALSE)
225 /* We typically call this setup routine before starting timers,
226 * but if that has not been done for whatever reason we do it now.
228 gmx_nonbonded_setup(NULL, FALSE);
234 nl->kernelptr_vf = NULL;
235 nl->kernelptr_v = NULL;
236 nl->kernelptr_f = NULL;
238 elec = gmx_nbkernel_elec_names[nl->ielec];
239 elec_mod = eintmod_names[nl->ielecmod];
240 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
241 vdw_mod = eintmod_names[nl->ivdwmod];
242 geom = gmx_nblist_geometry_names[nl->igeometry];
244 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
246 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
247 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
248 nl->simd_padding_width = 1;
252 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
254 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
255 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
256 nl->simd_padding_width = 1;
258 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
260 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
261 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
262 nl->simd_padding_width = 1;
266 /* Try to find a specific kernel first */
268 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
270 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
271 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
273 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
275 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
276 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
278 /* If there is not force-only optimized kernel, is there a potential & force one? */
279 if (nl->kernelptr_f == NULL)
281 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
282 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
286 /* Give up. If this was a water kernel, leave the pointer as NULL, which
287 * will disable water optimization in NS. If it is a particle kernel, set
288 * the pointer to the generic NB kernel.
290 if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
292 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
293 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
294 nl->simd_padding_width = 1;
298 "WARNING - Slow generic NB kernel used for neighborlist with\n"
299 " Elec: '%s', Modifier: '%s'\n"
300 " Vdw: '%s', Modifier: '%s'\n"
301 " Geom: '%s', Other: '%s'\n\n",
302 elec, elec_mod, vdw, vdw_mod, geom, other);
310 void do_nonbonded(t_forcerec *fr,
311 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
312 gmx_grppairener_t *grppener,
313 t_nrnb *nrnb, real *lambda, real *dvdl,
314 int nls, int eNL, int flags)
317 int n, n0, n1, i, i0, i1, sz, range;
319 nb_kernel_data_t kernel_data;
320 nb_kernel_t * kernelptr = NULL;
323 kernel_data.flags = flags;
324 kernel_data.exclusions = excl;
325 kernel_data.lambda = lambda;
326 kernel_data.dvdl = dvdl;
330 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
356 for (n = n0; (n < n1); n++)
358 nblists = &fr->nblists[n];
360 kernel_data.table_elec = &nblists->table_elec;
361 kernel_data.table_vdw = &nblists->table_vdw;
362 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
364 for (range = 0; range < 2; range++)
366 /* Are we doing short/long-range? */
370 if (!(flags & GMX_NONBONDED_DO_SR))
374 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
375 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
376 kernel_data.energygrp_polarization = grppener->ener[egGB];
377 nlist = nblists->nlist_sr;
383 if (!(flags & GMX_NONBONDED_DO_LR))
387 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
388 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
389 kernel_data.energygrp_polarization = grppener->ener[egGB];
390 nlist = nblists->nlist_lr;
394 for (i = i0; (i < i1); i++)
396 if (nlist[i].nri > 0)
398 if (flags & GMX_NONBONDED_DO_POTENTIAL)
400 /* Potential and force */
401 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
405 /* Force only, no potential */
406 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
409 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
411 /* We don't need the non-perturbed interactions */
414 /* Neighborlists whose kernelptr==NULL will always be empty */
415 if (kernelptr != NULL)
417 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
426 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
428 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
429 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
430 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
431 "a smaller molecule you are decoupling during a free energy calculation.\n"
432 "Since interactions at distances beyond the table cannot be computed,\n"
433 "they are skipped until they are inside the table limit again. You will\n"
434 "only see this message once, even if it occurs for several interactions.\n\n"
435 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
436 "probably something wrong with your system. Only change the table-extension\n"
437 "distance in the mdp file if you are really sure that is the reason.\n",
438 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
443 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
444 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
445 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
451 /* This might logically belong better in the nb_generic.c module, but it is only
452 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
453 * extra functional call for every single pair listed in the topology.
456 nb_evaluate_single(real r2, real tabscale, real *vftab,
457 real qq, real c6, real c12, real *velec, real *vvdw)
459 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
462 /* Do the tabulated interactions - first table lookup */
463 rinv = gmx_invsqrt(r2);
473 Geps = eps*vftab[ntab+2];
474 Heps2 = eps2*vftab[ntab+3];
477 FFe = Fp+Geps+2.0*Heps2;
481 Geps = eps*vftab[ntab+6];
482 Heps2 = eps2*vftab[ntab+7];
485 FFd = Fp+Geps+2.0*Heps2;
489 Geps = eps*vftab[ntab+10];
490 Heps2 = eps2*vftab[ntab+11];
493 FFr = Fp+Geps+2.0*Heps2;
496 *vvdw = c6*VVd+c12*VVr;
498 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
505 do_nonbonded_listed(int ftype, int nbonds,
506 const t_iatom iatoms[], const t_iparams iparams[],
507 const rvec x[], rvec f[], rvec fshift[],
508 const t_pbc *pbc, const t_graph *g,
509 real *lambda, real *dvdl,
511 const t_forcerec *fr, gmx_grppairener_t *grppener,
512 int *global_atom_index)
518 int i, j, itype, ai, aj, gid;
521 real fscal, velec, vvdw;
522 real * energygrp_elec;
523 real * energygrp_vdw;
524 static gmx_bool warned_rlimit = FALSE;
525 /* Free energy stuff */
526 gmx_bool bFreeEnergy;
527 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
528 real qqB, c6B, c12B, sigma2_def, sigma2_min;
535 energygrp_elec = grppener->ener[egCOUL14];
536 energygrp_vdw = grppener->ener[egLJ14];
539 energygrp_elec = grppener->ener[egCOULSR];
540 energygrp_vdw = grppener->ener[egLJSR];
543 energygrp_elec = NULL; /* Keep compiler happy */
544 energygrp_vdw = NULL; /* Keep compiler happy */
545 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
549 if (fr->efep != efepNO)
551 /* Lambda factor for state A=1-lambda and B=lambda */
552 LFC[0] = 1.0 - lambda[efptCOUL];
553 LFV[0] = 1.0 - lambda[efptVDW];
554 LFC[1] = lambda[efptCOUL];
555 LFV[1] = lambda[efptVDW];
557 /*derivative of the lambda factor for state A and B */
562 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
563 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
565 for (i = 0; i < 2; i++)
567 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
568 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
569 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
570 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
575 sigma2_min = sigma2_def = 0;
579 for (i = 0; (i < nbonds); )
584 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
591 (fr->efep != efepNO &&
592 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
593 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
594 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
595 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
596 c6 = iparams[itype].lj14.c6A;
597 c12 = iparams[itype].lj14.c12A;
600 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
601 c6 = iparams[itype].ljc14.c6;
602 c12 = iparams[itype].ljc14.c12;
605 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
606 c6 = iparams[itype].ljcnb.c6;
607 c12 = iparams[itype].ljcnb.c12;
610 /* Cannot happen since we called gmx_fatal() above in this case */
611 qq = c6 = c12 = 0; /* Keep compiler happy */
615 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
616 * included in the general nfbp array now. This means the tables are scaled down by the
617 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
623 /* Do we need to apply full periodic boundary conditions? */
624 if (fr->bMolPBC == TRUE)
626 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
630 fshift_index = CENTRAL;
631 rvec_sub(x[ai], x[aj], dx);
635 if (r2 >= fr->tab14.r*fr->tab14.r)
637 if (warned_rlimit == FALSE)
639 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
640 warned_rlimit = TRUE;
647 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
648 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
649 c6B = iparams[itype].lj14.c6B*6.0;
650 c12B = iparams[itype].lj14.c12B*12.0;
652 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
653 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
654 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
655 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
659 /* Evaluate tabulated interaction without free energy */
660 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
663 energygrp_elec[gid] += velec;
664 energygrp_vdw[gid] += vvdw;
665 svmul(fscal, dx, dx);
673 /* Correct the shift forces using the graph */
674 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
675 fshift_index = IVEC2IS(dt);
677 if (fshift_index != CENTRAL)
679 rvec_inc(fshift[fshift_index], dx);
680 rvec_dec(fshift[CENTRAL], dx);