2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "thread_mpi/threads.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/math/utilities.h"
52 #include "gromacs/utility/cstringutil.h"
58 #include "nonbonded.h"
59 #include "gromacs/simd/simd.h"
61 #include "gromacs/pbcutil/mshift.h"
62 #include "gromacs/utility/fatalerror.h"
63 #include "gromacs/utility/smalloc.h"
65 #include "nb_kernel.h"
66 #include "nb_free_energy.h"
67 #include "nb_generic.h"
68 #include "nb_generic_cg.h"
69 #include "nb_generic_adress.h"
71 /* Different default (c) and SIMD instructions interaction-specific kernels */
72 #include "nb_kernel_c/nb_kernel_c.h"
74 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
75 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
77 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
78 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
80 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
83 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
84 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
86 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
87 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
89 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
90 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
92 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
93 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
95 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
96 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
98 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
99 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
103 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
104 static gmx_bool nonbonded_setup_done = FALSE;
108 gmx_nonbonded_setup(t_forcerec * fr,
109 gmx_bool bGenericKernelOnly)
111 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
112 /* Here we are guaranteed only one thread made it. */
113 if (nonbonded_setup_done == FALSE)
115 if (bGenericKernelOnly == FALSE)
117 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
118 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
120 if (!(fr != NULL && fr->use_simd_kernels == FALSE))
122 /* Add interaction-specific kernels for different architectures */
123 /* Single precision */
124 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
125 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
127 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
128 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
130 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
131 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
133 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
134 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
136 /* Double precision */
137 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
138 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
140 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
141 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
143 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
144 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
146 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
147 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
149 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
150 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
152 ; /* empty statement to avoid a completely empty block */
155 /* Create a hash for faster lookups */
156 nb_kernel_list_hash_init();
158 nonbonded_setup_done = TRUE;
160 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
166 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
169 const char * elec_mod;
171 const char * vdw_mod;
179 int simd_padding_width;
183 /* Single precision */
184 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER) && !(defined GMX_DOUBLE)
185 { "avx_256_single", 8 },
187 #if (defined GMX_SIMD_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
188 { "avx_128_fma_single", 4 },
190 #if (defined GMX_SIMD_X86_SSE4_1) && !(defined GMX_DOUBLE)
191 { "sse4_1_single", 4 },
193 #if (defined GMX_SIMD_X86_SSE2) && !(defined GMX_DOUBLE)
194 { "sse2_single", 4 },
196 /* Double precision */
197 #if (defined GMX_SIMD_X86_AVX_256_OR_HIGHER && defined GMX_DOUBLE)
198 { "avx_256_double", 4 },
200 #if (defined GMX_SIMD_X86_AVX_128_FMA && defined GMX_DOUBLE)
201 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
202 * since the kernels execute a loop unrolled a factor 2, followed by
203 * a possible single odd-element epilogue.
205 { "avx_128_fma_double", 1 },
207 #if (defined GMX_SIMD_X86_SSE2 && defined GMX_DOUBLE)
208 /* No padding - see comment above */
209 { "sse2_double", 1 },
211 #if (defined GMX_SIMD_X86_SSE4_1 && defined GMX_DOUBLE)
212 /* No padding - see comment above */
213 { "sse4_1_double", 1 },
215 #if (defined GMX_SIMD_SPARC64_HPC_ACE && defined GMX_DOUBLE)
216 /* No padding - see comment above */
217 { "sparc64_hpc_ace_double", 1 },
221 int narch = asize(arch_and_padding);
224 if (nonbonded_setup_done == FALSE)
226 /* We typically call this setup routine before starting timers,
227 * but if that has not been done for whatever reason we do it now.
229 gmx_nonbonded_setup(NULL, FALSE);
235 nl->kernelptr_vf = NULL;
236 nl->kernelptr_v = NULL;
237 nl->kernelptr_f = NULL;
239 elec = gmx_nbkernel_elec_names[nl->ielec];
240 elec_mod = eintmod_names[nl->ielecmod];
241 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
242 vdw_mod = eintmod_names[nl->ivdwmod];
243 geom = gmx_nblist_geometry_names[nl->igeometry];
245 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
247 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
248 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
249 nl->simd_padding_width = 1;
253 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
255 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
256 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
257 nl->simd_padding_width = 1;
259 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
261 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
262 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
263 nl->simd_padding_width = 1;
267 /* Try to find a specific kernel first */
269 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
271 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
272 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
274 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
276 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
277 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
279 /* If there is not force-only optimized kernel, is there a potential & force one? */
280 if (nl->kernelptr_f == NULL)
282 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
283 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
287 /* Give up. If this was a water kernel, leave the pointer as NULL, which
288 * will disable water optimization in NS. If it is a particle kernel, set
289 * the pointer to the generic NB kernel.
291 if (nl->kernelptr_vf == NULL && !gmx_strcasecmp_min(geom, "Particle-Particle"))
293 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
294 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
295 nl->simd_padding_width = 1;
299 "WARNING - Slow generic NB kernel used for neighborlist with\n"
300 " Elec: '%s', Modifier: '%s'\n"
301 " Vdw: '%s', Modifier: '%s'\n"
302 " Geom: '%s', Other: '%s'\n\n",
303 elec, elec_mod, vdw, vdw_mod, geom, other);
311 void do_nonbonded(t_forcerec *fr,
312 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
313 gmx_grppairener_t *grppener,
314 t_nrnb *nrnb, real *lambda, real *dvdl,
315 int nls, int eNL, int flags)
318 int n, n0, n1, i, i0, i1, sz, range;
320 nb_kernel_data_t kernel_data;
321 nb_kernel_t * kernelptr = NULL;
324 kernel_data.flags = flags;
325 kernel_data.exclusions = excl;
326 kernel_data.lambda = lambda;
327 kernel_data.dvdl = dvdl;
331 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
357 for (n = n0; (n < n1); n++)
359 nblists = &fr->nblists[n];
361 kernel_data.table_elec = &nblists->table_elec;
362 kernel_data.table_vdw = &nblists->table_vdw;
363 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
365 for (range = 0; range < 2; range++)
367 /* Are we doing short/long-range? */
371 if (!(flags & GMX_NONBONDED_DO_SR))
375 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
376 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
377 kernel_data.energygrp_polarization = grppener->ener[egGB];
378 nlist = nblists->nlist_sr;
384 if (!(flags & GMX_NONBONDED_DO_LR))
388 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
389 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
390 kernel_data.energygrp_polarization = grppener->ener[egGB];
391 nlist = nblists->nlist_lr;
395 for (i = i0; (i < i1); i++)
397 if (nlist[i].nri > 0)
399 if (flags & GMX_NONBONDED_DO_POTENTIAL)
401 /* Potential and force */
402 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
406 /* Force only, no potential */
407 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
410 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
412 /* We don't need the non-perturbed interactions */
415 /* Neighborlists whose kernelptr==NULL will always be empty */
416 if (kernelptr != NULL)
418 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
427 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
429 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
430 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
431 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
432 "a smaller molecule you are decoupling during a free energy calculation.\n"
433 "Since interactions at distances beyond the table cannot be computed,\n"
434 "they are skipped until they are inside the table limit again. You will\n"
435 "only see this message once, even if it occurs for several interactions.\n\n"
436 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
437 "probably something wrong with your system. Only change the table-extension\n"
438 "distance in the mdp file if you are really sure that is the reason.\n",
439 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
444 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
445 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
446 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
452 /* This might logically belong better in the nb_generic.c module, but it is only
453 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
454 * extra functional call for every single pair listed in the topology.
457 nb_evaluate_single(real r2, real tabscale, real *vftab,
458 real qq, real c6, real c12, real *velec, real *vvdw)
460 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
463 /* Do the tabulated interactions - first table lookup */
464 rinv = gmx_invsqrt(r2);
474 Geps = eps*vftab[ntab+2];
475 Heps2 = eps2*vftab[ntab+3];
478 FFe = Fp+Geps+2.0*Heps2;
482 Geps = eps*vftab[ntab+6];
483 Heps2 = eps2*vftab[ntab+7];
486 FFd = Fp+Geps+2.0*Heps2;
490 Geps = eps*vftab[ntab+10];
491 Heps2 = eps2*vftab[ntab+11];
494 FFr = Fp+Geps+2.0*Heps2;
497 *vvdw = c6*VVd+c12*VVr;
499 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
506 do_nonbonded_listed(int ftype, int nbonds,
507 const t_iatom iatoms[], const t_iparams iparams[],
508 const rvec x[], rvec f[], rvec fshift[],
509 const t_pbc *pbc, const t_graph *g,
510 real *lambda, real *dvdl,
512 const t_forcerec *fr, gmx_grppairener_t *grppener,
513 int *global_atom_index)
519 int i, j, itype, ai, aj, gid;
522 real fscal, velec, vvdw;
523 real * energygrp_elec;
524 real * energygrp_vdw;
525 static gmx_bool warned_rlimit = FALSE;
526 /* Free energy stuff */
527 gmx_bool bFreeEnergy;
528 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
529 real qqB, c6B, c12B, sigma2_def, sigma2_min;
536 energygrp_elec = grppener->ener[egCOUL14];
537 energygrp_vdw = grppener->ener[egLJ14];
540 energygrp_elec = grppener->ener[egCOULSR];
541 energygrp_vdw = grppener->ener[egLJSR];
544 energygrp_elec = NULL; /* Keep compiler happy */
545 energygrp_vdw = NULL; /* Keep compiler happy */
546 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
550 if (fr->efep != efepNO)
552 /* Lambda factor for state A=1-lambda and B=lambda */
553 LFC[0] = 1.0 - lambda[efptCOUL];
554 LFV[0] = 1.0 - lambda[efptVDW];
555 LFC[1] = lambda[efptCOUL];
556 LFV[1] = lambda[efptVDW];
558 /*derivative of the lambda factor for state A and B */
563 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
564 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
566 for (i = 0; i < 2; i++)
568 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
569 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
570 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
571 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
576 sigma2_min = sigma2_def = 0;
580 for (i = 0; (i < nbonds); )
585 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
592 (fr->efep != efepNO &&
593 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
594 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
595 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
596 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
597 c6 = iparams[itype].lj14.c6A;
598 c12 = iparams[itype].lj14.c12A;
601 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
602 c6 = iparams[itype].ljc14.c6;
603 c12 = iparams[itype].ljc14.c12;
606 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
607 c6 = iparams[itype].ljcnb.c6;
608 c12 = iparams[itype].ljcnb.c12;
611 /* Cannot happen since we called gmx_fatal() above in this case */
612 qq = c6 = c12 = 0; /* Keep compiler happy */
616 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
617 * included in the general nfbp array now. This means the tables are scaled down by the
618 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
624 /* Do we need to apply full periodic boundary conditions? */
625 if (fr->bMolPBC == TRUE)
627 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
631 fshift_index = CENTRAL;
632 rvec_sub(x[ai], x[aj], dx);
636 if (r2 >= fr->tab14.r*fr->tab14.r)
638 if (warned_rlimit == FALSE)
640 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
641 warned_rlimit = TRUE;
648 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
649 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
650 c6B = iparams[itype].lj14.c6B*6.0;
651 c12B = iparams[itype].lj14.c12B*12.0;
653 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
654 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
655 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
656 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
660 /* Evaluate tabulated interaction without free energy */
661 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
664 energygrp_elec[gid] += velec;
665 energygrp_vdw[gid] += vvdw;
666 svmul(fscal, dx, dx);
674 /* Correct the shift forces using the graph */
675 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
676 fshift_index = IVEC2IS(dt);
678 if (fshift_index != CENTRAL)
680 rvec_inc(fshift[fshift_index], dx);
681 rvec_dec(fshift[CENTRAL], dx);