2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include <thread_mpi.h>
60 #include "gmx_fatal.h"
66 #include "nonbonded.h"
68 #include "nb_kernel.h"
69 #include "nb_free_energy.h"
70 #include "nb_generic.h"
71 #include "nb_generic_cg.h"
72 #include "nb_generic_adress.h"
74 /* Different default (c) and accelerated interaction-specific kernels */
75 #include "nb_kernel_c/nb_kernel_c.h"
77 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
78 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
83 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
84 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
86 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
87 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
89 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
90 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
92 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
93 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
95 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
96 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
98 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
99 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
103 #ifdef GMX_THREAD_MPI
104 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
106 static gmx_bool nonbonded_setup_done = FALSE;
110 gmx_nonbonded_setup(FILE * fplog,
112 gmx_bool bGenericKernelOnly)
114 #ifdef GMX_THREAD_MPI
115 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
117 /* Here we are guaranteed only one thread made it. */
118 if (nonbonded_setup_done == FALSE)
120 if (bGenericKernelOnly == FALSE)
122 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
123 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
125 if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
127 /* Add interaction-specific kernels for different architectures */
128 /* Single precision */
129 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
130 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
132 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
133 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
135 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
136 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
138 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
139 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
141 /* Double precision */
142 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
143 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
145 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
146 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
148 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
149 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
151 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
152 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
154 ; /* empty statement to avoid a completely empty block */
157 /* Create a hash for faster lookups */
158 nb_kernel_list_hash_init();
160 nonbonded_setup_done = TRUE;
162 #ifdef GMX_THREAD_MPI
163 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
170 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
173 const char * elec_mod;
175 const char * vdw_mod;
183 int simd_padding_width;
187 /* Single precision */
188 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
189 { "avx_256_single", 8 },
191 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
192 { "avx_128_fma_single", 4 },
194 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
195 { "sse4_1_single", 4 },
197 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
198 { "sse2_single", 4 },
200 /* Double precision */
201 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
202 { "avx_256_double", 4 },
204 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
205 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
206 * since the kernels execute a loop unrolled a factor 2, followed by
207 * a possible single odd-element epilogue.
209 { "avx_128_fma_double", 1 },
211 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
212 /* No padding - see comment above */
213 { "sse2_double", 1 },
215 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
216 /* No padding - see comment above */
217 { "sse4_1_double", 1 },
221 int narch = asize(arch_and_padding);
224 if (nonbonded_setup_done == FALSE)
226 /* We typically call this setup routine before starting timers,
227 * but if that has not been done for whatever reason we do it now.
229 gmx_nonbonded_setup(NULL, NULL, FALSE);
235 nl->kernelptr_vf = NULL;
236 nl->kernelptr_v = NULL;
237 nl->kernelptr_f = NULL;
239 elec = gmx_nbkernel_elec_names[nl->ielec];
240 elec_mod = eintmod_names[nl->ielecmod];
241 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
242 vdw_mod = eintmod_names[nl->ivdwmod];
243 geom = gmx_nblist_geometry_names[nl->igeometry];
245 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
247 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
248 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
249 nl->simd_padding_width = 1;
253 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
255 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
256 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
257 nl->simd_padding_width = 1;
259 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
261 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
262 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
263 nl->simd_padding_width = 1;
267 /* Try to find a specific kernel first */
269 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
271 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
272 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
274 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
276 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
277 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
279 /* If there is not force-only optimized kernel, is there a potential & force one? */
280 if (nl->kernelptr_f == NULL)
282 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
283 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
287 /* Give up, pick a generic one instead */
288 if (nl->kernelptr_vf == NULL)
290 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
291 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
292 nl->simd_padding_width = 1;
296 "WARNING - Slow generic NB kernel used for neighborlist with\n"
297 " Elec: '%s', Modifier: '%s'\n"
298 " Vdw: '%s', Modifier: '%s'\n"
299 " Geom: '%s', Other: '%s'\n\n",
300 elec, elec_mod, vdw, vdw_mod, geom, other);
308 void do_nonbonded(t_commrec *cr, t_forcerec *fr,
309 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
310 gmx_grppairener_t *grppener, rvec box_size,
311 t_nrnb *nrnb, real *lambda, real *dvdl,
312 int nls, int eNL, int flags)
315 int n, n0, n1, i, i0, i1, sz, range;
317 nb_kernel_data_t kernel_data;
318 nb_kernel_t * kernelptr = NULL;
321 kernel_data.flags = flags;
322 kernel_data.exclusions = excl;
323 kernel_data.lambda = lambda;
324 kernel_data.dvdl = dvdl;
353 for (n = n0; (n < n1); n++)
355 nblists = &fr->nblists[n];
357 kernel_data.table_elec = &nblists->table_elec;
358 kernel_data.table_vdw = &nblists->table_vdw;
359 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
361 for (range = 0; range < 2; range++)
363 /* Are we doing short/long-range? */
367 if (!(flags & GMX_NONBONDED_DO_SR))
371 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
372 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
373 kernel_data.energygrp_polarization = grppener->ener[egGB];
374 nlist = nblists->nlist_sr;
380 if (!(flags & GMX_NONBONDED_DO_LR))
384 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
385 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
386 kernel_data.energygrp_polarization = grppener->ener[egGB];
387 nlist = nblists->nlist_lr;
391 for (i = i0; (i < i1); i++)
393 if (nlist[i].nri > 0)
395 if (flags & GMX_NONBONDED_DO_POTENTIAL)
397 /* Potential and force */
398 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
402 /* Force only, no potential */
403 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
406 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
408 /* We don't need the non-perturbed interactions */
411 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
419 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
421 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
422 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
423 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
424 "a smaller molecule you are decoupling during a free energy calculation.\n"
425 "Since interactions at distances beyond the table cannot be computed,\n"
426 "they are skipped until they are inside the table limit again. You will\n"
427 "only see this message once, even if it occurs for several interactions.\n\n"
428 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
429 "probably something wrong with your system. Only change the table-extension\n"
430 "distance in the mdp file if you are really sure that is the reason.\n",
431 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
436 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
437 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
438 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
444 /* This might logically belong better in the nb_generic.c module, but it is only
445 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
446 * extra functional call for every single pair listed in the topology.
449 nb_evaluate_single(real r2, real tabscale, real *vftab,
450 real qq, real c6, real c12, real *velec, real *vvdw)
452 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
455 /* Do the tabulated interactions - first table lookup */
456 rinv = gmx_invsqrt(r2);
466 Geps = eps*vftab[ntab+2];
467 Heps2 = eps2*vftab[ntab+3];
470 FFe = Fp+Geps+2.0*Heps2;
474 Geps = eps*vftab[ntab+6];
475 Heps2 = eps2*vftab[ntab+7];
478 FFd = Fp+Geps+2.0*Heps2;
482 Geps = eps*vftab[ntab+10];
483 Heps2 = eps2*vftab[ntab+11];
486 FFr = Fp+Geps+2.0*Heps2;
489 *vvdw = c6*VVd+c12*VVr;
491 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
498 do_nonbonded_listed(int ftype, int nbonds,
499 const t_iatom iatoms[], const t_iparams iparams[],
500 const rvec x[], rvec f[], rvec fshift[],
501 const t_pbc *pbc, const t_graph *g,
502 real *lambda, real *dvdl,
504 const t_forcerec *fr, gmx_grppairener_t *grppener,
505 int *global_atom_index)
511 int i, j, itype, ai, aj, gid;
514 real fscal, velec, vvdw;
515 real * energygrp_elec;
516 real * energygrp_vdw;
517 static gmx_bool warned_rlimit = FALSE;
518 /* Free energy stuff */
519 gmx_bool bFreeEnergy;
520 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
521 real qqB, c6B, c12B, sigma2_def, sigma2_min;
528 energygrp_elec = grppener->ener[egCOUL14];
529 energygrp_vdw = grppener->ener[egLJ14];
532 energygrp_elec = grppener->ener[egCOULSR];
533 energygrp_vdw = grppener->ener[egLJSR];
536 energygrp_elec = NULL; /* Keep compiler happy */
537 energygrp_vdw = NULL; /* Keep compiler happy */
538 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
542 if (fr->efep != efepNO)
544 /* Lambda factor for state A=1-lambda and B=lambda */
545 LFC[0] = 1.0 - lambda[efptCOUL];
546 LFV[0] = 1.0 - lambda[efptVDW];
547 LFC[1] = lambda[efptCOUL];
548 LFV[1] = lambda[efptVDW];
550 /*derivative of the lambda factor for state A and B */
555 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
556 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
558 for (i = 0; i < 2; i++)
560 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
561 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
562 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
563 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
568 sigma2_min = sigma2_def = 0;
572 for (i = 0; (i < nbonds); )
577 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
584 (fr->efep != efepNO &&
585 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
586 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
587 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
588 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
589 c6 = iparams[itype].lj14.c6A;
590 c12 = iparams[itype].lj14.c12A;
593 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
594 c6 = iparams[itype].ljc14.c6;
595 c12 = iparams[itype].ljc14.c12;
598 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
599 c6 = iparams[itype].ljcnb.c6;
600 c12 = iparams[itype].ljcnb.c12;
603 /* Cannot happen since we called gmx_fatal() above in this case */
604 qq = c6 = c12 = 0; /* Keep compiler happy */
608 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
609 * included in the general nfbp array now. This means the tables are scaled down by the
610 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
616 /* Do we need to apply full periodic boundary conditions? */
617 if (fr->bMolPBC == TRUE)
619 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
623 fshift_index = CENTRAL;
624 rvec_sub(x[ai], x[aj], dx);
628 if (r2 >= fr->tab14.r*fr->tab14.r)
630 if (warned_rlimit == FALSE)
632 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
633 warned_rlimit = TRUE;
640 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
641 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
642 c6B = iparams[itype].lj14.c6B*6.0;
643 c12B = iparams[itype].lj14.c12B*12.0;
645 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
646 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
647 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
648 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
652 /* Evaluate tabulated interaction without free energy */
653 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
656 energygrp_elec[gid] += velec;
657 energygrp_vdw[gid] += vvdw;
658 svmul(fscal, dx, dx);
666 /* Correct the shift forces using the graph */
667 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
668 fshift_index = IVEC2IS(dt);
670 if (fshift_index != CENTRAL)
672 rvec_inc(fshift[fshift_index], dx);
673 rvec_dec(fshift[CENTRAL], dx);