2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include <thread_mpi.h>
60 #include "gmx_fatal.h"
66 #include "nonbonded.h"
68 #include "nb_kernel.h"
69 #include "nb_free_energy.h"
70 #include "nb_generic.h"
71 #include "nb_generic_cg.h"
72 #include "nb_generic_adress.h"
74 /* Different default (c) and accelerated interaction-specific kernels */
75 #include "nb_kernel_c/nb_kernel_c.h"
77 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
78 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
83 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
84 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
86 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
87 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
89 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
90 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
92 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
93 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
95 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
96 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
98 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
99 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
101 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
102 # include "nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
106 #ifdef GMX_THREAD_MPI
107 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
109 static gmx_bool nonbonded_setup_done = FALSE;
113 gmx_nonbonded_setup(FILE * fplog,
115 gmx_bool bGenericKernelOnly)
117 #ifdef GMX_THREAD_MPI
118 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
120 /* Here we are guaranteed only one thread made it. */
121 if (nonbonded_setup_done == FALSE)
123 if (bGenericKernelOnly == FALSE)
125 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
126 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
128 if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
130 /* Add interaction-specific kernels for different architectures */
131 /* Single precision */
132 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
133 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
135 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
136 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
138 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
139 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
141 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
142 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
144 /* Double precision */
145 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
146 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
148 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
149 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
151 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
152 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
154 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
155 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
157 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
158 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double,kernellist_sparc64_hpc_ace_double_size);
160 ; /* empty statement to avoid a completely empty block */
163 /* Create a hash for faster lookups */
164 nb_kernel_list_hash_init();
166 nonbonded_setup_done = TRUE;
168 #ifdef GMX_THREAD_MPI
169 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
176 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
179 const char * elec_mod;
181 const char * vdw_mod;
189 int simd_padding_width;
193 /* Single precision */
194 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
195 { "avx_256_single", 8 },
197 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
198 { "avx_128_fma_single", 4 },
200 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
201 { "sse4_1_single", 4 },
203 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
204 { "sse2_single", 4 },
206 /* Double precision */
207 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
208 { "avx_256_double", 4 },
210 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
211 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
212 * since the kernels execute a loop unrolled a factor 2, followed by
213 * a possible single odd-element epilogue.
215 { "avx_128_fma_double", 1 },
217 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
218 /* No padding - see comment above */
219 { "sse2_double", 1 },
221 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
222 /* No padding - see comment above */
223 { "sse4_1_double", 1 },
225 #if (defined GMX_CPU_ACCELERATION_SPARC64_HPC_ACE && defined GMX_DOUBLE)
226 /* No padding - see comment above */
227 { "sparc64_hpc_ace_double", 1 },
231 int narch = asize(arch_and_padding);
234 if (nonbonded_setup_done == FALSE)
236 /* We typically call this setup routine before starting timers,
237 * but if that has not been done for whatever reason we do it now.
239 gmx_nonbonded_setup(NULL, NULL, FALSE);
245 nl->kernelptr_vf = NULL;
246 nl->kernelptr_v = NULL;
247 nl->kernelptr_f = NULL;
249 elec = gmx_nbkernel_elec_names[nl->ielec];
250 elec_mod = eintmod_names[nl->ielecmod];
251 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
252 vdw_mod = eintmod_names[nl->ivdwmod];
253 geom = gmx_nblist_geometry_names[nl->igeometry];
255 if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
257 nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
258 nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
259 nl->simd_padding_width = 1;
263 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
265 nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
266 nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
267 nl->simd_padding_width = 1;
269 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
271 nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
272 nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
273 nl->simd_padding_width = 1;
277 /* Try to find a specific kernel first */
279 for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
281 nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
282 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
284 for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
286 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
287 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
289 /* If there is not force-only optimized kernel, is there a potential & force one? */
290 if (nl->kernelptr_f == NULL)
292 nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
293 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
297 /* Give up, pick a generic one instead */
298 if (nl->kernelptr_vf == NULL)
300 nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
301 nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
302 nl->simd_padding_width = 1;
306 "WARNING - Slow generic NB kernel used for neighborlist with\n"
307 " Elec: '%s', Modifier: '%s'\n"
308 " Vdw: '%s', Modifier: '%s'\n"
309 " Geom: '%s', Other: '%s'\n\n",
310 elec, elec_mod, vdw, vdw_mod, geom, other);
318 void do_nonbonded(t_commrec *cr, t_forcerec *fr,
319 rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
320 gmx_grppairener_t *grppener, rvec box_size,
321 t_nrnb *nrnb, real *lambda, real *dvdl,
322 int nls, int eNL, int flags)
325 int n, n0, n1, i, i0, i1, sz, range;
327 nb_kernel_data_t kernel_data;
328 nb_kernel_t * kernelptr = NULL;
331 kernel_data.flags = flags;
332 kernel_data.exclusions = excl;
333 kernel_data.lambda = lambda;
334 kernel_data.dvdl = dvdl;
338 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
364 for (n = n0; (n < n1); n++)
366 nblists = &fr->nblists[n];
368 kernel_data.table_elec = &nblists->table_elec;
369 kernel_data.table_vdw = &nblists->table_vdw;
370 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
372 for (range = 0; range < 2; range++)
374 /* Are we doing short/long-range? */
378 if (!(flags & GMX_NONBONDED_DO_SR))
382 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
383 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
384 kernel_data.energygrp_polarization = grppener->ener[egGB];
385 nlist = nblists->nlist_sr;
391 if (!(flags & GMX_NONBONDED_DO_LR))
395 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
396 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
397 kernel_data.energygrp_polarization = grppener->ener[egGB];
398 nlist = nblists->nlist_lr;
402 for (i = i0; (i < i1); i++)
404 if (nlist[i].nri > 0)
406 if (flags & GMX_NONBONDED_DO_POTENTIAL)
408 /* Potential and force */
409 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
413 /* Force only, no potential */
414 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
417 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
419 /* We don't need the non-perturbed interactions */
422 (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
430 nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
432 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
433 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
434 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
435 "a smaller molecule you are decoupling during a free energy calculation.\n"
436 "Since interactions at distances beyond the table cannot be computed,\n"
437 "they are skipped until they are inside the table limit again. You will\n"
438 "only see this message once, even if it occurs for several interactions.\n\n"
439 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
440 "probably something wrong with your system. Only change the table-extension\n"
441 "distance in the mdp file if you are really sure that is the reason.\n",
442 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);
447 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
448 x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
449 glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
455 /* This might logically belong better in the nb_generic.c module, but it is only
456 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
457 * extra functional call for every single pair listed in the topology.
460 nb_evaluate_single(real r2, real tabscale, real *vftab,
461 real qq, real c6, real c12, real *velec, real *vvdw)
463 real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;
466 /* Do the tabulated interactions - first table lookup */
467 rinv = gmx_invsqrt(r2);
477 Geps = eps*vftab[ntab+2];
478 Heps2 = eps2*vftab[ntab+3];
481 FFe = Fp+Geps+2.0*Heps2;
485 Geps = eps*vftab[ntab+6];
486 Heps2 = eps2*vftab[ntab+7];
489 FFd = Fp+Geps+2.0*Heps2;
493 Geps = eps*vftab[ntab+10];
494 Heps2 = eps2*vftab[ntab+11];
497 FFr = Fp+Geps+2.0*Heps2;
500 *vvdw = c6*VVd+c12*VVr;
502 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
509 do_nonbonded_listed(int ftype, int nbonds,
510 const t_iatom iatoms[], const t_iparams iparams[],
511 const rvec x[], rvec f[], rvec fshift[],
512 const t_pbc *pbc, const t_graph *g,
513 real *lambda, real *dvdl,
515 const t_forcerec *fr, gmx_grppairener_t *grppener,
516 int *global_atom_index)
522 int i, j, itype, ai, aj, gid;
525 real fscal, velec, vvdw;
526 real * energygrp_elec;
527 real * energygrp_vdw;
528 static gmx_bool warned_rlimit = FALSE;
529 /* Free energy stuff */
530 gmx_bool bFreeEnergy;
531 real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
532 real qqB, c6B, c12B, sigma2_def, sigma2_min;
539 energygrp_elec = grppener->ener[egCOUL14];
540 energygrp_vdw = grppener->ener[egLJ14];
543 energygrp_elec = grppener->ener[egCOULSR];
544 energygrp_vdw = grppener->ener[egLJSR];
547 energygrp_elec = NULL; /* Keep compiler happy */
548 energygrp_vdw = NULL; /* Keep compiler happy */
549 gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);
553 if (fr->efep != efepNO)
555 /* Lambda factor for state A=1-lambda and B=lambda */
556 LFC[0] = 1.0 - lambda[efptCOUL];
557 LFV[0] = 1.0 - lambda[efptVDW];
558 LFC[1] = lambda[efptCOUL];
559 LFV[1] = lambda[efptVDW];
561 /*derivative of the lambda factor for state A and B */
566 sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
567 sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);
569 for (i = 0; i < 2; i++)
571 lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
572 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
573 lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
574 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
579 sigma2_min = sigma2_def = 0;
583 for (i = 0; (i < nbonds); )
588 gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
595 (fr->efep != efepNO &&
596 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
597 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
598 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
599 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
600 c6 = iparams[itype].lj14.c6A;
601 c12 = iparams[itype].lj14.c12A;
604 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
605 c6 = iparams[itype].ljc14.c6;
606 c12 = iparams[itype].ljc14.c12;
609 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
610 c6 = iparams[itype].ljcnb.c6;
611 c12 = iparams[itype].ljcnb.c12;
614 /* Cannot happen since we called gmx_fatal() above in this case */
615 qq = c6 = c12 = 0; /* Keep compiler happy */
619 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
620 * included in the general nfbp array now. This means the tables are scaled down by the
621 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
627 /* Do we need to apply full periodic boundary conditions? */
628 if (fr->bMolPBC == TRUE)
630 fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
634 fshift_index = CENTRAL;
635 rvec_sub(x[ai], x[aj], dx);
639 if (r2 >= fr->tab14.r*fr->tab14.r)
641 if (warned_rlimit == FALSE)
643 nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
644 warned_rlimit = TRUE;
651 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
652 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
653 c6B = iparams[itype].lj14.c6B*6.0;
654 c12B = iparams[itype].lj14.c12B*12.0;
656 fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
657 fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
658 LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
659 fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);
663 /* Evaluate tabulated interaction without free energy */
664 fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);
667 energygrp_elec[gid] += velec;
668 energygrp_vdw[gid] += vvdw;
669 svmul(fscal, dx, dx);
677 /* Correct the shift forces using the graph */
678 ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
679 fshift_index = IVEC2IS(dt);
681 if (fshift_index != CENTRAL)
683 rvec_inc(fshift[fshift_index], dx);
684 rvec_dec(fshift[CENTRAL], dx);