1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
 * GROningen Mixture of Alchemy and Childrens' Stories
 */
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
76 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
78 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
79 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
81 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
82 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
84 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
85 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
87 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
88 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
90 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
91 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
93 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
94 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
96 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
97 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
101 #ifdef GMX_THREAD_MPI
102 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
104 static gmx_bool nonbonded_setup_done = FALSE;
/* One-time registration of all compiled-in nonbonded kernel lists.
 *
 * Adds the generic C kernels and, unless CPU acceleration is disabled in the
 * force record, every architecture-specific kernel list selected by the
 * GMX_CPU_ACCELERATION_* / GMX_DOUBLE preprocessor configuration; finally
 * builds the lookup hash. Guarded by nonbonded_setup_mutex under thread-MPI
 * so only the first caller performs the work.
 *
 * fplog              - log file handle (may be NULL)
 * bGenericKernelOnly - when FALSE, register the optimized kernels too
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, matching
 * #endif directives, and a t_forcerec* parameter 'fr' that the body reads) -
 * verify against the complete file.
 */
gmx_nonbonded_setup(FILE * fplog,
                    gmx_bool bGenericKernelOnly)
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
    /* Here we are guaranteed only one thread made it. */
    if (nonbonded_setup_done == FALSE)
        if (bGenericKernelOnly == FALSE)
            /* Add the generic kernels to the structure stored statically in nb_kernel.c */
            nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
            /* Register accelerated kernels unless the force record explicitly
             * disables CPU acceleration.
             */
            if (!(fr != NULL && fr->use_cpu_acceleration == FALSE))
                /* Add interaction-specific kernels for different architectures */
                /* Single precision */
#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
                /* Double precision */
#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
                nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
                ; /* empty statement to avoid a completely empty block */
        /* Create a hash for faster lookups */
        nb_kernel_list_hash_init();
        nonbonded_setup_done = TRUE;
#ifdef GMX_THREAD_MPI
    tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
/* Install kernel function pointers and the SIMD neighbor-list padding width
 * on neighborlist 'nl'.
 *
 * Special list types (AdResS, free-energy, charge-group geometry) get their
 * dedicated generic kernels with padding 1. Otherwise the optimized kernel
 * lists are searched per architecture (in the priority order of the
 * arch_and_padding table) for "PotentialAndForce" and "Force" variants, and
 * as a last resort the slow generic kernel is installed with a warning
 * printed to 'log'.
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, the
 * struct declaration heading the arch_and_padding table, matching #endif
 * directives, and the fprintf opening for the warning) - verify against the
 * complete file.
 */
gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
    const char * elec_mod;
    const char * vdw_mod;
    /* Neighborlist padding (in elements) required by each SIMD kernel set. */
    int simd_padding_width;
    /* Single precision */
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
    { "avx_256_single", 8 },
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
    { "avx_128_fma_single", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
    { "sse4_1_single", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
    { "sse2_single", 4 },
    /* Double precision */
#if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
    { "avx_256_double", 4 },
#if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
    /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
     * since the kernels execute a loop unrolled a factor 2, followed by
     * a possible single odd-element epilogue.
     */
    { "avx_128_fma_double", 1 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
    /* No padding - see comment above */
    { "sse2_double", 1 },
#if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
    /* No padding - see comment above */
    { "sse4_1_double", 1 },
    int narch = asize(arch_and_padding);

    if (nonbonded_setup_done == FALSE)
        /* We typically call this setup routine before starting timers,
         * but if that has not been done for whatever reason we do it now.
         */
        gmx_nonbonded_setup(NULL, NULL, FALSE);

    /* Clear any previously installed kernel pointers. */
    nl->kernelptr_vf = NULL;
    nl->kernelptr_v = NULL;
    nl->kernelptr_f = NULL;

    /* Human-readable names used as lookup keys into the kernel lists. */
    elec = gmx_nbkernel_elec_names[nl->ielec];
    elec_mod = eintmod_names[nl->ielecmod];
    vdw = gmx_nbkernel_vdw_names[nl->ivdw];
    vdw_mod = eintmod_names[nl->ivdwmod];
    geom = gmx_nblist_geometry_names[nl->igeometry];

    if (nl->type == GMX_NBLIST_INTERACTION_ADRESS)
        /* AdResS lists always use the dedicated generic AdResS kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_generic_adress_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_adress_kernel;
        nl->simd_padding_width = 1;
    if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
        /* Perturbed interactions go through the free-energy kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_free_energy_kernel;
        nl->kernelptr_f = (void *) gmx_nb_free_energy_kernel;
        nl->simd_padding_width = 1;
    else if (!gmx_strcasecmp_min(geom, "CG-CG"))
        /* Charge-group - charge-group geometry uses the generic CG kernel. */
        nl->kernelptr_vf = (void *) gmx_nb_generic_cg_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_cg_kernel;
        nl->simd_padding_width = 1;
    /* Try to find a specific kernel first */
    for (i = 0; i < narch && nl->kernelptr_vf == NULL; i++)
        nl->kernelptr_vf = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
        nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
    for (i = 0; i < narch && nl->kernelptr_f == NULL; i++)
        nl->kernelptr_f = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
        nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
        /* If there is not force-only optimized kernel, is there a potential & force one? */
        if (nl->kernelptr_f == NULL)
            nl->kernelptr_f = (void *) nb_kernel_list_findkernel(NULL, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
            nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
    /* Give up, pick a generic one instead */
    if (nl->kernelptr_vf == NULL)
        nl->kernelptr_vf = (void *) gmx_nb_generic_kernel;
        nl->kernelptr_f = (void *) gmx_nb_generic_kernel;
        nl->simd_padding_width = 1;
        /* Warn so use of the slow generic kernel is visible in the log
         * (continuation of an fprintf whose opening line is not visible
         * in this excerpt).
         */
            "WARNING - Slow generic NB kernel used for neighborlist with\n"
            "    Elec: '%s', Modifier: '%s'\n"
            "    Vdw: '%s', Modifier: '%s'\n"
            "    Geom: '%s', Other: '%s'\n\n",
            elec, elec_mod, vdw, vdw_mod, geom, other);
/* Dispatch the nonbonded inner-loop kernels over the neighborlist sets.
 *
 * For each fr->nblists[n] the interpolation tables are installed into
 * kernel_data, then the short-range (SR) and long-range (LR) lists are
 * processed in turn: the appropriate per-list kernel pointer
 * (potential+force or force-only) is selected from 'flags' and invoked for
 * every non-empty list.
 *
 * NOTE(review): this excerpt omits lines (braces, the computation of the
 * n0/n1 and i0/i1 loop bounds, the selection of the force array 'f', and
 * declarations of locals such as 'nblists' and 'nlist') - verify against
 * the complete file.
 */
void do_nonbonded(t_commrec *cr, t_forcerec *fr,
                  rvec x[], rvec f_shortrange[], rvec f_longrange[], t_mdatoms *mdatoms, t_blocka *excl,
                  gmx_grppairener_t *grppener, rvec box_size,
                  t_nrnb *nrnb, real *lambda, real *dvdl,
                  int nls, int eNL, int flags)
    int n, n0, n1, i, i0, i1, sz, range;
    nb_kernel_data_t kernel_data;
    nb_kernel_t * kernelptr = NULL;

    /* Common data handed to every kernel invocation below. */
    kernel_data.flags = flags;
    kernel_data.exclusions = excl;
    kernel_data.lambda = lambda;
    kernel_data.dvdl = dvdl;

    for (n = n0; (n < n1); n++)
        nblists = &fr->nblists[n];
        /* Interpolation tables belonging to this neighborlist set. */
        kernel_data.table_elec = &nblists->table_elec;
        kernel_data.table_vdw = &nblists->table_vdw;
        kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
        for (range = 0; range < 2; range++)
            /* Are we doing short/long-range? */
            if (!(flags & GMX_NONBONDED_DO_SR))
            /* Short-range pass: accumulate into the SR energy groups. */
            kernel_data.energygrp_elec = grppener->ener[egCOULSR];
            kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
            kernel_data.energygrp_polarization = grppener->ener[egGB];
            nlist = nblists->nlist_sr;
            if (!(flags & GMX_NONBONDED_DO_LR))
            /* Long-range pass: accumulate into the LR energy groups. */
            kernel_data.energygrp_elec = grppener->ener[egCOULLR];
            kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
            kernel_data.energygrp_polarization = grppener->ener[egGB];
            nlist = nblists->nlist_lr;
        for (i = i0; (i < i1); i++)
            /* Skip empty lists. */
            if (nlist[i].nri > 0)
                if (flags & GMX_NONBONDED_DO_POTENTIAL)
                    /* Potential and force */
                    kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
                    /* Force only, no potential */
                    kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
                if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
                    /* We don't need the non-perturbed interactions */
                (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
/* Emit the one-time user warning for a listed (pair/1-4) nonbonded
 * interaction whose distance 'r' exceeds the table limit 'rlimit', then dump
 * the two particle coordinates for debugging.
 *
 * NOTE(review): this excerpt omits lines (the return type, braces, and the
 * opening of the coordinate-dump fprintf call) - verify against the
 * complete file.
 */
nb_listed_warning_rlimit(const rvec *x, int ai, int aj, int * global_atom_index, real r, real rlimit)
    gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
                "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
                "This is likely either a 1,4 interaction, or a listed interaction inside\n"
                "a smaller molecule you are decoupling during a free energy calculation.\n"
                "Since interactions at distances beyond the table cannot be computed,\n"
                "they are skipped until they are inside the table limit again. You will\n"
                "only see this message once, even if it occurs for several interactions.\n\n"
                "IMPORTANT: This should not happen in a stable simulation, so there is\n"
                "probably something wrong with your system. Only change the table-extension\n"
                "distance in the mdp file if you are really sure that is the reason.\n",
                glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r, rlimit);

    /* Coordinate dump (continuation of an fprintf whose opening line is not
     * visible in this excerpt).
     */
            "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
            x[ai][XX], x[ai][YY], x[ai][ZZ], x[aj][XX], x[aj][YY], x[aj][ZZ],
            glatnr(global_atom_index, ai), glatnr(global_atom_index, aj), r);
/* This might logically belong better in the nb_generic.c module, but it is only
 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
 * extra functional call for every single pair listed in the topology.
 *
 * Evaluates a single tabulated pair interaction: Coulomb (charge product qq),
 * dispersion (c6) and repulsion (c12) from the spline table 'vftab' at
 * squared distance r2, writing the energies to *velec / *vvdw and returning
 * the scalar force factor via 'fscal'.
 */
nb_evaluate_single(real r2, real tabscale, real *vftab,
                   real qq, real c6, real c12, real *velec, real *vvdw)
    real rinv, r, rtab, eps, eps2, Y, F, Geps, Heps2, Fp, VVe, FFe, VVd, FFd, VVr, FFr, fscal;

    /* Do the tabulated interactions - first table lookup */
    rinv = gmx_invsqrt(r2);
    /* Cubic-spline interpolation: each table point stores four coefficients
     * (Y, F, G, H). The offsets used below - +2/+3 for the Coulomb (FFe)
     * block, +6/+7 for dispersion (FFd), +10/+11 for repulsion (FFr) -
     * imply a stride of 12 reals per point. The base index 'ntab' and the
     * fractional offsets eps/eps2 are computed on lines not visible in this
     * excerpt - confirm against the complete file.
     */
    Geps = eps*vftab[ntab+2];
    Heps2 = eps2*vftab[ntab+3];
    /* Spline derivative for the Coulomb part: Fp + G*eps + 2*H*eps^2. */
    FFe = Fp+Geps+2.0*Heps2;
    Geps = eps*vftab[ntab+6];
    Heps2 = eps2*vftab[ntab+7];
    FFd = Fp+Geps+2.0*Heps2;
    Geps = eps*vftab[ntab+10];
    Heps2 = eps2*vftab[ntab+11];
    FFr = Fp+Geps+2.0*Heps2;

    /* Total VdW energy: dispersion plus repulsion contributions. */
    *vvdw = c6*VVd+c12*VVr;

    /* Scalar force: negative derivative of the tabulated potentials,
     * rescaled from table units and divided by r (via rinv).
     */
    fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
/* Compute listed ("bonded-style") nonbonded pair interactions: 1-4 pairs
 * (lj14), pairs with individual parameters (ljc14) and ljcnb pairs,
 * including a softcore free-energy path for perturbed 1-4 pairs.
 *
 * NOTE(review): this excerpt omits many lines (braces, switch/case labels,
 * a t_mdatoms* parameter 'md' that the body reads, several locals such as
 * qq/c6/c12/r2/dx/fshift_index, and the function tail beyond the
 * shift-force update) - verify against the complete file.
 */
do_nonbonded_listed(int ftype, int nbonds,
                    const t_iatom iatoms[], const t_iparams iparams[],
                    const rvec x[], rvec f[], rvec fshift[],
                    const t_pbc *pbc, const t_graph *g,
                    real *lambda, real *dvdl,
                    const t_forcerec *fr, gmx_grppairener_t *grppener,
                    int *global_atom_index)
    int i, j, itype, ai, aj, gid;
    real fscal, velec, vvdw;
    real * energygrp_elec;
    real * energygrp_vdw;
    /* Static latch: the over-table-limit warning is printed at most once. */
    static gmx_bool warned_rlimit = FALSE;
    /* Free energy stuff */
    gmx_bool bFreeEnergy;
    real LFC[2], LFV[2], DLF[2], lfac_coul[2], lfac_vdw[2], dlfac_coul[2], dlfac_vdw[2];
    real qqB, c6B, c12B, sigma2_def, sigma2_min;

    /* Select the energy-group accumulators by function type. The following
     * assignments are alternative branches of a switch whose case labels are
     * not visible in this excerpt.
     */
    energygrp_elec = grppener->ener[egCOUL14];
    energygrp_vdw = grppener->ener[egLJ14];
    energygrp_elec = grppener->ener[egCOULSR];
    energygrp_vdw = grppener->ener[egLJSR];
    energygrp_elec = NULL; /* Keep compiler happy */
    energygrp_vdw = NULL; /* Keep compiler happy */
    gmx_fatal(FARGS, "Unknown function type %d in do_nonbonded14", ftype);

    if (fr->efep != efepNO)
        /* Lambda factor for state A=1-lambda and B=lambda */
        LFC[0] = 1.0 - lambda[efptCOUL];
        LFV[0] = 1.0 - lambda[efptVDW];
        LFC[1] = lambda[efptCOUL];
        LFV[1] = lambda[efptVDW];

        /* derivative of the lambda factor for state A and B */
        /* sigma^2 from the default/minimum softcore sigma^6 parameters. */
        sigma2_def = pow(fr->sc_sigma6_def, 1.0/3.0);
        sigma2_min = pow(fr->sc_sigma6_min, 1.0/3.0);

        for (i = 0; i < 2; i++)
            /* Softcore lambda factors and their lambda-derivatives for both
             * end states; squared when sc_power == 2.
             */
            lfac_coul[i] = (fr->sc_power == 2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
            dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFC[i]) : 1);
            lfac_vdw[i] = (fr->sc_power == 2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
            dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power == 2 ? (1-LFV[i]) : 1);
        sigma2_min = sigma2_def = 0;

    /* Main pair loop; 'i' is advanced inside the body (increment not
     * visible in this excerpt).
     */
    for (i = 0; (i < nbonds); )
        gid = GID(md->cENER[ai], md->cENER[aj], md->nenergrp);
        /* A pair is treated as perturbed when either atom is perturbed or
         * its A/B LJ parameters differ (continuation of a bFreeEnergy
         * assignment whose opening line is not visible in this excerpt).
         */
        (fr->efep != efepNO &&
         ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
          iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
          iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
        /* Per-type parameter extraction; the following groups are
         * alternative switch branches (case labels not visible here).
         */
        qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
        c6 = iparams[itype].lj14.c6A;
        c12 = iparams[itype].lj14.c12A;
        qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
        c6 = iparams[itype].ljc14.c6;
        c12 = iparams[itype].ljc14.c12;
        qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
        c6 = iparams[itype].ljcnb.c6;
        c12 = iparams[itype].ljcnb.c12;
        /* Cannot happen since we called gmx_fatal() above in this case */
        qq = c6 = c12 = 0; /* Keep compiler happy */

        /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
         * included in the general nfbp array now. This means the tables are scaled down by the
         * same factor, so when we use the original c6/c12 parameters from iparams[] they must
         * [comment truncated in this excerpt]
         */

        /* Do we need to apply full periodic boundary conditions? */
        if (fr->bMolPBC == TRUE)
            fshift_index = pbc_dx_aiuc(pbc, x[ai], x[aj], dx);
            fshift_index = CENTRAL;
            rvec_sub(x[ai], x[aj], dx);

        /* Pairs beyond the 1-4 table range are skipped, with a one-time warning. */
        if (r2 >= fr->tab14.r*fr->tab14.r)
            if (warned_rlimit == FALSE)
                nb_listed_warning_rlimit(x, ai, aj, global_atom_index, sqrt(r2), fr->tab14.r);
                warned_rlimit = TRUE;

        /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
        qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
        /* B-state LJ parameters carry the 6.0/12.0 derivative prefactors
         * discussed in the comment above.
         */
        c6B = iparams[itype].lj14.c6B*6.0;
        c12B = iparams[itype].lj14.c12B*12.0;

        fscal = nb_free_energy_evaluate_single(r2, fr->sc_r_power, fr->sc_alphacoul, fr->sc_alphavdw,
                                               fr->tab14.scale, fr->tab14.data, qq, c6, c12, qqB, c6B, c12B,
                                               LFC, LFV, DLF, lfac_coul, lfac_vdw, dlfac_coul, dlfac_vdw,
                                               fr->sc_sigma6_def, fr->sc_sigma6_min, sigma2_def, sigma2_min, &velec, &vvdw, dvdl);

        /* Evaluate tabulated interaction without free energy */
        fscal = nb_evaluate_single(r2, fr->tab14.scale, fr->tab14.data, qq, c6, c12, &velec, &vvdw);

        /* Accumulate energies; turn the scalar force into a vector along dx. */
        energygrp_elec[gid] += velec;
        energygrp_vdw[gid] += vvdw;
        svmul(fscal, dx, dx);

        /* Correct the shift forces using the graph */
        ivec_sub(SHIFT_IVEC(g, ai), SHIFT_IVEC(g, aj), dt);
        fshift_index = IVEC2IS(dt);
        if (fshift_index != CENTRAL)
            rvec_inc(fshift[fshift_index], dx);
            rvec_dec(fshift[CENTRAL], dx);