2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
43 #include <thread_mpi.h>
60 #include "gmx_fatal.h"
66 #include "nonbonded.h"
68 #include "nb_kernel.h"
69 #include "nb_free_energy.h"
70 #include "nb_generic.h"
71 #include "nb_generic_cg.h"
72 #include "nb_generic_adress.h"
74 /* Different default (c) and accelerated interaction-specific kernels */
75 #include "nb_kernel_c/nb_kernel_c.h"
77 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
78 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
83 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
84 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
86 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
87 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
89 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
90 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
92 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
93 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
95 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
96 # include "nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
98 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
99 # include "nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
103 #ifdef GMX_THREAD_MPI
104 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
106 static gmx_bool nonbonded_setup_done = FALSE;
110 gmx_nonbonded_setup(FILE * fplog,
112 gmx_bool bGenericKernelOnly)
114 #ifdef GMX_THREAD_MPI
115 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
117 /* Here we are guaranteed only one thread made it. */
118 if(nonbonded_setup_done==FALSE)
120 if(bGenericKernelOnly==FALSE)
122 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
123 nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
125 if(!(fr!=NULL && fr->use_cpu_acceleration==FALSE))
127 /* Add interaction-specific kernels for different architectures */
128 /* Single precision */
129 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
130 nb_kernel_list_add_kernels(kernellist_sse2_single,kernellist_sse2_single_size);
132 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
133 nb_kernel_list_add_kernels(kernellist_sse4_1_single,kernellist_sse4_1_single_size);
135 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
136 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single,kernellist_avx_128_fma_single_size);
138 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
139 nb_kernel_list_add_kernels(kernellist_avx_256_single,kernellist_avx_256_single_size);
141 /* Double precision */
142 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
143 nb_kernel_list_add_kernels(kernellist_sse2_double,kernellist_sse2_double_size);
145 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
146 nb_kernel_list_add_kernels(kernellist_sse4_1_double,kernellist_sse4_1_double_size);
148 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
149 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double,kernellist_avx_128_fma_double_size);
151 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
152 nb_kernel_list_add_kernels(kernellist_avx_256_double,kernellist_avx_256_double_size);
154 ; /* empty statement to avoid a completely empty block */
157 /* Create a hash for faster lookups */
158 nb_kernel_list_hash_init();
160 nonbonded_setup_done=TRUE;
162 #ifdef GMX_THREAD_MPI
163 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
170 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
173 const char * elec_mod;
175 const char * vdw_mod;
183 int simd_padding_width;
187 /* Single precision */
188 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
189 { "avx_256_single", 8 },
191 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
192 { "avx_128_fma_single", 4 },
194 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
195 { "sse4_1_single", 4 },
197 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
198 { "sse2_single", 4 },
200 /* Double precision */
201 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256 && defined GMX_DOUBLE)
202 { "avx_256_double", 4 },
204 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA && defined GMX_DOUBLE)
205 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
206 * since the kernels execute a loop unrolled a factor 2, followed by
207 * a possible single odd-element epilogue.
209 { "avx_128_fma_double", 1 },
211 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
212 /* No padding - see comment above */
213 { "sse2_double", 1 },
215 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
216 /* No padding - see comment above */
217 { "sse4_1_double", 1 },
221 int narch = asize(arch_and_padding);
224 if(nonbonded_setup_done==FALSE)
226 /* We typically call this setup routine before starting timers,
227 * but if that has not been done for whatever reason we do it now.
229 gmx_nonbonded_setup(NULL,NULL,FALSE);
235 nl->kernelptr_vf = NULL;
236 nl->kernelptr_v = NULL;
237 nl->kernelptr_f = NULL;
239 elec = gmx_nbkernel_elec_names[nl->ielec];
240 elec_mod = eintmod_names[nl->ielecmod];
241 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
242 vdw_mod = eintmod_names[nl->ivdwmod];
243 geom = gmx_nblist_geometry_names[nl->igeometry];
247 nl->kernelptr_vf = gmx_nb_free_energy_kernel;
248 nl->kernelptr_f = gmx_nb_free_energy_kernel;
249 nl->simd_padding_width = 1;
251 else if(!gmx_strcasecmp_min(geom,"CG-CG"))
253 nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
254 nl->kernelptr_f = gmx_nb_generic_cg_kernel;
255 nl->simd_padding_width = 1;
259 /* Try to find a specific kernel first */
261 for(i=0;i<narch && nl->kernelptr_vf==NULL ;i++)
263 nl->kernelptr_vf = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
264 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
266 for(i=0;i<narch && nl->kernelptr_f==NULL ;i++)
268 nl->kernelptr_f = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
269 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
271 /* If there is not force-only optimized kernel, is there a potential & force one? */
272 if(nl->kernelptr_f == NULL)
274 nl->kernelptr_f = nb_kernel_list_findkernel(NULL,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
275 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
279 /* Give up, pick a generic one instead */
280 if(nl->kernelptr_vf==NULL)
282 nl->kernelptr_vf = gmx_nb_generic_kernel;
283 nl->kernelptr_f = gmx_nb_generic_kernel;
284 nl->simd_padding_width = 1;
288 "WARNING - Slow generic NB kernel used for neighborlist with\n"
289 " Elec: '%s', Modifier: '%s'\n"
290 " Vdw: '%s', Modifier: '%s'\n"
291 " Geom: '%s', Other: '%s'\n\n",
292 elec,elec_mod,vdw,vdw_mod,geom,other);
300 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
301 rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
302 gmx_grppairener_t *grppener,rvec box_size,
303 t_nrnb *nrnb,real *lambda, real *dvdl,
304 int nls,int eNL,int flags)
307 int n,n0,n1,i,i0,i1,sz,range;
309 nb_kernel_data_t kernel_data;
310 nb_kernel_t * kernelptr=NULL;
313 kernel_data.flags = flags;
314 kernel_data.exclusions = excl;
315 kernel_data.lambda = lambda;
316 kernel_data.dvdl = dvdl;
345 for(n=n0; (n<n1); n++)
347 nblists = &fr->nblists[n];
349 kernel_data.table_elec = &nblists->table_elec;
350 kernel_data.table_vdw = &nblists->table_vdw;
351 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
353 for(range=0;range<2;range++)
355 /* Are we doing short/long-range? */
359 if(!(flags & GMX_NONBONDED_DO_SR))
363 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
364 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
365 kernel_data.energygrp_polarization = grppener->ener[egGB];
366 nlist = nblists->nlist_sr;
372 if(!(flags & GMX_NONBONDED_DO_LR))
376 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
377 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
378 kernel_data.energygrp_polarization = grppener->ener[egGB];
379 nlist = nblists->nlist_lr;
383 for(i=i0; (i<i1); i++)
385 if (nlist[i].nri > 0)
387 if(flags & GMX_NONBONDED_DO_POTENTIAL)
389 /* Potential and force */
390 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
394 /* Force only, no potential */
395 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
398 if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
400 /* We don't need the non-perturbed interactions */
403 (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
411 nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
413 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
414 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
415 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
416 "a smaller molecule you are decoupling during a free energy calculation.\n"
417 "Since interactions at distances beyond the table cannot be computed,\n"
418 "they are skipped until they are inside the table limit again. You will\n"
419 "only see this message once, even if it occurs for several interactions.\n\n"
420 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
421 "probably something wrong with your system. Only change the table-extension\n"
422 "distance in the mdp file if you are really sure that is the reason.\n",
423 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
428 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
429 x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
430 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
436 /* This might logically belong better in the nb_generic.c module, but it is only
437 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
438 * extra functional call for every single pair listed in the topology.
441 nb_evaluate_single(real r2, real tabscale,real *vftab,
442 real qq, real c6, real c12, real *velec, real *vvdw)
444 real rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
447 /* Do the tabulated interactions - first table lookup */
448 rinv = gmx_invsqrt(r2);
458 Geps = eps*vftab[ntab+2];
459 Heps2 = eps2*vftab[ntab+3];
462 FFe = Fp+Geps+2.0*Heps2;
466 Geps = eps*vftab[ntab+6];
467 Heps2 = eps2*vftab[ntab+7];
470 FFd = Fp+Geps+2.0*Heps2;
474 Geps = eps*vftab[ntab+10];
475 Heps2 = eps2*vftab[ntab+11];
478 FFr = Fp+Geps+2.0*Heps2;
481 *vvdw = c6*VVd+c12*VVr;
483 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
490 do_nonbonded_listed(int ftype,int nbonds,
491 const t_iatom iatoms[],const t_iparams iparams[],
492 const rvec x[],rvec f[],rvec fshift[],
493 const t_pbc *pbc,const t_graph *g,
494 real *lambda, real *dvdl,
496 const t_forcerec *fr,gmx_grppairener_t *grppener,
497 int *global_atom_index)
503 int i,j,itype,ai,aj,gid;
506 real fscal,velec,vvdw;
507 real * energygrp_elec;
508 real * energygrp_vdw;
509 static gmx_bool warned_rlimit=FALSE;
510 /* Free energy stuff */
511 gmx_bool bFreeEnergy;
512 real LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
513 real qqB,c6B,c12B,sigma2_def,sigma2_min;
519 energygrp_elec = grppener->ener[egCOUL14];
520 energygrp_vdw = grppener->ener[egLJ14];
523 energygrp_elec = grppener->ener[egCOULSR];
524 energygrp_vdw = grppener->ener[egLJSR];
527 energygrp_elec = NULL; /* Keep compiler happy */
528 energygrp_vdw = NULL; /* Keep compiler happy */
529 gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
533 if(fr->efep != efepNO)
535 /* Lambda factor for state A=1-lambda and B=lambda */
536 LFC[0] = 1.0 - lambda[efptCOUL];
537 LFV[0] = 1.0 - lambda[efptVDW];
538 LFC[1] = lambda[efptCOUL];
539 LFV[1] = lambda[efptVDW];
541 /*derivative of the lambda factor for state A and B */
546 sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
547 sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
551 lfac_coul[i] = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
552 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
553 lfac_vdw[i] = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
554 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
559 sigma2_min = sigma2_def = 0;
563 for(i=0; (i<nbonds); )
568 gid = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
574 (fr->efep != efepNO &&
575 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
576 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
577 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
578 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
579 c6 = iparams[itype].lj14.c6A;
580 c12 = iparams[itype].lj14.c12A;
583 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
584 c6 = iparams[itype].ljc14.c6;
585 c12 = iparams[itype].ljc14.c12;
588 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
589 c6 = iparams[itype].ljcnb.c6;
590 c12 = iparams[itype].ljcnb.c12;
593 /* Cannot happen since we called gmx_fatal() above in this case */
594 qq = c6 = c12 = 0; /* Keep compiler happy */
598 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
599 * included in the general nfbp array now. This means the tables are scaled down by the
600 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
606 /* Do we need to apply full periodic boundary conditions? */
607 if(fr->bMolPBC==TRUE)
609 fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
613 fshift_index = CENTRAL;
614 rvec_sub(x[ai],x[aj],dx);
618 if(r2>=fr->tab14.r*fr->tab14.r)
620 if(warned_rlimit==FALSE)
622 nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
630 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
631 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
632 c6B = iparams[itype].lj14.c6B*6.0;
633 c12B = iparams[itype].lj14.c12B*12.0;
635 fscal = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
636 fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
637 LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
638 fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
642 /* Evaluate tabulated interaction without free energy */
643 fscal = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
646 energygrp_elec[gid] += velec;
647 energygrp_vdw[gid] += vvdw;
656 /* Correct the shift forces using the graph */
657 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
658 fshift_index = IVEC2IS(dt);
660 if(fshift_index!=CENTRAL)
662 rvec_inc(fshift[fshift_index],dx);
663 rvec_dec(fshift[CENTRAL],dx);