1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
76 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
78 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
79 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
81 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
82 # include "nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
84 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
85 # include "nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
87 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
88 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
90 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
91 # include "nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
95 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
97 static gmx_bool nonbonded_setup_done = FALSE;
101 gmx_nonbonded_setup(FILE * fplog,
103 gmx_bool bGenericKernelOnly)
105 #ifdef GMX_THREAD_MPI
106 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
108 /* Here we are guaranteed only one thread made it. */
109 if(nonbonded_setup_done==FALSE)
111 if(bGenericKernelOnly==FALSE)
113 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
114 nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
116 if(!(fr!=NULL && fr->use_cpu_acceleration==FALSE))
118 /* Add interaction-specific kernels for different architectures */
119 /* Single precision */
120 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
121 nb_kernel_list_add_kernels(kernellist_sse2_single,kernellist_sse2_single_size);
123 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
124 nb_kernel_list_add_kernels(kernellist_sse4_1_single,kernellist_sse4_1_single_size);
126 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
127 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single,kernellist_avx_128_fma_single_size);
129 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
130 nb_kernel_list_add_kernels(kernellist_avx_256_single,kernellist_avx_256_single_size);
132 /* Double precision */
133 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
134 nb_kernel_list_add_kernels(kernellist_sse2_double,kernellist_sse2_double_size);
136 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
137 nb_kernel_list_add_kernels(kernellist_sse4_1_double,kernellist_sse4_1_double_size);
139 ; /* empty statement to avoid a completely empty block */
142 /* Create a hash for faster lookups */
143 nb_kernel_list_hash_init();
145 nonbonded_setup_done=TRUE;
147 #ifdef GMX_THREAD_MPI
148 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
155 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
158 const char * elec_mod;
160 const char * vdw_mod;
168 int simd_padding_width;
172 /* Single precision */
173 #if (defined GMX_CPU_ACCELERATION_X86_AVX_256) && !(defined GMX_DOUBLE)
174 { "avx_256_single", 8 },
176 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) && !(defined GMX_DOUBLE)
177 { "avx_128_fma_single", 4 },
179 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
180 { "sse4_1_single", 4 },
182 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
183 { "sse2_single", 4 },
185 /* Double precision */
186 #if (defined GMX_CPU_ACCELERATION_X86_SSE2 && defined GMX_DOUBLE)
187 /* Sic. Double precision SSE2 does not require neighbor list padding,
188 * since the kernels execute a loop unrolled a factor 2, followed by
189 * a possible single odd-element epilogue.
191 { "sse2_double", 1 },
193 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1 && defined GMX_DOUBLE)
194 /* No padding - see SSE2 double comment */
195 { "sse4_1_double", 1 },
199 int narch = asize(arch_and_padding);
202 if(nonbonded_setup_done==FALSE)
204 /* We typically call this setup routine before starting timers,
205 * but if that has not been done for whatever reason we do it now.
207 gmx_nonbonded_setup(NULL,NULL,FALSE);
213 nl->kernelptr_vf = NULL;
214 nl->kernelptr_v = NULL;
215 nl->kernelptr_f = NULL;
217 elec = gmx_nbkernel_elec_names[nl->ielec];
218 elec_mod = eintmod_names[nl->ielecmod];
219 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
220 vdw_mod = eintmod_names[nl->ivdwmod];
221 geom = gmx_nblist_geometry_names[nl->igeometry];
225 nl->kernelptr_vf = gmx_nb_free_energy_kernel;
226 nl->kernelptr_f = gmx_nb_free_energy_kernel;
227 nl->simd_padding_width = 1;
229 else if(!gmx_strcasecmp_min(geom,"CG-CG"))
231 nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
232 nl->kernelptr_f = gmx_nb_generic_cg_kernel;
233 nl->simd_padding_width = 1;
237 /* Try to find a specific kernel first */
239 for(i=0;i<narch && nl->kernelptr_vf==NULL ;i++)
241 nl->kernelptr_vf = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
242 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
244 for(i=0;i<narch && nl->kernelptr_f==NULL ;i++)
246 nl->kernelptr_f = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
247 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
249 /* If there is not force-only optimized kernel, is there a potential & force one? */
250 if(nl->kernelptr_f == NULL)
252 nl->kernelptr_f = nb_kernel_list_findkernel(NULL,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
253 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
257 /* Give up, pick a generic one instead */
258 if(nl->kernelptr_vf==NULL)
260 nl->kernelptr_vf = gmx_nb_generic_kernel;
261 nl->kernelptr_f = gmx_nb_generic_kernel;
262 nl->simd_padding_width = 1;
266 "WARNING - Slow generic NB kernel used for neighborlist with\n"
267 " Elec: '%s', Modifier: '%s'\n"
268 " Vdw: '%s', Modifier: '%s'\n"
269 " Geom: '%s', Other: '%s'\n\n",
270 elec,elec_mod,vdw,vdw_mod,geom,other);
278 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
279 rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
280 gmx_grppairener_t *grppener,rvec box_size,
281 t_nrnb *nrnb,real *lambda, real *dvdl,
282 int nls,int eNL,int flags)
285 int n,n0,n1,i,i0,i1,sz,range;
287 nb_kernel_data_t kernel_data;
288 nb_kernel_t * kernelptr=NULL;
291 kernel_data.flags = flags;
292 kernel_data.exclusions = excl;
293 kernel_data.lambda = lambda;
294 kernel_data.dvdl = dvdl;
323 for(n=n0; (n<n1); n++)
325 nblists = &fr->nblists[n];
327 kernel_data.table_elec = &nblists->table_elec;
328 kernel_data.table_vdw = &nblists->table_vdw;
329 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
331 for(range=0;range<2;range++)
333 /* Are we doing short/long-range? */
337 if(!(flags & GMX_NONBONDED_DO_SR))
341 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
342 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
343 kernel_data.energygrp_polarization = grppener->ener[egGB];
344 nlist = nblists->nlist_sr;
350 if(!(flags & GMX_NONBONDED_DO_LR))
354 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
355 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
356 kernel_data.energygrp_polarization = grppener->ener[egGB];
357 nlist = nblists->nlist_lr;
361 for(i=i0; (i<i1); i++)
363 if (nlist[i].nri > 0)
365 if(flags & GMX_NONBONDED_DO_POTENTIAL)
367 /* Potential and force */
368 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
372 /* Force only, no potential */
373 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
376 if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
378 /* We don't need the non-perturbed interactions */
381 (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
389 nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
391 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
392 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
393 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
394 "a smaller molecule you are decoupling during a free energy calculation.\n"
395 "Since interactions at distances beyond the table cannot be computed,\n"
396 "they are skipped until they are inside the table limit again. You will\n"
397 "only see this message once, even if it occurs for several interactions.\n\n"
398 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
399 "probably something wrong with your system. Only change the table-extension\n"
400 "distance in the mdp file if you are really sure that is the reason.\n",
401 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
406 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
407 x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
408 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
414 /* This might logically belong better in the nb_generic.c module, but it is only
415 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
416 * extra functional call for every single pair listed in the topology.
419 nb_evaluate_single(real r2, real tabscale,real *vftab,
420 real qq, real c6, real c12, real *velec, real *vvdw)
422 real rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
425 /* Do the tabulated interactions - first table lookup */
426 rinv = gmx_invsqrt(r2);
436 Geps = eps*vftab[ntab+2];
437 Heps2 = eps2*vftab[ntab+3];
440 FFe = Fp+Geps+2.0*Heps2;
444 Geps = eps*vftab[ntab+6];
445 Heps2 = eps2*vftab[ntab+7];
448 FFd = Fp+Geps+2.0*Heps2;
452 Geps = eps*vftab[ntab+10];
453 Heps2 = eps2*vftab[ntab+11];
456 FFr = Fp+Geps+2.0*Heps2;
459 *vvdw = c6*VVd+c12*VVr;
461 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
468 do_nonbonded_listed(int ftype,int nbonds,
469 const t_iatom iatoms[],const t_iparams iparams[],
470 const rvec x[],rvec f[],rvec fshift[],
471 const t_pbc *pbc,const t_graph *g,
472 real *lambda, real *dvdl,
474 const t_forcerec *fr,gmx_grppairener_t *grppener,
475 int *global_atom_index)
481 int i,j,itype,ai,aj,gid;
484 real fscal,velec,vvdw;
485 real * energygrp_elec;
486 real * energygrp_vdw;
487 static gmx_bool warned_rlimit=FALSE;
488 /* Free energy stuff */
489 gmx_bool bFreeEnergy;
490 real LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
491 real qqB,c6B,c12B,sigma2_def,sigma2_min;
497 energygrp_elec = grppener->ener[egCOUL14];
498 energygrp_vdw = grppener->ener[egLJ14];
501 energygrp_elec = grppener->ener[egCOULSR];
502 energygrp_vdw = grppener->ener[egLJSR];
505 energygrp_elec = NULL; /* Keep compiler happy */
506 energygrp_vdw = NULL; /* Keep compiler happy */
507 gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
511 if(fr->efep != efepNO)
513 /* Lambda factor for state A=1-lambda and B=lambda */
514 LFC[0] = 1.0 - lambda[efptCOUL];
515 LFV[0] = 1.0 - lambda[efptVDW];
516 LFC[1] = lambda[efptCOUL];
517 LFV[1] = lambda[efptVDW];
519 /*derivative of the lambda factor for state A and B */
524 sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
525 sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
529 lfac_coul[i] = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
530 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
531 lfac_vdw[i] = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
532 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
537 sigma2_min = sigma2_def = 0;
541 for(i=0; (i<nbonds); )
546 gid = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
552 (fr->efep != efepNO &&
553 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
554 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
555 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
556 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
557 c6 = iparams[itype].lj14.c6A;
558 c12 = iparams[itype].lj14.c12A;
561 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
562 c6 = iparams[itype].ljc14.c6;
563 c12 = iparams[itype].ljc14.c12;
566 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
567 c6 = iparams[itype].ljcnb.c6;
568 c12 = iparams[itype].ljcnb.c12;
571 /* Cannot happen since we called gmx_fatal() above in this case */
572 qq = c6 = c12 = 0; /* Keep compiler happy */
576 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
577 * included in the general nfbp array now. This means the tables are scaled down by the
578 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
584 /* Do we need to apply full periodic boundary conditions? */
585 if(fr->bMolPBC==TRUE)
587 fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
591 fshift_index = CENTRAL;
592 rvec_sub(x[ai],x[aj],dx);
596 if(r2>=fr->tab14.r*fr->tab14.r)
598 if(warned_rlimit==FALSE)
600 nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
608 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
609 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
610 c6B = iparams[itype].lj14.c6B*6.0;
611 c12B = iparams[itype].lj14.c12B*12.0;
613 fscal = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
614 fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
615 LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
616 fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
620 /* Evaluate tabulated interaction without free energy */
621 fscal = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
624 energygrp_elec[gid] += velec;
625 energygrp_vdw[gid] += vvdw;
634 /* Correct the shift forces using the graph */
635 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
636 fshift_index = IVEC2IS(dt);
638 if(fshift_index!=CENTRAL)
640 rvec_inc(fshift[fshift_index],dx);
641 rvec_dec(fshift[CENTRAL],dx);