1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
40 #ifdef GMX_THREAD_SHM_FDECOMP
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel_c/nb_kernel_c.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
72 /* 1,4 interactions uses kernel 330 directly */
73 #include "nb_kernel_c/nb_kernel330.h"
75 #ifdef GMX_PPC_ALTIVEC
76 #include "nb_kernel_ppc_altivec/nb_kernel_ppc_altivec.h"
79 #if defined(GMX_IA32_SSE)
80 #include "nb_kernel_ia32_sse/nb_kernel_ia32_sse.h"
83 #if defined(GMX_IA32_SSE2)
84 #include "nb_kernel_ia32_sse2/nb_kernel_ia32_sse2.h"
87 #if defined(GMX_X86_64_SSE)
88 #include "nb_kernel_x86_64_sse/nb_kernel_x86_64_sse.h"
91 #if defined(GMX_X86_64_SSE2)
92 #include "nb_kernel_x86_64_sse2/nb_kernel_x86_64_sse2.h"
97 # include "nb_kernel_sse2_double/nb_kernel_sse2_double.h"
99 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
103 #if defined(GMX_FORTRAN)
105 # include "nb_kernel_f77_double/nb_kernel_f77_double.h"
107 # include "nb_kernel_f77_single/nb_kernel_f77_single.h"
111 #if (defined GMX_IA64_ASM && defined GMX_DOUBLE)
112 #include "nb_kernel_ia64_double/nb_kernel_ia64_double.h"
115 #if (defined GMX_IA64_ASM && !defined GMX_DOUBLE)
116 #include "nb_kernel_ia64_single/nb_kernel_ia64_single.h"
120 #include "nb_kernel_power6/nb_kernel_power6.h"
124 #include "nb_kernel_bluegene/nb_kernel_bluegene.h"
129 enum { TABLE_NONE, TABLE_COMBINED, TABLE_COUL, TABLE_VDW, TABLE_NR };
132 /* Table version for each kernel.
135 nb_kernel_table[eNR_NBKERNEL_NR] =
137 TABLE_NONE, /* kernel010 */
138 TABLE_NONE, /* kernel020 */
139 TABLE_VDW, /* kernel030 */
140 TABLE_NONE, /* kernel100 */
141 TABLE_NONE, /* kernel101 */
142 TABLE_NONE, /* kernel102 */
143 TABLE_NONE, /* kernel103 */
144 TABLE_NONE, /* kernel104 */
145 TABLE_NONE, /* kernel110 */
146 TABLE_NONE, /* kernel111 */
147 TABLE_NONE, /* kernel112 */
148 TABLE_NONE, /* kernel113 */
149 TABLE_NONE, /* kernel114 */
150 TABLE_NONE, /* kernel120 */
151 TABLE_NONE, /* kernel121 */
152 TABLE_NONE, /* kernel122 */
153 TABLE_NONE, /* kernel123 */
154 TABLE_NONE, /* kernel124 */
155 TABLE_VDW, /* kernel130 */
156 TABLE_VDW, /* kernel131 */
157 TABLE_VDW, /* kernel132 */
158 TABLE_VDW, /* kernel133 */
159 TABLE_VDW, /* kernel134 */
160 TABLE_NONE, /* kernel200 */
161 TABLE_NONE, /* kernel201 */
162 TABLE_NONE, /* kernel202 */
163 TABLE_NONE, /* kernel203 */
164 TABLE_NONE, /* kernel204 */
165 TABLE_NONE, /* kernel210 */
166 TABLE_NONE, /* kernel211 */
167 TABLE_NONE, /* kernel212 */
168 TABLE_NONE, /* kernel213 */
169 TABLE_NONE, /* kernel214 */
170 TABLE_NONE, /* kernel220 */
171 TABLE_NONE, /* kernel221 */
172 TABLE_NONE, /* kernel222 */
173 TABLE_NONE, /* kernel223 */
174 TABLE_NONE, /* kernel224 */
175 TABLE_VDW, /* kernel230 */
176 TABLE_VDW, /* kernel231 */
177 TABLE_VDW, /* kernel232 */
178 TABLE_VDW, /* kernel233 */
179 TABLE_VDW, /* kernel234 */
180 TABLE_COUL, /* kernel300 */
181 TABLE_COUL, /* kernel301 */
182 TABLE_COUL, /* kernel302 */
183 TABLE_COUL, /* kernel303 */
184 TABLE_COUL, /* kernel304 */
185 TABLE_COUL, /* kernel310 */
186 TABLE_COUL, /* kernel311 */
187 TABLE_COUL, /* kernel312 */
188 TABLE_COUL, /* kernel313 */
189 TABLE_COUL, /* kernel314 */
190 TABLE_COUL, /* kernel320 */
191 TABLE_COUL, /* kernel321 */
192 TABLE_COUL, /* kernel322 */
193 TABLE_COUL, /* kernel323 */
194 TABLE_COUL, /* kernel324 */
195 TABLE_COMBINED, /* kernel330 */
196 TABLE_COMBINED, /* kernel331 */
197 TABLE_COMBINED, /* kernel332 */
198 TABLE_COMBINED, /* kernel333 */
199 TABLE_COMBINED, /* kernel334 */
200 TABLE_NONE, /* kernel400 */
201 TABLE_NONE, /* kernel410 */
202 TABLE_VDW /* kernel430 */
207 static nb_kernel_t **
208 nb_kernel_list = NULL;
212 gmx_setup_kernels(FILE *fplog,gmx_bool bGenericKernelOnly)
216 snew(nb_kernel_list,eNR_NBKERNEL_NR);
218 /* Note that later calls overwrite earlier, so the preferred (fastest)
219 * version should be at the end. For instance, we call SSE after 3DNow.
222 for(i=0; i<eNR_NBKERNEL_NR; i++)
224 nb_kernel_list[i] = NULL;
227 if (bGenericKernelOnly)
234 fprintf(fplog,"Configuring nonbonded kernels...\n");
237 nb_kernel_setup(fplog,nb_kernel_list);
239 if(getenv("GMX_NOOPTIMIZEDKERNELS") != NULL)
244 /* Setup kernels. The last called setup routine will overwrite earlier assignments,
245 * so we should e.g. test SSE3 support _after_ SSE2 support,
246 * and call e.g. Fortran setup before SSE.
249 #if defined(GMX_FORTRAN) && defined(GMX_DOUBLE)
250 nb_kernel_setup_f77_double(fplog,nb_kernel_list);
253 #if defined(GMX_FORTRAN) && !defined(GMX_DOUBLE)
254 nb_kernel_setup_f77_single(fplog,nb_kernel_list);
258 nb_kernel_setup_bluegene(fplog,nb_kernel_list);
262 nb_kernel_setup_power6(fplog,nb_kernel_list);
265 #ifdef GMX_PPC_ALTIVEC
266 nb_kernel_setup_ppc_altivec(fplog,nb_kernel_list);
269 #if defined(GMX_IA32_SSE)
270 nb_kernel_setup_ia32_sse(fplog,nb_kernel_list);
273 #if defined(GMX_IA32_SSE2)
274 nb_kernel_setup_ia32_sse2(fplog,nb_kernel_list);
277 #if defined(GMX_X86_64_SSE)
278 nb_kernel_setup_x86_64_sse(fplog,nb_kernel_list);
281 #if defined(GMX_X86_64_SSE2)
282 nb_kernel_setup_x86_64_sse2(fplog,nb_kernel_list);
285 #if (defined GMX_IA64_ASM && defined GMX_DOUBLE)
286 nb_kernel_setup_ia64_double(fplog,nb_kernel_list);
289 #if (defined GMX_IA64_ASM && !defined GMX_DOUBLE)
290 nb_kernel_setup_ia64_single(fplog,nb_kernel_list);
295 fprintf(fplog,"\n\n");
300 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
301 rvec x[],rvec f[],t_mdatoms *mdatoms,t_blocka *excl,
302 real egnb[],real egcoul[],real egpol[],rvec box_size,
303 t_nrnb *nrnb,real lambda,real *dvdlambda,
304 int nls,int eNL,int flags)
306 gmx_bool bLR,bDoForces,bForeignLambda;
309 int n,n0,n1,i,i0,i1,nrnb_ind,sz;
312 nb_kernel_t * kernelptr;
317 int outeriter,inneriter;
318 real * tabledata = NULL;
321 bLR = (flags & GMX_DONB_LR);
322 bDoForces = (flags & GMX_DONB_FORCES);
323 bForeignLambda = (flags & GMX_DONB_FOREIGNLAMBDA);
325 gbdata.gb_epsilon_solvent = fr->gb_epsilon_solvent;
326 gbdata.epsilon_r = fr->epsilon_r;
333 #if (defined GMX_SSE2 || defined GMX_X86_64_SSE || defined GMX_X86_64_SSE2 || defined GMX_IA32_SSE || defined GMX_IA32_SSE2)
335 if(fr->UseOptimizedKernels)
337 nb_kernel_allvsallgb_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
338 &outeriter,&inneriter,&fr->AllvsAll_work);
342 nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
343 &outeriter,&inneriter,&fr->AllvsAll_work);
345 # else /* not double */
346 if(fr->UseOptimizedKernels)
348 nb_kernel_allvsallgb_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
349 &outeriter,&inneriter,&fr->AllvsAll_work);
353 nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
354 &outeriter,&inneriter,&fr->AllvsAll_work);
356 # endif /* double/single alt. */
357 #else /* no SSE support compiled in */
358 nb_kernel_allvsallgb(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,egpol,
359 &outeriter,&inneriter,&fr->AllvsAll_work);
361 inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALLGB,inneriter);
365 #if (defined GMX_SSE2 || defined GMX_X86_64_SSE || defined GMX_X86_64_SSE2 || defined GMX_IA32_SSE || defined GMX_IA32_SSE2)
367 if(fr->UseOptimizedKernels)
369 nb_kernel_allvsall_sse2_double(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
370 &outeriter,&inneriter,&fr->AllvsAll_work);
374 nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
375 &outeriter,&inneriter,&fr->AllvsAll_work);
378 # else /* not double */
379 if(fr->UseOptimizedKernels)
381 nb_kernel_allvsall_sse2_single(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
382 &outeriter,&inneriter,&fr->AllvsAll_work);
386 nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
387 &outeriter,&inneriter,&fr->AllvsAll_work);
390 # endif /* double/single check */
391 #else /* No SSE2 support compiled in */
392 nb_kernel_allvsall(fr,mdatoms,excl,x[0],f[0],egcoul,egnb,
393 &outeriter,&inneriter,&fr->AllvsAll_work);
396 inc_nrnb(nrnb,eNR_NBKERNEL_ALLVSALL,inneriter);
398 inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,outeriter);
424 if(nb_kernel_list == NULL)
426 gmx_fatal(FARGS,"gmx_setup_kernels has not been called");
429 fshift = fr->fshift[0];
431 for(n=n0; (n<n1); n++)
433 nblists = &fr->nblists[n];
434 for(i=i0; (i<i1); i++)
436 outeriter = inneriter = 0;
440 nlist = &(nblists->nlist_lr[i]);
444 nlist = &(nblists->nlist_sr[i]);
449 nrnb_ind = nlist->il_code;
451 if(nrnb_ind==eNR_NBKERNEL_FREE_ENERGY)
453 /* generic free energy, use combined table */
454 tabledata = nblists->tab.tab;
460 /* We don't need the non-perturbed interactions */
464 tabletype = nb_kernel_table[nrnb_ind];
466 /* normal kernels, not free energy */
469 nrnb_ind += eNR_NBKERNEL_NR/2;
472 if(tabletype == TABLE_COMBINED)
474 tabledata = nblists->tab.tab;
476 else if(tabletype == TABLE_COUL)
478 tabledata = nblists->coultab;
480 else if(tabletype == TABLE_VDW)
482 tabledata = nblists->vdwtab;
492 if(nlist->free_energy)
496 gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
499 gmx_nb_free_energy_kernel(nlist->icoul,
535 else if (nlist->enlist == enlistCG_CG)
537 /* Call the charge group based inner loop */
538 gmx_nb_generic_cg_kernel(nlist,
553 /* Not free energy */
555 kernelptr = nb_kernel_list[nrnb_ind];
557 if (kernelptr == NULL)
559 /* Call a generic nonbonded kernel */
561 /* If you want to hack/test your own interactions,
562 * do it in this routine and make sure it is called
563 * by setting the environment variable GMX_NB_GENERIC.
565 gmx_nb_generic_kernel(nlist,
580 /* Call nonbonded kernel from function pointer */
582 (*kernelptr)( &(nlist->nri),
601 &(nblists->tab.scale),
616 /* Update flop accounting */
618 /* Outer loop in kernel */
619 switch (nlist->enlist) {
620 case enlistATOM_ATOM: fac = 1; break;
621 case enlistSPC_ATOM: fac = 3; break;
622 case enlistSPC_SPC: fac = 9; break;
623 case enlistTIP4P_ATOM: fac = 4; break;
624 case enlistTIP4P_TIP4P: fac = 16; break;
625 case enlistCG_CG: fac = 1; break;
627 inc_nrnb(nrnb,eNR_NBKERNEL_OUTER,fac*outeriter);
629 /* inner loop in kernel */
630 inc_nrnb(nrnb,nrnb_ind,inneriter);
638 do_listed_vdw_q(int ftype,int nbonds,
639 const t_iatom iatoms[],const t_iparams iparams[],
640 const rvec x[],rvec f[],rvec fshift[],
641 const t_pbc *pbc,const t_graph *g,
642 real lambda,real *dvdlambda,
644 const t_forcerec *fr,gmx_grppairener_t *grppener,
645 int *global_atom_index)
647 static gmx_bool bWarn=FALSE;
648 real eps,r2,*tab,rtab2=0;
649 rvec dx,x14[2],f14[2];
651 int typeA[2]={0,0},typeB[2]={0,1};
652 real chargeA[2]={0,0},chargeB[2];
653 int gid,shift_vir,shift_f;
654 int j_index[] = { 0, 1 };
657 int outeriter,inneriter;
660 real krf,crf,tabscale;
663 real *egnb=NULL,*egcoul=NULL;
666 gmx_bool bMolPBC,bFreeEnergy;
668 #if GMX_THREAD_SHM_FDECOMP
675 #if GMX_THREAD_SHM_FDECOMP
676 pthread_mutex_initialize(&mtx);
679 bMolPBC = fr->bMolPBC;
684 eps = fr->epsfac*fr->fudgeQQ;
686 egnb = grppener->ener[egLJ14];
687 egcoul = grppener->ener[egCOUL14];
692 egnb = grppener->ener[egLJSR];
693 egcoul = grppener->ener[egCOULSR];
696 gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",
700 rtab2 = sqr(fr->tab14.r);
701 tabscale = fr->tab14.scale;
706 /* Determine the values for icoul/ivdw. */
710 else if(fr->bcoultab)
714 else if(fr->eeltype == eelRF_NEC)
737 /* We don't do SSE or altivec here, due to large overhead for 4-fold
738 * unrolling on short lists
742 for(i=0; (i<nbonds); )
747 gid = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
752 (fr->efep != efepNO &&
753 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
754 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
755 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
756 chargeA[0] = md->chargeA[ai];
757 chargeA[1] = md->chargeA[aj];
758 nbfp = (real *)&(iparams[itype].lj14.c6A);
761 eps = fr->epsfac*iparams[itype].ljc14.fqq;
762 chargeA[0] = iparams[itype].ljc14.qi;
763 chargeA[1] = iparams[itype].ljc14.qj;
764 nbfp = (real *)&(iparams[itype].ljc14.c6);
767 chargeA[0] = iparams[itype].ljcnb.qi;
768 chargeA[1] = iparams[itype].ljcnb.qj;
769 nbfp = (real *)&(iparams[itype].ljcnb.c6);
775 /* This is a bonded interaction, atoms are in the same box */
777 r2 = distance2(x[ai],x[aj]);
781 /* Apply full periodic boundary conditions */
782 shift_f = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
790 fprintf(stderr,"Warning: 1-4 interaction between %d and %d "
791 "at distance %.3f which is larger than the 1-4 table size %.3f nm\n",
792 glatnr(global_atom_index,ai),
793 glatnr(global_atom_index,aj),
794 sqrt(r2), sqrt(rtab2));
795 fprintf(stderr,"These are ignored for the rest of the simulation\n");
796 fprintf(stderr,"This usually means your system is exploding,\n"
797 "if not, you should increase table-extension in your mdp file\n"
798 "or with user tables increase the table size\n");
802 fprintf(debug,"%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
803 x[ai][XX],x[ai][YY],x[ai][ZZ],
804 x[aj][XX],x[aj][YY],x[aj][ZZ],
805 glatnr(global_atom_index,ai),
806 glatnr(global_atom_index,aj),
811 copy_rvec(x[ai],x14[0]);
812 copy_rvec(x[aj],x14[1]);
816 fprintf(debug,"LJ14: grp-i=%2d, grp-j=%2d, ngrp=%2d, GID=%d\n",
817 md->cENER[ai],md->cENER[aj],md->nenergrp,gid);
820 outeriter = inneriter = count = 0;
823 chargeB[0] = md->chargeB[ai];
824 chargeB[1] = md->chargeB[aj];
825 /* We pass &(iparams[itype].lj14.c6A) as LJ parameter matrix
827 * Here we use that the LJ-14 parameters are stored in iparams
828 * as c6A,c12A,c6B,c12B, which are referenced correctly
829 * in the innerloops if we assign type combinations 0-0 and 0-1
830 * to atom pair ai-aj in topologies A and B respectively.
834 gmx_fatal(FARGS,"Cannot do free energy Buckingham interactions.");
837 gmx_nb_free_energy_kernel(icoul,
875 /* Not perturbed - call kernel 330 */
911 rvec_inc(f[ai],f14[0]);
912 rvec_dec(f[aj],f14[0]);
916 /* Correct the shift forces using the graph */
917 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
918 shift_vir = IVEC2IS(dt);
919 rvec_inc(fshift[shift_vir],f14[0]);
920 rvec_dec(fshift[CENTRAL],f14[0]);
923 /* flops: eNR_KERNEL_OUTER + eNR_KERNEL330 + 12 */