1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 /* Temporary enabler until we add the AVX kernels */
76 #if (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) || (defined GMX_CPU_ACCELERATION_X86_AVX_256)
77 # define GMX_CPU_ACCELERATION_X86_SSE4_1
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
83 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
84 # include "nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
89 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
91 static gmx_bool nonbonded_setup_done = FALSE;
95 gmx_nonbonded_setup(FILE * fplog,
97 gmx_bool bGenericKernelOnly)
100 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
102 /* Here we are guaranteed only one thread made it. */
103 if(nonbonded_setup_done==FALSE)
105 if(bGenericKernelOnly==FALSE)
107 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
108 nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
110 if(!(fr!=NULL && fr->use_cpu_acceleration==FALSE))
112 /* Add interaction-specific kernels for different architectures */
113 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
114 nb_kernel_list_add_kernels(kernellist_sse2_single,kernellist_sse2_single_size);
116 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
117 nb_kernel_list_add_kernels(kernellist_sse4_1_single,kernellist_sse4_1_single_size);
119 ; /* empty statement to avoid a completely empty block */
122 /* Create a hash for faster lookups */
123 nb_kernel_list_hash_init();
125 nonbonded_setup_done=TRUE;
127 #ifdef GMX_THREAD_MPI
128 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
135 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
138 const char * elec_mod;
140 const char * vdw_mod;
148 int simd_padding_width;
152 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) && !(defined GMX_DOUBLE)
153 { "sse4_1_single", 4 },
155 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
156 { "sse2_single", 4 },
160 int narch = asize(arch_and_padding);
163 if(nonbonded_setup_done==FALSE)
165 /* We typically call this setup routine before starting timers,
166 * but if that has not been done for whatever reason we do it now.
168 gmx_nonbonded_setup(NULL,NULL,FALSE);
174 nl->kernelptr_vf = NULL;
175 nl->kernelptr_v = NULL;
176 nl->kernelptr_f = NULL;
178 elec = gmx_nbkernel_elec_names[nl->ielec];
179 elec_mod = eintmod_names[nl->ielecmod];
180 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
181 vdw_mod = eintmod_names[nl->ivdwmod];
182 geom = gmx_nblist_geometry_names[nl->igeometry];
186 nl->kernelptr_vf = gmx_nb_free_energy_kernel;
187 nl->kernelptr_f = gmx_nb_free_energy_kernel;
188 nl->simd_padding_width = 1;
190 else if(!gmx_strcasecmp_min(geom,"CG-CG"))
192 nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
193 nl->kernelptr_f = gmx_nb_generic_cg_kernel;
194 nl->simd_padding_width = 1;
198 /* Try to find a specific kernel first */
200 for(i=0;i<narch && nl->kernelptr_vf==NULL ;i++)
202 nl->kernelptr_vf = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
203 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
205 for(i=0;i<narch && nl->kernelptr_f==NULL ;i++)
207 nl->kernelptr_f = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
208 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
210 /* If there is not force-only optimized kernel, is there a potential & force one? */
211 if(nl->kernelptr_f == NULL)
213 nl->kernelptr_f = nb_kernel_list_findkernel(NULL,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
214 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
218 /* Give up, pick a generic one instead */
219 if(nl->kernelptr_vf==NULL)
221 nl->kernelptr_vf = gmx_nb_generic_kernel;
222 nl->kernelptr_f = gmx_nb_generic_kernel;
223 nl->simd_padding_width = 1;
227 "WARNING - Slow generic NB kernel used for neighborlist with\n"
228 " Elec: '%s', Modifier: '%s'\n"
229 " Vdw: '%s', Modifier: '%s'\n"
230 " Geom: '%s', Other: '%s'\n\n",
231 elec,elec_mod,vdw,vdw_mod,geom,other);
239 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
240 rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
241 gmx_grppairener_t *grppener,rvec box_size,
242 t_nrnb *nrnb,real *lambda, real *dvdl,
243 int nls,int eNL,int flags)
246 int n,n0,n1,i,i0,i1,sz,range;
248 nb_kernel_data_t kernel_data;
249 nb_kernel_t * kernelptr=NULL;
252 kernel_data.flags = flags;
253 kernel_data.exclusions = excl;
254 kernel_data.lambda = lambda;
255 kernel_data.dvdl = dvdl;
284 for(n=n0; (n<n1); n++)
286 nblists = &fr->nblists[n];
288 kernel_data.table_elec = &nblists->table_elec;
289 kernel_data.table_vdw = &nblists->table_vdw;
290 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
292 for(range=0;range<2;range++)
294 /* Are we doing short/long-range? */
298 if(!(flags & GMX_NONBONDED_DO_SR))
302 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
303 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
304 kernel_data.energygrp_polarization = grppener->ener[egGB];
305 nlist = nblists->nlist_sr;
311 if(!(flags & GMX_NONBONDED_DO_LR))
315 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
316 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
317 kernel_data.energygrp_polarization = grppener->ener[egGB];
318 nlist = nblists->nlist_lr;
322 for(i=i0; (i<i1); i++)
324 if (nlist[i].nri > 0)
326 if(flags & GMX_NONBONDED_DO_POTENTIAL)
328 /* Potential and force */
329 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
333 /* Force only, no potential */
334 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
337 if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
339 /* We don't need the non-perturbed interactions */
342 (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
350 nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
352 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
353 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
354 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
355 "a smaller molecule you are decoupling during a free energy calculation.\n"
356 "Since interactions at distances beyond the table cannot be computed,\n"
357 "they are skipped until they are inside the table limit again. You will\n"
358 "only see this message once, even if it occurs for several interactions.\n\n"
359 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
360 "probably something wrong with your system. Only change the table-extension\n"
361 "distance in the mdp file if you are really sure that is the reason.\n",
362 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
367 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
368 x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
369 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
375 /* This might logically belong better in the nb_generic.c module, but it is only
376 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
377 * extra functional call for every single pair listed in the topology.
380 nb_evaluate_single(real r2, real tabscale,real *vftab,
381 real qq, real c6, real c12, real *velec, real *vvdw)
383 real rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
386 /* Do the tabulated interactions - first table lookup */
387 rinv = gmx_invsqrt(r2);
397 Geps = eps*vftab[ntab+2];
398 Heps2 = eps2*vftab[ntab+3];
401 FFe = Fp+Geps+2.0*Heps2;
405 Geps = eps*vftab[ntab+6];
406 Heps2 = eps2*vftab[ntab+7];
409 FFd = Fp+Geps+2.0*Heps2;
413 Geps = eps*vftab[ntab+10];
414 Heps2 = eps2*vftab[ntab+11];
417 FFr = Fp+Geps+2.0*Heps2;
420 *vvdw = c6*VVd+c12*VVr;
422 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
429 do_nonbonded_listed(int ftype,int nbonds,
430 const t_iatom iatoms[],const t_iparams iparams[],
431 const rvec x[],rvec f[],rvec fshift[],
432 const t_pbc *pbc,const t_graph *g,
433 real *lambda, real *dvdl,
435 const t_forcerec *fr,gmx_grppairener_t *grppener,
436 int *global_atom_index)
442 int i,j,itype,ai,aj,gid;
445 real fscal,velec,vvdw;
446 real * energygrp_elec;
447 real * energygrp_vdw;
448 static gmx_bool warned_rlimit=FALSE;
449 /* Free energy stuff */
450 gmx_bool bFreeEnergy;
451 real LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
452 real qqB,c6B,c12B,sigma2_def,sigma2_min;
458 energygrp_elec = grppener->ener[egCOUL14];
459 energygrp_vdw = grppener->ener[egLJ14];
462 energygrp_elec = grppener->ener[egCOULSR];
463 energygrp_vdw = grppener->ener[egLJSR];
466 energygrp_elec = NULL; /* Keep compiler happy */
467 energygrp_vdw = NULL; /* Keep compiler happy */
468 gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
472 if(fr->efep != efepNO)
474 /* Lambda factor for state A=1-lambda and B=lambda */
475 LFC[0] = 1.0 - lambda[efptCOUL];
476 LFV[0] = 1.0 - lambda[efptVDW];
477 LFC[1] = lambda[efptCOUL];
478 LFV[1] = lambda[efptVDW];
480 /*derivative of the lambda factor for state A and B */
485 sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
486 sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
490 lfac_coul[i] = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
491 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
492 lfac_vdw[i] = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
493 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
498 sigma2_min = sigma2_def = 0;
502 for(i=0; (i<nbonds); )
507 gid = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
513 (fr->efep != efepNO &&
514 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
515 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
516 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
517 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
518 c6 = iparams[itype].lj14.c6A;
519 c12 = iparams[itype].lj14.c12A;
522 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
523 c6 = iparams[itype].ljc14.c6;
524 c12 = iparams[itype].ljc14.c12;
527 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
528 c6 = iparams[itype].ljcnb.c6;
529 c12 = iparams[itype].ljcnb.c12;
532 /* Cannot happen since we called gmx_fatal() above in this case */
533 qq = c6 = c12 = 0; /* Keep compiler happy */
537 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
538 * included in the general nfbp array now. This means the tables are scaled down by the
539 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
545 /* Do we need to apply full periodic boundary conditions? */
546 if(fr->bMolPBC==TRUE)
548 fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
552 fshift_index = CENTRAL;
553 rvec_sub(x[ai],x[aj],dx);
557 if(r2>=fr->tab14.r*fr->tab14.r)
559 if(warned_rlimit==FALSE)
561 nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
569 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
570 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
571 c6B = iparams[itype].lj14.c6B*6.0;
572 c12B = iparams[itype].lj14.c12B*12.0;
574 fscal = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
575 fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
576 LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
577 fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
581 /* Evaluate tabulated interaction without free energy */
582 fscal = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
585 energygrp_elec[gid] += velec;
586 energygrp_vdw[gid] += vvdw;
595 /* Correct the shift forces using the graph */
596 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
597 fshift_index = IVEC2IS(dt);
599 if(fshift_index!=CENTRAL)
601 rvec_inc(fshift[fshift_index],dx);
602 rvec_dec(fshift[CENTRAL],dx);