1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROningen Mixture of Alchemy and Childrens' Stories
41 #include <thread_mpi.h>
58 #include "gmx_fatal.h"
64 #include "nonbonded.h"
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
75 /* Temporary enabler until we add the AVX kernels */
76 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) || (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) || (defined GMX_CPU_ACCELERATION_X86_AVX_256)
77 # define GMX_CPU_ACCELERATION_X86_SSE2
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
81 # include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
86 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
88 static gmx_bool nonbonded_setup_done = FALSE;
92 gmx_nonbonded_setup(FILE * fplog,
94 gmx_bool bGenericKernelOnly)
97 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
99 /* Here we are guaranteed only one thread made it. */
100 if(nonbonded_setup_done==FALSE)
102 if(bGenericKernelOnly==FALSE)
104 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
105 nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
107 if(!(fr!=NULL && fr->use_cpu_acceleration==FALSE))
109 /* Add interaction-specific kernels for different architectures */
110 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
111 nb_kernel_list_add_kernels(kernellist_sse2_single,kernellist_sse2_single_size);
113 ; /* empty statement to avoid a completely empty block */
116 /* Create a hash for faster lookups */
117 nb_kernel_list_hash_init();
119 nonbonded_setup_done=TRUE;
121 #ifdef GMX_THREAD_MPI
122 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
129 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
132 const char * elec_mod;
134 const char * vdw_mod;
142 int simd_padding_width;
146 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
147 { "sse2_single", 4 },
151 int narch = asize(arch_and_padding);
154 if(nonbonded_setup_done==FALSE)
156 /* We typically call this setup routine before starting timers,
157 * but if that has not been done for whatever reason we do it now.
159 gmx_nonbonded_setup(NULL,NULL,FALSE);
165 nl->kernelptr_vf = NULL;
166 nl->kernelptr_v = NULL;
167 nl->kernelptr_f = NULL;
169 elec = gmx_nbkernel_elec_names[nl->ielec];
170 elec_mod = eintmod_names[nl->ielecmod];
171 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
172 vdw_mod = eintmod_names[nl->ivdwmod];
173 geom = gmx_nblist_geometry_names[nl->igeometry];
177 nl->kernelptr_vf = gmx_nb_free_energy_kernel;
178 nl->kernelptr_f = gmx_nb_free_energy_kernel;
179 nl->simd_padding_width = 1;
181 else if(!gmx_strcasecmp_min(geom,"CG-CG"))
183 nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
184 nl->kernelptr_f = gmx_nb_generic_cg_kernel;
185 nl->simd_padding_width = 1;
189 /* Try to find a specific kernel first */
191 for(i=0;i<narch && nl->kernelptr_vf==NULL ;i++)
193 nl->kernelptr_vf = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
194 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
196 for(i=0;i<narch && nl->kernelptr_f==NULL ;i++)
198 nl->kernelptr_f = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
199 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
201 /* If there is not force-only optimized kernel, is there a potential & force one? */
202 if(nl->kernelptr_f == NULL)
204 nl->kernelptr_f = nb_kernel_list_findkernel(NULL,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
205 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
209 /* Give up, pick a generic one instead */
210 if(nl->kernelptr_vf==NULL)
212 nl->kernelptr_vf = gmx_nb_generic_kernel;
213 nl->kernelptr_f = gmx_nb_generic_kernel;
214 nl->simd_padding_width = 1;
218 "WARNING - Slow generic NB kernel used for neighborlist with\n"
219 " Elec: '%s', Modifier: '%s'\n"
220 " Vdw: '%s', Modifier: '%s'\n"
221 " Geom: '%s', Other: '%s'\n\n",
222 elec,elec_mod,vdw,vdw_mod,geom,other);
230 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
231 rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
232 gmx_grppairener_t *grppener,rvec box_size,
233 t_nrnb *nrnb,real *lambda, real *dvdl,
234 int nls,int eNL,int flags)
237 int n,n0,n1,i,i0,i1,sz,range;
239 nb_kernel_data_t kernel_data;
240 nb_kernel_t * kernelptr=NULL;
243 kernel_data.flags = flags;
244 kernel_data.exclusions = excl;
245 kernel_data.lambda = lambda;
246 kernel_data.dvdl = dvdl;
275 for(n=n0; (n<n1); n++)
277 nblists = &fr->nblists[n];
279 kernel_data.table_elec = &nblists->table_elec;
280 kernel_data.table_vdw = &nblists->table_vdw;
281 kernel_data.table_elec_vdw = &nblists->table_elec_vdw;
283 for(range=0;range<2;range++)
285 /* Are we doing short/long-range? */
289 if(!(flags & GMX_NONBONDED_DO_SR))
293 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
294 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
295 kernel_data.energygrp_polarization = grppener->ener[egGB];
296 nlist = nblists->nlist_sr;
302 if(!(flags & GMX_NONBONDED_DO_LR))
306 kernel_data.energygrp_elec = grppener->ener[egCOULLR];
307 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
308 kernel_data.energygrp_polarization = grppener->ener[egGB];
309 nlist = nblists->nlist_lr;
313 for(i=i0; (i<i1); i++)
315 if (nlist[i].nri > 0)
317 if(flags & GMX_NONBONDED_DO_POTENTIAL)
319 /* Potential and force */
320 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
324 /* Force only, no potential */
325 kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
328 if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
330 /* We don't need the non-perturbed interactions */
333 (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
341 nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
343 gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
344 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
345 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
346 "a smaller molecule you are decoupling during a free energy calculation.\n"
347 "Since interactions at distances beyond the table cannot be computed,\n"
348 "they are skipped until they are inside the table limit again. You will\n"
349 "only see this message once, even if it occurs for several interactions.\n\n"
350 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
351 "probably something wrong with your system. Only change the table-extension\n"
352 "distance in the mdp file if you are really sure that is the reason.\n",
353 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
358 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
359 x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
360 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
366 /* This might logically belong better in the nb_generic.c module, but it is only
367 * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
368 * extra functional call for every single pair listed in the topology.
371 nb_evaluate_single(real r2, real tabscale,real *vftab,
372 real qq, real c6, real c12, real *velec, real *vvdw)
374 real rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
377 /* Do the tabulated interactions - first table lookup */
378 rinv = gmx_invsqrt(r2);
388 Geps = eps*vftab[ntab+2];
389 Heps2 = eps2*vftab[ntab+3];
392 FFe = Fp+Geps+2.0*Heps2;
396 Geps = eps*vftab[ntab+6];
397 Heps2 = eps2*vftab[ntab+7];
400 FFd = Fp+Geps+2.0*Heps2;
404 Geps = eps*vftab[ntab+10];
405 Heps2 = eps2*vftab[ntab+11];
408 FFr = Fp+Geps+2.0*Heps2;
411 *vvdw = c6*VVd+c12*VVr;
413 fscal = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
420 do_nonbonded_listed(int ftype,int nbonds,
421 const t_iatom iatoms[],const t_iparams iparams[],
422 const rvec x[],rvec f[],rvec fshift[],
423 const t_pbc *pbc,const t_graph *g,
424 real *lambda, real *dvdl,
426 const t_forcerec *fr,gmx_grppairener_t *grppener,
427 int *global_atom_index)
433 int i,j,itype,ai,aj,gid;
436 real fscal,velec,vvdw;
437 real * energygrp_elec;
438 real * energygrp_vdw;
439 static gmx_bool warned_rlimit=FALSE;
440 /* Free energy stuff */
441 gmx_bool bFreeEnergy;
442 real LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
443 real qqB,c6B,c12B,sigma2_def,sigma2_min;
449 energygrp_elec = grppener->ener[egCOUL14];
450 energygrp_vdw = grppener->ener[egLJ14];
453 energygrp_elec = grppener->ener[egCOULSR];
454 energygrp_vdw = grppener->ener[egLJSR];
457 energygrp_elec = NULL; /* Keep compiler happy */
458 energygrp_vdw = NULL; /* Keep compiler happy */
459 gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
463 if(fr->efep != efepNO)
465 /* Lambda factor for state A=1-lambda and B=lambda */
466 LFC[0] = 1.0 - lambda[efptCOUL];
467 LFV[0] = 1.0 - lambda[efptVDW];
468 LFC[1] = lambda[efptCOUL];
469 LFV[1] = lambda[efptVDW];
471 /*derivative of the lambda factor for state A and B */
476 sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
477 sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
481 lfac_coul[i] = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
482 dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
483 lfac_vdw[i] = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
484 dlfac_vdw[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
489 sigma2_min = sigma2_def = 0;
493 for(i=0; (i<nbonds); )
498 gid = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
504 (fr->efep != efepNO &&
505 ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
506 iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
507 iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
508 qq = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
509 c6 = iparams[itype].lj14.c6A;
510 c12 = iparams[itype].lj14.c12A;
513 qq = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
514 c6 = iparams[itype].ljc14.c6;
515 c12 = iparams[itype].ljc14.c12;
518 qq = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
519 c6 = iparams[itype].ljcnb.c6;
520 c12 = iparams[itype].ljcnb.c12;
523 /* Cannot happen since we called gmx_fatal() above in this case */
524 qq = c6 = c12 = 0; /* Keep compiler happy */
528 /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
529 * included in the general nfbp array now. This means the tables are scaled down by the
530 * same factor, so when we use the original c6/c12 parameters from iparams[] they must
536 /* Do we need to apply full periodic boundary conditions? */
537 if(fr->bMolPBC==TRUE)
539 fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
543 fshift_index = CENTRAL;
544 rvec_sub(x[ai],x[aj],dx);
548 if(r2>=fr->tab14.r*fr->tab14.r)
550 if(warned_rlimit==FALSE)
552 nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
560 /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
561 qqB = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
562 c6B = iparams[itype].lj14.c6B*6.0;
563 c12B = iparams[itype].lj14.c12B*12.0;
565 fscal = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
566 fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
567 LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
568 fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
572 /* Evaluate tabulated interaction without free energy */
573 fscal = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
576 energygrp_elec[gid] += velec;
577 energygrp_vdw[gid] += vvdw;
586 /* Correct the shift forces using the graph */
587 ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
588 fshift_index = IVEC2IS(dt);
590 if(fshift_index!=CENTRAL)
592 rvec_inc(fshift[fshift_index],dx);
593 rvec_dec(fshift[CENTRAL],dx);