Merge 'release-4-6' into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nonbonded.c
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  * 
4  *                This source code is part of
5  * 
6  *                 G   R   O   M   A   C   S
7  * 
8  *          GROningen MAchine for Chemical Simulations
9  * 
10  *                        VERSION 3.2.0
11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13  * Copyright (c) 2001-2004, The GROMACS development team,
14  * check out http://www.gromacs.org for more information.
15
16  * This program is free software; you can redistribute it and/or
17  * modify it under the terms of the GNU General Public License
18  * as published by the Free Software Foundation; either version 2
19  * of the License, or (at your option) any later version.
20  * 
21  * If you want to redistribute modifications, please consider that
22  * scientific software is very special. Version control is crucial -
23  * bugs must be traceable. We will be happy to consider code for
24  * inclusion in the official distribution, but derived work must not
25  * be called official GROMACS. Details are found in the README & COPYING
26  * files - if they are missing, get the official version at www.gromacs.org.
27  * 
28  * To help us fund GROMACS development, we humbly ask that you cite
29  * the papers on the package - you can find them in the top README file.
30  * 
31  * For more info, check our website at http://www.gromacs.org
32  * 
33  * And Hey:
34  * GROningen Mixture of Alchemy and Childrens' Stories
35  */
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
39
40 #ifdef GMX_THREAD_MPI
41 #include <thread_mpi.h>
42 #endif
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include "typedefs.h"
47 #include "txtdump.h"
48 #include "smalloc.h"
49 #include "ns.h"
50 #include "vec.h"
51 #include "maths.h"
52 #include "macros.h"
53 #include "string2.h"
54 #include "force.h"
55 #include "names.h"
56 #include "main.h"
57 #include "xvgr.h"
58 #include "gmx_fatal.h"
59 #include "physics.h"
60 #include "force.h"
61 #include "bondf.h"
62 #include "nrnb.h"
63 #include "smalloc.h"
64 #include "nonbonded.h"
65
66 #include "nb_kernel.h"
67 #include "nb_free_energy.h"
68 #include "nb_generic.h"
69 #include "nb_generic_cg.h"
70 #include "nb_generic_adress.h"
71
72 /* Different default (c) and accelerated interaction-specific kernels */
73 #include "nb_kernel_c/nb_kernel_c.h"
74
75 /* Temporary enabler until we add the AVX kernels */
76 #if (defined GMX_CPU_ACCELERATION_X86_SSE4_1) || (defined GMX_CPU_ACCELERATION_X86_AVX_128_FMA) || (defined GMX_CPU_ACCELERATION_X86_AVX_256)
77 #    define GMX_CPU_ACCELERATION_X86_SSE2
78 #endif
79
80 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
81 #    include "nb_kernel_sse2_single/nb_kernel_sse2_single.h"
82 #endif
83
84
85 #ifdef GMX_THREAD_MPI
86 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
87 #endif
88 static gmx_bool            nonbonded_setup_done  = FALSE;
89
90
91 void
92 gmx_nonbonded_setup(FILE *         fplog,
93                     t_forcerec *   fr,
94                     gmx_bool       bGenericKernelOnly)
95 {
96 #ifdef GMX_THREAD_MPI
97     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
98 #endif
99     /* Here we are guaranteed only one thread made it. */
100     if(nonbonded_setup_done==FALSE)
101     {
102         if(bGenericKernelOnly==FALSE)
103         {
104             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
105             nb_kernel_list_add_kernels(kernellist_c,kernellist_c_size);
106             
107             if(!(fr!=NULL && fr->use_cpu_acceleration==FALSE))
108             {
109                 /* Add interaction-specific kernels for different architectures */
110 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
111                 nb_kernel_list_add_kernels(kernellist_sse2_single,kernellist_sse2_single_size);
112 #endif
113                 ; /* empty statement to avoid a completely empty block */
114             }
115         }
116         /* Create a hash for faster lookups */
117         nb_kernel_list_hash_init();
118
119         nonbonded_setup_done=TRUE;
120     }
121 #ifdef GMX_THREAD_MPI
122     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
123 #endif
124 }
125
126
127
128 void
129 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl)
130 {
131     const char *     elec;
132     const char *     elec_mod;
133     const char *     vdw;
134     const char *     vdw_mod;
135     const char *     geom;
136     const char *     other;
137     const char *     vf;
138
139     struct
140     {
141         const char *  arch;
142         int           simd_padding_width;
143     }
144     arch_and_padding[] =
145     {
146 #if (defined GMX_CPU_ACCELERATION_X86_SSE2) && !(defined GMX_DOUBLE)
147         { "sse2_single", 4 },
148 #endif
149         { "c", 1 },
150     };
151     int              narch = asize(arch_and_padding);
152     int              i;
153
154     if(nonbonded_setup_done==FALSE)
155     {
156         /* We typically call this setup routine before starting timers,
157          * but if that has not been done for whatever reason we do it now.
158          */
159         gmx_nonbonded_setup(NULL,NULL,FALSE);
160     }
161
162     /* Not used yet */
163     other="";
164
165     nl->kernelptr_vf = NULL;
166     nl->kernelptr_v  = NULL;
167     nl->kernelptr_f  = NULL;
168
169     elec     = gmx_nbkernel_elec_names[nl->ielec];
170     elec_mod = eintmod_names[nl->ielecmod];
171     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
172     vdw_mod  = eintmod_names[nl->ivdwmod];
173     geom     = gmx_nblist_geometry_names[nl->igeometry];
174
175     if(nl->free_energy)
176     {
177         nl->kernelptr_vf = gmx_nb_free_energy_kernel;
178         nl->kernelptr_f  = gmx_nb_free_energy_kernel;
179         nl->simd_padding_width = 1;
180     }
181     else if(!gmx_strcasecmp_min(geom,"CG-CG"))
182     {
183         nl->kernelptr_vf = gmx_nb_generic_cg_kernel;
184         nl->kernelptr_f  = gmx_nb_generic_cg_kernel;
185         nl->simd_padding_width = 1;
186     }
187     else
188     {
189         /* Try to find a specific kernel first */
190
191         for(i=0;i<narch && nl->kernelptr_vf==NULL ;i++)
192         {
193             nl->kernelptr_vf = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
194             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
195         }
196         for(i=0;i<narch && nl->kernelptr_f==NULL ;i++)
197         {
198             nl->kernelptr_f = nb_kernel_list_findkernel(log,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"Force");
199             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
200
201             /* If there is not force-only optimized kernel, is there a potential & force one? */
202             if(nl->kernelptr_f == NULL)
203             {
204                 nl->kernelptr_f  = nb_kernel_list_findkernel(NULL,arch_and_padding[i].arch,elec,elec_mod,vdw,vdw_mod,geom,other,"PotentialAndForce");
205                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
206             }
207         }
208         
209         /* Give up, pick a generic one instead */
210         if(nl->kernelptr_vf==NULL)
211         {
212             nl->kernelptr_vf = gmx_nb_generic_kernel;
213             nl->kernelptr_f  = gmx_nb_generic_kernel;
214             nl->simd_padding_width = 1;
215             if(log)
216             {
217                 fprintf(log,
218                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
219                         "    Elec: '%s', Modifier: '%s'\n"
220                         "    Vdw:  '%s', Modifier: '%s'\n"
221                         "    Geom: '%s', Other: '%s'\n\n",
222                         elec,elec_mod,vdw,vdw_mod,geom,other);
223             }
224         }
225     }
226
227     return;
228 }
229
230 void do_nonbonded(t_commrec *cr,t_forcerec *fr,
231                   rvec x[],rvec f_shortrange[],rvec f_longrange[],t_mdatoms *mdatoms,t_blocka *excl,
232                   gmx_grppairener_t *grppener,rvec box_size,
233                   t_nrnb *nrnb,real *lambda, real *dvdl,
234                   int nls,int eNL,int flags)
235 {
236         t_nblist *        nlist;
237         int               n,n0,n1,i,i0,i1,sz,range;
238         t_nblists *       nblists;
239     nb_kernel_data_t  kernel_data;
240     nb_kernel_t *     kernelptr=NULL;
241     rvec *            f;
242     
243     kernel_data.flags                   = flags;
244     kernel_data.exclusions              = excl;
245     kernel_data.lambda                  = lambda;
246     kernel_data.dvdl                    = dvdl;
247         
248     if(fr->bAllvsAll)
249     {
250         return;
251     }
252         
253     if (eNL >= 0)
254     {
255                 i0 = eNL;
256                 i1 = i0+1;
257     }
258     else
259     {
260                 i0 = 0;
261                 i1 = eNL_NR;
262         }
263         
264         if (nls >= 0)
265         {
266                 n0 = nls;
267                 n1 = nls+1;
268         }
269         else
270         {
271                 n0 = 0;
272                 n1 = fr->nnblists;
273         }
274
275         for(n=n0; (n<n1); n++)
276         {
277                 nblists = &fr->nblists[n];
278
279         kernel_data.table_elec              = &nblists->table_elec;
280         kernel_data.table_vdw               = &nblists->table_vdw;
281         kernel_data.table_elec_vdw          = &nblists->table_elec_vdw;
282
283         for(range=0;range<2;range++)
284         {
285             /* Are we doing short/long-range? */
286             if(range==0)
287             {
288                 /* Short-range */
289                 if(!(flags & GMX_NONBONDED_DO_SR))
290                 {
291                     continue;
292                 }
293                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
294                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
295                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
296                 nlist = nblists->nlist_sr;
297                 f                                   = f_shortrange;
298             }
299             else if(range==1)
300             {
301                 /* Long-range */
302                 if(!(flags & GMX_NONBONDED_DO_LR))
303                 {
304                     continue;
305                 }
306                 kernel_data.energygrp_elec          = grppener->ener[egCOULLR];
307                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMLR : egLJLR];
308                 kernel_data.energygrp_polarization  = grppener->ener[egGB];
309                 nlist = nblists->nlist_lr;
310                 f                                   = f_longrange;
311             }
312
313             for(i=i0; (i<i1); i++)
314             {
315                 if (nlist[i].nri > 0)
316                 {
317                     if(flags & GMX_NONBONDED_DO_POTENTIAL)
318                     {
319                         /* Potential and force */
320                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
321                     }
322                     else
323                     {
324                         /* Force only, no potential */
325                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
326                     }
327
328                     if(nlist[i].free_energy==0 && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
329                     {
330                         /* We don't need the non-perturbed interactions */
331                         continue;
332                     }
333                     (*kernelptr)(&(nlist[i]),x,f,fr,mdatoms,&kernel_data,nrnb);
334                  }
335             }
336         }
337     }
338 }
339
340 static void
341 nb_listed_warning_rlimit(const rvec *x,int ai, int aj,int * global_atom_index,real r, real rlimit)
342 {
343     gmx_warning("Listed nonbonded interaction between particles %d and %d\n"
344                 "at distance %.3f which is larger than the table limit %.3f nm.\n\n"
345                 "This is likely either a 1,4 interaction, or a listed interaction inside\n"
346                 "a smaller molecule you are decoupling during a free energy calculation.\n"
347                 "Since interactions at distances beyond the table cannot be computed,\n"
348                 "they are skipped until they are inside the table limit again. You will\n"
349                 "only see this message once, even if it occurs for several interactions.\n\n"
350                 "IMPORTANT: This should not happen in a stable simulation, so there is\n"
351                 "probably something wrong with your system. Only change the table-extension\n"
352                 "distance in the mdp file if you are really sure that is the reason.\n",
353                 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r,rlimit);
354
355     if (debug)
356     {
357         fprintf(debug,
358                 "%8f %8f %8f\n%8f %8f %8f\n1-4 (%d,%d) interaction not within cut-off! r=%g. Ignored\n",
359                 x[ai][XX],x[ai][YY],x[ai][ZZ],x[aj][XX],x[aj][YY],x[aj][ZZ],
360                 glatnr(global_atom_index,ai),glatnr(global_atom_index,aj),r);
361     }
362 }
363
364
365
366 /* This might logically belong better in the nb_generic.c module, but it is only
367  * used in do_nonbonded_listed(), and we want it to be inlined there to avoid an
368  * extra functional call for every single pair listed in the topology.
369  */
370 static real
371 nb_evaluate_single(real r2, real tabscale,real *vftab,
372                    real qq, real c6, real c12, real *velec, real *vvdw)
373 {
374     real       rinv,r,rtab,eps,eps2,Y,F,Geps,Heps2,Fp,VVe,FFe,VVd,FFd,VVr,FFr,fscal;
375     int        ntab;
376
377     /* Do the tabulated interactions - first table lookup */
378     rinv             = gmx_invsqrt(r2);
379     r                = r2*rinv;
380     rtab             = r*tabscale;
381     ntab             = rtab;
382     eps              = rtab-ntab;
383     eps2             = eps*eps;
384     ntab             = 12*ntab;
385     /* Electrostatics */
386     Y                = vftab[ntab];
387     F                = vftab[ntab+1];
388     Geps             = eps*vftab[ntab+2];
389     Heps2            = eps2*vftab[ntab+3];
390     Fp               = F+Geps+Heps2;
391     VVe              = Y+eps*Fp;
392     FFe              = Fp+Geps+2.0*Heps2;
393     /* Dispersion */
394     Y                = vftab[ntab+4];
395     F                = vftab[ntab+5];
396     Geps             = eps*vftab[ntab+6];
397     Heps2            = eps2*vftab[ntab+7];
398     Fp               = F+Geps+Heps2;
399     VVd              = Y+eps*Fp;
400     FFd              = Fp+Geps+2.0*Heps2;
401     /* Repulsion */
402     Y                = vftab[ntab+8];
403     F                = vftab[ntab+9];
404     Geps             = eps*vftab[ntab+10];
405     Heps2            = eps2*vftab[ntab+11];
406     Fp               = F+Geps+Heps2;
407     VVr              = Y+eps*Fp;
408     FFr              = Fp+Geps+2.0*Heps2;
409
410     *velec           = qq*VVe;
411     *vvdw            = c6*VVd+c12*VVr;
412
413     fscal            = -(qq*FFe+c6*FFd+c12*FFr)*tabscale*rinv;
414
415     return fscal;
416 }
417
418
419 real
420 do_nonbonded_listed(int ftype,int nbonds,
421                 const t_iatom iatoms[],const t_iparams iparams[],
422                 const rvec x[],rvec f[],rvec fshift[],
423                 const t_pbc *pbc,const t_graph *g,
424                 real *lambda, real *dvdl,
425                 const t_mdatoms *md,
426                 const t_forcerec *fr,gmx_grppairener_t *grppener,
427                 int *global_atom_index)
428 {
429     int              ielec,ivdw;
430     real             qq,c6,c12;
431     rvec             dx;
432     ivec             dt;
433     int              i,j,itype,ai,aj,gid;
434     int              fshift_index;
435     real             r2,rinv;
436     real             fscal,velec,vvdw;
437     real *           energygrp_elec;
438     real *           energygrp_vdw;
439     static gmx_bool  warned_rlimit=FALSE;
440     /* Free energy stuff */
441     gmx_bool         bFreeEnergy;
442     real             LFC[2],LFV[2],DLF[2],lfac_coul[2],lfac_vdw[2],dlfac_coul[2],dlfac_vdw[2];
443     real             qqB,c6B,c12B,sigma2_def,sigma2_min;
444     
445     
446     switch (ftype) {
447         case F_LJ14:
448         case F_LJC14_Q:
449             energygrp_elec = grppener->ener[egCOUL14];
450             energygrp_vdw  = grppener->ener[egLJ14];
451             break;
452         case F_LJC_PAIRS_NB:
453             energygrp_elec = grppener->ener[egCOULSR];
454             energygrp_vdw  = grppener->ener[egLJSR];
455             break;
456         default:
457             energygrp_elec = NULL; /* Keep compiler happy */
458             energygrp_vdw  = NULL; /* Keep compiler happy */
459             gmx_fatal(FARGS,"Unknown function type %d in do_nonbonded14",ftype);
460             break;
461     }
462     
463     if(fr->efep != efepNO)
464     {
465         /* Lambda factor for state A=1-lambda and B=lambda */
466         LFC[0] = 1.0 - lambda[efptCOUL];
467         LFV[0] = 1.0 - lambda[efptVDW];
468         LFC[1] = lambda[efptCOUL];
469         LFV[1] = lambda[efptVDW];
470
471         /*derivative of the lambda factor for state A and B */
472         DLF[0] = -1;
473         DLF[1] = 1;
474
475         /* precalculate */
476         sigma2_def = pow(fr->sc_sigma6_def,1.0/3.0);
477         sigma2_min = pow(fr->sc_sigma6_min,1.0/3.0);
478
479         for (i=0;i<2;i++)
480         {
481             lfac_coul[i]  = (fr->sc_power==2 ? (1-LFC[i])*(1-LFC[i]) : (1-LFC[i]));
482             dlfac_coul[i] = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFC[i]) : 1);
483             lfac_vdw[i]   = (fr->sc_power==2 ? (1-LFV[i])*(1-LFV[i]) : (1-LFV[i]));
484             dlfac_vdw[i]  = DLF[i]*fr->sc_power/fr->sc_r_power*(fr->sc_power==2 ? (1-LFV[i]) : 1);
485         }
486     }
487     else
488     {
489         sigma2_min = sigma2_def = 0;
490     }
491
492     bFreeEnergy = FALSE;
493     for(i=0; (i<nbonds); )
494     {
495         itype = iatoms[i++];
496         ai    = iatoms[i++];
497         aj    = iatoms[i++];
498         gid   = GID(md->cENER[ai],md->cENER[aj],md->nenergrp);
499         
500         /* Get parameters */
501         switch (ftype) {
502             case F_LJ14:
503                 bFreeEnergy =
504                 (fr->efep != efepNO &&
505                  ((md->nPerturbed && (md->bPerturbed[ai] || md->bPerturbed[aj])) ||
506                   iparams[itype].lj14.c6A != iparams[itype].lj14.c6B ||
507                   iparams[itype].lj14.c12A != iparams[itype].lj14.c12B));
508                 qq               = md->chargeA[ai]*md->chargeA[aj]*fr->epsfac*fr->fudgeQQ;
509                 c6               = iparams[itype].lj14.c6A;
510                 c12              = iparams[itype].lj14.c12A;
511                 break;
512             case F_LJC14_Q:
513                 qq               = iparams[itype].ljc14.qi*iparams[itype].ljc14.qj*fr->epsfac*iparams[itype].ljc14.fqq;
514                 c6               = iparams[itype].ljc14.c6;
515                 c12              = iparams[itype].ljc14.c12;
516                 break;
517             case F_LJC_PAIRS_NB:
518                 qq               = iparams[itype].ljcnb.qi*iparams[itype].ljcnb.qj*fr->epsfac;
519                 c6               = iparams[itype].ljcnb.c6;
520                 c12              = iparams[itype].ljcnb.c12;
521                 break;
522             default:
523                 /* Cannot happen since we called gmx_fatal() above in this case */
524                 qq = c6 = c12 = 0; /* Keep compiler happy */
525                 break;
526         }
527         
528         /* To save flops in the optimized kernels, c6/c12 have 6.0/12.0 derivative prefactors
529          * included in the general nfbp array now. This means the tables are scaled down by the
530          * same factor, so when we use the original c6/c12 parameters from iparams[] they must
531          * be scaled up.
532          */
533         c6  *= 6.0;
534         c12 *= 12.0;
535         
536         /* Do we need to apply full periodic boundary conditions? */
537         if(fr->bMolPBC==TRUE)
538         {
539             fshift_index = pbc_dx_aiuc(pbc,x[ai],x[aj],dx);
540         }
541         else
542         {
543             fshift_index = CENTRAL;
544             rvec_sub(x[ai],x[aj],dx);
545         }
546         r2           = norm2(dx);
547
548         if(r2>=fr->tab14.r*fr->tab14.r)
549         {
550             if(warned_rlimit==FALSE)
551             {
552                 nb_listed_warning_rlimit(x,ai,aj,global_atom_index,sqrt(r2),fr->tab14.r);
553                 warned_rlimit=TRUE;
554             }
555             continue;
556         }
557
558         if (bFreeEnergy)
559         {
560             /* Currently free energy is only supported for F_LJ14, so no need to check for that if we got here */
561             qqB              = md->chargeB[ai]*md->chargeB[aj]*fr->epsfac*fr->fudgeQQ;
562             c6B              = iparams[itype].lj14.c6B*6.0;
563             c12B             = iparams[itype].lj14.c12B*12.0;
564
565             fscal            = nb_free_energy_evaluate_single(r2,fr->sc_r_power,fr->sc_alphacoul,fr->sc_alphavdw,
566                                                               fr->tab14.scale,fr->tab14.data,qq,c6,c12,qqB,c6B,c12B,
567                                                               LFC,LFV,DLF,lfac_coul,lfac_vdw,dlfac_coul,dlfac_vdw,
568                                                               fr->sc_sigma6_def,fr->sc_sigma6_min,sigma2_def,sigma2_min,&velec,&vvdw,dvdl);
569         }
570         else
571         {
572             /* Evaluate tabulated interaction without free energy */
573             fscal            = nb_evaluate_single(r2,fr->tab14.scale,fr->tab14.data,qq,c6,c12,&velec,&vvdw);
574         }
575
576         energygrp_elec[gid]  += velec;
577         energygrp_vdw[gid]   += vvdw;
578         svmul(fscal,dx,dx);
579
580         /* Add the forces */
581         rvec_inc(f[ai],dx);
582         rvec_dec(f[aj],dx);
583
584         if (g)
585         {
586             /* Correct the shift forces using the graph */
587             ivec_sub(SHIFT_IVEC(g,ai),SHIFT_IVEC(g,aj),dt);
588             fshift_index = IVEC2IS(dt);
589         }
590         if(fshift_index!=CENTRAL)
591         {
592             rvec_inc(fshift[fshift_index],dx);
593             rvec_dec(fshift[CENTRAL],dx);
594         }
595     }
596     return 0.0;
597 }
598
599