Introduce gmxpre.h for truly global definitions
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_c.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS c kernel generator.
37  */
38 #include "gmxpre.h"
39
40 #include "config.h"
41
42 #include <math.h>
43
44 #include "../nb_kernel.h"
45 #include "gromacs/legacyheaders/types/simple.h"
46 #include "gromacs/math/vec.h"
47 #include "gromacs/legacyheaders/nrnb.h"
48
49 /*
50  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
51  * Electrostatics interaction: Coulomb
52  * VdW interaction:            CubicSplineTable
53  * Geometry:                   Water4-Water4
54  * Calculate force/pot:        PotentialAndForce
    *
    * For each i entry of nlist and each of its j neighbors this kernel treats
    * both entries as four consecutive sites (offsets 0..3 from the listed atom
    * index). Site pair 0-0 interacts through the tabulated Van der Waals
    * potential only; the nine pairs among sites 1..3 interact through plain
    * Coulomb only. Forces are accumulated into ff, shift forces into
    * fr->fshift, and the summed energies into the energygrp_elec and
    * energygrp_vdw arrays of kernel_data at index nlist->gid[i].
    *
    * Parameters (as used by the code below):
    *   nlist       - reads nri, iinr, jindex, jjnr, shift and gid.
    *   xx          - coordinates, read through a flat real pointer (xx[0])
    *                 indexed as DIM*atom+component.
    *   ff          - forces, updated in place through a flat real pointer
    *                 (ff[0]) with the same indexing.
    *   fr          - reads shift_vec, epsfac, ntype and nbfp; updates fshift.
    *   mdatoms     - reads chargeA and typeA.
    *   kernel_data - reads table_vdw->data and table_vdw->scale; updates the
    *                 energygrp_elec and energygrp_vdw accumulators. It is
    *                 tagged gmx_unused in the signature but is used here.
    *   nrnb        - flop counters, updated once through inc_nrnb().
    *
    * Charges and the c6/c12 pair are read once from the molecule at
    * nlist->iinr[0] and reused for every i and j molecule, so all molecules
    * in the list are presumably identical - TODO confirm against the caller.
55  */
56 void
57 nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_c
58                     (t_nblist                    * gmx_restrict       nlist,
59                      rvec                        * gmx_restrict          xx,
60                      rvec                        * gmx_restrict          ff,
61                      t_forcerec                  * gmx_restrict          fr,
62                      t_mdatoms                   * gmx_restrict     mdatoms,
63                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
64                      t_nrnb                      * gmx_restrict        nrnb)
65 {
66     int              i_shift_offset,i_coord_offset,j_coord_offset;
67     int              j_index_start,j_index_end;
68     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
69     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
70     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
71     real             *shiftvec,*fshift,*x,*f;
72     int              vdwioffset0;
73     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
74     int              vdwioffset1;
75     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
76     int              vdwioffset2;
77     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
78     int              vdwioffset3;
79     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
80     int              vdwjidx0;
81     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
82     int              vdwjidx1;
83     real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
84     int              vdwjidx2;
85     real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
86     int              vdwjidx3;
87     real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
88     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
89     real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
90     real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
91     real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
92     real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
93     real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
94     real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
95     real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
96     real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
97     real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
98     real             velec,felec,velecsum,facel,crf,krf,krf2;
99     real             *charge;
100     int              nvdwtype;
101     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
102     int              *vdwtype;
103     real             *vdwparam;
104     int              vfitab;
105     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
106     real             *vftab;
107
108     x                = xx[0];
109     f                = ff[0];
110
111     nri              = nlist->nri;
112     iinr             = nlist->iinr;
113     jindex           = nlist->jindex;
114     jjnr             = nlist->jjnr;
115     shiftidx         = nlist->shift;
116     gid              = nlist->gid;
117     shiftvec         = fr->shift_vec[0];
118     fshift           = fr->fshift[0];
119     facel            = fr->epsfac;
120     charge           = mdatoms->chargeA;
121     nvdwtype         = fr->ntype;
122     vdwparam         = fr->nbfp;
123     vdwtype          = mdatoms->typeA;
124
125     vftab            = kernel_data->table_vdw->data;
126     vftabscale       = kernel_data->table_vdw->scale;
127
128     /* Setup water-specific parameters.
         * Everything in this section is taken from the first i molecule (iinr[0])
         * and reused for all i and j molecules. The i-side charges are
         * pre-multiplied by facel (fr->epsfac), so the qq products already
         * contain that factor. Site 0 only gets VdW parameters (c6_00, c12_00);
         * sites 1-3 only get charges.
         * NOTE(review): iinr[0] is read here even when nri is 0 - presumably
         * callers never pass an empty list; confirm.
         */
129     inr              = nlist->iinr[0];
130     iq1              = facel*charge[inr+1];
131     iq2              = facel*charge[inr+2];
132     iq3              = facel*charge[inr+3];
133     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
134
135     jq1              = charge[inr+1];
136     jq2              = charge[inr+2];
137     jq3              = charge[inr+3];
138     vdwjidx0         = 2*vdwtype[inr+0];
139     c6_00            = vdwparam[vdwioffset0+vdwjidx0];
140     c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
141     qq11             = iq1*jq1;
142     qq12             = iq1*jq2;
143     qq13             = iq1*jq3;
144     qq21             = iq2*jq1;
145     qq22             = iq2*jq2;
146     qq23             = iq2*jq3;
147     qq31             = iq3*jq1;
148     qq32             = iq3*jq2;
149     qq33             = iq3*jq3;
150
151     outeriter        = 0;
152     inneriter        = 0;
153
154     /* Start outer loop over neighborlists */
155     for(iidx=0; iidx<nri; iidx++)
156     {
157         /* Load shift vector for this list */
158         i_shift_offset   = DIM*shiftidx[iidx];
159         shX              = shiftvec[i_shift_offset+XX];
160         shY              = shiftvec[i_shift_offset+YY];
161         shZ              = shiftvec[i_shift_offset+ZZ];
162
163         /* Load limits for loop over neighbors */
164         j_index_start    = jindex[iidx];
165         j_index_end      = jindex[iidx+1];
166
167         /* Get outer coordinate index */
168         inr              = iinr[iidx];
169         i_coord_offset   = DIM*inr;
170
171         /* Load i particle coords and add shift vector */
172         ix0              = shX + x[i_coord_offset+DIM*0+XX];
173         iy0              = shY + x[i_coord_offset+DIM*0+YY];
174         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
175         ix1              = shX + x[i_coord_offset+DIM*1+XX];
176         iy1              = shY + x[i_coord_offset+DIM*1+YY];
177         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
178         ix2              = shX + x[i_coord_offset+DIM*2+XX];
179         iy2              = shY + x[i_coord_offset+DIM*2+YY];
180         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
181         ix3              = shX + x[i_coord_offset+DIM*3+XX];
182         iy3              = shY + x[i_coord_offset+DIM*3+YY];
183         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
184
185         fix0             = 0.0;
186         fiy0             = 0.0;
187         fiz0             = 0.0;
188         fix1             = 0.0;
189         fiy1             = 0.0;
190         fiz1             = 0.0;
191         fix2             = 0.0;
192         fiy2             = 0.0;
193         fiz2             = 0.0;
194         fix3             = 0.0;
195         fiy3             = 0.0;
196         fiz3             = 0.0;
197
198         /* Reset potential sums */
199         velecsum         = 0.0;
200         vvdwsum          = 0.0;
201
202         /* Start inner kernel loop */
203         for(jidx=j_index_start; jidx<j_index_end; jidx++)
204         {
205             /* Get j neighbor index, and coordinate index */
206             jnr              = jjnr[jidx];
207             j_coord_offset   = DIM*jnr;
208
209             /* load j atom coordinates */
210             jx0              = x[j_coord_offset+DIM*0+XX];
211             jy0              = x[j_coord_offset+DIM*0+YY];
212             jz0              = x[j_coord_offset+DIM*0+ZZ];
213             jx1              = x[j_coord_offset+DIM*1+XX];
214             jy1              = x[j_coord_offset+DIM*1+YY];
215             jz1              = x[j_coord_offset+DIM*1+ZZ];
216             jx2              = x[j_coord_offset+DIM*2+XX];
217             jy2              = x[j_coord_offset+DIM*2+YY];
218             jz2              = x[j_coord_offset+DIM*2+ZZ];
219             jx3              = x[j_coord_offset+DIM*3+XX];
220             jy3              = x[j_coord_offset+DIM*3+YY];
221             jz3              = x[j_coord_offset+DIM*3+ZZ];
222
223             /* Calculate displacement vector */
224             dx00             = ix0 - jx0;
225             dy00             = iy0 - jy0;
226             dz00             = iz0 - jz0;
227             dx11             = ix1 - jx1;
228             dy11             = iy1 - jy1;
229             dz11             = iz1 - jz1;
230             dx12             = ix1 - jx2;
231             dy12             = iy1 - jy2;
232             dz12             = iz1 - jz2;
233             dx13             = ix1 - jx3;
234             dy13             = iy1 - jy3;
235             dz13             = iz1 - jz3;
236             dx21             = ix2 - jx1;
237             dy21             = iy2 - jy1;
238             dz21             = iz2 - jz1;
239             dx22             = ix2 - jx2;
240             dy22             = iy2 - jy2;
241             dz22             = iz2 - jz2;
242             dx23             = ix2 - jx3;
243             dy23             = iy2 - jy3;
244             dz23             = iz2 - jz3;
245             dx31             = ix3 - jx1;
246             dy31             = iy3 - jy1;
247             dz31             = iz3 - jz1;
248             dx32             = ix3 - jx2;
249             dy32             = iy3 - jy2;
250             dz32             = iz3 - jz2;
251             dx33             = ix3 - jx3;
252             dy33             = iy3 - jy3;
253             dz33             = iz3 - jz3;
254
255             /* Calculate squared distance and things based on it */
256             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
257             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
258             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
259             rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
260             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
261             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
262             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
263             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
264             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
265             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
266
267             rinv00           = gmx_invsqrt(rsq00);
268             rinv11           = gmx_invsqrt(rsq11);
269             rinv12           = gmx_invsqrt(rsq12);
270             rinv13           = gmx_invsqrt(rsq13);
271             rinv21           = gmx_invsqrt(rsq21);
272             rinv22           = gmx_invsqrt(rsq22);
273             rinv23           = gmx_invsqrt(rsq23);
274             rinv31           = gmx_invsqrt(rsq31);
275             rinv32           = gmx_invsqrt(rsq32);
276             rinv33           = gmx_invsqrt(rsq33);
277
                /* rinvsq00 is not computed: the 0-0 pair below is table-only and
                 * uses rinv00 alone; only the Coulomb pairs need 1/r^2.
                 */
278             rinvsq11         = rinv11*rinv11;
279             rinvsq12         = rinv12*rinv12;
280             rinvsq13         = rinv13*rinv13;
281             rinvsq21         = rinv21*rinv21;
282             rinvsq22         = rinv22*rinv22;
283             rinvsq23         = rinv23*rinv23;
284             rinvsq31         = rinv31*rinv31;
285             rinvsq32         = rinv32*rinv32;
286             rinvsq33         = rinv33*rinv33;
287
288             /**************************
289              * CALCULATE INTERACTIONS *
290              **************************/
291
292             r00              = rsq00*rinv00;
293
294             /* Calculate table index by multiplying r with table scale and truncate to integer.
                 * vfeps is the fractional remainder used for interpolation. Each
                 * table point occupies 2*4 reals: Y,F,G,H for dispersion (offsets
                 * 0-3) followed by Y,F,G,H for repulsion (offsets 4-7).
                 */
295             rt               = r00*vftabscale;
296             vfitab           = rt;
297             vfeps            = rt-vfitab;
298             vfitab           = 2*4*vfitab;
299
300             /* CUBIC SPLINE TABLE DISPERSION */
301             vfitab          += 0;
302             Y                = vftab[vfitab];
303             F                = vftab[vfitab+1];
304             Geps             = vfeps*vftab[vfitab+2];
305             Heps2            = vfeps*vfeps*vftab[vfitab+3];
306             Fp               = F+Geps+Heps2;
307             VV               = Y+vfeps*Fp;
308             vvdw6            = c6_00*VV;
309             FF               = Fp+Geps+2.0*Heps2;
310             fvdw6            = c6_00*FF;
311
312             /* CUBIC SPLINE TABLE REPULSION */
313             Y                = vftab[vfitab+4];
314             F                = vftab[vfitab+5];
315             Geps             = vfeps*vftab[vfitab+6];
316             Heps2            = vfeps*vfeps*vftab[vfitab+7];
317             Fp               = F+Geps+Heps2;
318             VV               = Y+vfeps*Fp;
319             vvdw12           = c12_00*VV;
320             FF               = Fp+Geps+2.0*Heps2;
321             fvdw12           = c12_00*FF;
322             vvdw             = vvdw12+vvdw6;
                /* Scalar force factor: the summed table FF terms times
                 * -vftabscale*rinv00; it multiplies the displacement components below.
                 */
323             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
324
325             /* Update potential sums from outer loop */
326             vvdwsum         += vvdw;
327
328             fscal            = fvdw;
329
330             /* Calculate temporary vectorial force */
331             tx               = fscal*dx00;
332             ty               = fscal*dy00;
333             tz               = fscal*dz00;
334
335             /* Update vectorial force */
336             fix0            += tx;
337             fiy0            += ty;
338             fiz0            += tz;
339             f[j_coord_offset+DIM*0+XX] -= tx;
340             f[j_coord_offset+DIM*0+YY] -= ty;
341             f[j_coord_offset+DIM*0+ZZ] -= tz;
342
343             /**************************
344              * CALCULATE INTERACTIONS *
345              **************************/
346
347             /* COULOMB ELECTROSTATICS: velec = qq/r and the force scalar is
                 * velec/r^2; the same form is repeated for all nine pairs among
                 * sites 1-3 below.
                 */
348             velec            = qq11*rinv11;
349             felec            = velec*rinvsq11;
350
351             /* Update potential sums from outer loop */
352             velecsum        += velec;
353
354             fscal            = felec;
355
356             /* Calculate temporary vectorial force */
357             tx               = fscal*dx11;
358             ty               = fscal*dy11;
359             tz               = fscal*dz11;
360
361             /* Update vectorial force */
362             fix1            += tx;
363             fiy1            += ty;
364             fiz1            += tz;
365             f[j_coord_offset+DIM*1+XX] -= tx;
366             f[j_coord_offset+DIM*1+YY] -= ty;
367             f[j_coord_offset+DIM*1+ZZ] -= tz;
368
369             /**************************
370              * CALCULATE INTERACTIONS *
371              **************************/
372
373             /* COULOMB ELECTROSTATICS */
374             velec            = qq12*rinv12;
375             felec            = velec*rinvsq12;
376
377             /* Update potential sums from outer loop */
378             velecsum        += velec;
379
380             fscal            = felec;
381
382             /* Calculate temporary vectorial force */
383             tx               = fscal*dx12;
384             ty               = fscal*dy12;
385             tz               = fscal*dz12;
386
387             /* Update vectorial force */
388             fix1            += tx;
389             fiy1            += ty;
390             fiz1            += tz;
391             f[j_coord_offset+DIM*2+XX] -= tx;
392             f[j_coord_offset+DIM*2+YY] -= ty;
393             f[j_coord_offset+DIM*2+ZZ] -= tz;
394
395             /**************************
396              * CALCULATE INTERACTIONS *
397              **************************/
398
399             /* COULOMB ELECTROSTATICS */
400             velec            = qq13*rinv13;
401             felec            = velec*rinvsq13;
402
403             /* Update potential sums from outer loop */
404             velecsum        += velec;
405
406             fscal            = felec;
407
408             /* Calculate temporary vectorial force */
409             tx               = fscal*dx13;
410             ty               = fscal*dy13;
411             tz               = fscal*dz13;
412
413             /* Update vectorial force */
414             fix1            += tx;
415             fiy1            += ty;
416             fiz1            += tz;
417             f[j_coord_offset+DIM*3+XX] -= tx;
418             f[j_coord_offset+DIM*3+YY] -= ty;
419             f[j_coord_offset+DIM*3+ZZ] -= tz;
420
421             /**************************
422              * CALCULATE INTERACTIONS *
423              **************************/
424
425             /* COULOMB ELECTROSTATICS */
426             velec            = qq21*rinv21;
427             felec            = velec*rinvsq21;
428
429             /* Update potential sums from outer loop */
430             velecsum        += velec;
431
432             fscal            = felec;
433
434             /* Calculate temporary vectorial force */
435             tx               = fscal*dx21;
436             ty               = fscal*dy21;
437             tz               = fscal*dz21;
438
439             /* Update vectorial force */
440             fix2            += tx;
441             fiy2            += ty;
442             fiz2            += tz;
443             f[j_coord_offset+DIM*1+XX] -= tx;
444             f[j_coord_offset+DIM*1+YY] -= ty;
445             f[j_coord_offset+DIM*1+ZZ] -= tz;
446
447             /**************************
448              * CALCULATE INTERACTIONS *
449              **************************/
450
451             /* COULOMB ELECTROSTATICS */
452             velec            = qq22*rinv22;
453             felec            = velec*rinvsq22;
454
455             /* Update potential sums from outer loop */
456             velecsum        += velec;
457
458             fscal            = felec;
459
460             /* Calculate temporary vectorial force */
461             tx               = fscal*dx22;
462             ty               = fscal*dy22;
463             tz               = fscal*dz22;
464
465             /* Update vectorial force */
466             fix2            += tx;
467             fiy2            += ty;
468             fiz2            += tz;
469             f[j_coord_offset+DIM*2+XX] -= tx;
470             f[j_coord_offset+DIM*2+YY] -= ty;
471             f[j_coord_offset+DIM*2+ZZ] -= tz;
472
473             /**************************
474              * CALCULATE INTERACTIONS *
475              **************************/
476
477             /* COULOMB ELECTROSTATICS */
478             velec            = qq23*rinv23;
479             felec            = velec*rinvsq23;
480
481             /* Update potential sums from outer loop */
482             velecsum        += velec;
483
484             fscal            = felec;
485
486             /* Calculate temporary vectorial force */
487             tx               = fscal*dx23;
488             ty               = fscal*dy23;
489             tz               = fscal*dz23;
490
491             /* Update vectorial force */
492             fix2            += tx;
493             fiy2            += ty;
494             fiz2            += tz;
495             f[j_coord_offset+DIM*3+XX] -= tx;
496             f[j_coord_offset+DIM*3+YY] -= ty;
497             f[j_coord_offset+DIM*3+ZZ] -= tz;
498
499             /**************************
500              * CALCULATE INTERACTIONS *
501              **************************/
502
503             /* COULOMB ELECTROSTATICS */
504             velec            = qq31*rinv31;
505             felec            = velec*rinvsq31;
506
507             /* Update potential sums from outer loop */
508             velecsum        += velec;
509
510             fscal            = felec;
511
512             /* Calculate temporary vectorial force */
513             tx               = fscal*dx31;
514             ty               = fscal*dy31;
515             tz               = fscal*dz31;
516
517             /* Update vectorial force */
518             fix3            += tx;
519             fiy3            += ty;
520             fiz3            += tz;
521             f[j_coord_offset+DIM*1+XX] -= tx;
522             f[j_coord_offset+DIM*1+YY] -= ty;
523             f[j_coord_offset+DIM*1+ZZ] -= tz;
524
525             /**************************
526              * CALCULATE INTERACTIONS *
527              **************************/
528
529             /* COULOMB ELECTROSTATICS */
530             velec            = qq32*rinv32;
531             felec            = velec*rinvsq32;
532
533             /* Update potential sums from outer loop */
534             velecsum        += velec;
535
536             fscal            = felec;
537
538             /* Calculate temporary vectorial force */
539             tx               = fscal*dx32;
540             ty               = fscal*dy32;
541             tz               = fscal*dz32;
542
543             /* Update vectorial force */
544             fix3            += tx;
545             fiy3            += ty;
546             fiz3            += tz;
547             f[j_coord_offset+DIM*2+XX] -= tx;
548             f[j_coord_offset+DIM*2+YY] -= ty;
549             f[j_coord_offset+DIM*2+ZZ] -= tz;
550
551             /**************************
552              * CALCULATE INTERACTIONS *
553              **************************/
554
555             /* COULOMB ELECTROSTATICS */
556             velec            = qq33*rinv33;
557             felec            = velec*rinvsq33;
558
559             /* Update potential sums from outer loop */
560             velecsum        += velec;
561
562             fscal            = felec;
563
564             /* Calculate temporary vectorial force */
565             tx               = fscal*dx33;
566             ty               = fscal*dy33;
567             tz               = fscal*dz33;
568
569             /* Update vectorial force */
570             fix3            += tx;
571             fiy3            += ty;
572             fiz3            += tz;
573             f[j_coord_offset+DIM*3+XX] -= tx;
574             f[j_coord_offset+DIM*3+YY] -= ty;
575             f[j_coord_offset+DIM*3+ZZ] -= tz;
576
577             /* Inner loop uses 298 flops */
578         }
579         /* End of innermost loop */
580
            /* Add the accumulated i-site forces to f, and add their total over
             * the four sites to the shift-force entry for this list.
             */
581         tx = ty = tz = 0;
582         f[i_coord_offset+DIM*0+XX] += fix0;
583         f[i_coord_offset+DIM*0+YY] += fiy0;
584         f[i_coord_offset+DIM*0+ZZ] += fiz0;
585         tx                         += fix0;
586         ty                         += fiy0;
587         tz                         += fiz0;
588         f[i_coord_offset+DIM*1+XX] += fix1;
589         f[i_coord_offset+DIM*1+YY] += fiy1;
590         f[i_coord_offset+DIM*1+ZZ] += fiz1;
591         tx                         += fix1;
592         ty                         += fiy1;
593         tz                         += fiz1;
594         f[i_coord_offset+DIM*2+XX] += fix2;
595         f[i_coord_offset+DIM*2+YY] += fiy2;
596         f[i_coord_offset+DIM*2+ZZ] += fiz2;
597         tx                         += fix2;
598         ty                         += fiy2;
599         tz                         += fiz2;
600         f[i_coord_offset+DIM*3+XX] += fix3;
601         f[i_coord_offset+DIM*3+YY] += fiy3;
602         f[i_coord_offset+DIM*3+ZZ] += fiz3;
603         tx                         += fix3;
604         ty                         += fiy3;
605         tz                         += fiz3;
606         fshift[i_shift_offset+XX]  += tx;
607         fshift[i_shift_offset+YY]  += ty;
608         fshift[i_shift_offset+ZZ]  += tz;
609
610         ggid                        = gid[iidx];
611         /* Update potential energies */
612         kernel_data->energygrp_elec[ggid] += velecsum;
613         kernel_data->energygrp_vdw[ggid] += vvdwsum;
614
615         /* Increment number of inner iterations */
616         inneriter                  += j_index_end - j_index_start;
617
618         /* Outer loop uses 41 flops */
619     }
620
621     /* Increment number of outer iterations */
622     outeriter        += nri;
623
624     /* Update outer/inner flops */
625
626     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*41 + inneriter*298);
627 }
628 /*
629  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
630  * Electrostatics interaction: Coulomb
631  * VdW interaction:            CubicSplineTable
632  * Geometry:                   Water4-Water4
633  * Calculate force/pot:        Force
634  */
635 void
636 nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_c
637                     (t_nblist                    * gmx_restrict       nlist,
638                      rvec                        * gmx_restrict          xx,
639                      rvec                        * gmx_restrict          ff,
640                      t_forcerec                  * gmx_restrict          fr,
641                      t_mdatoms                   * gmx_restrict     mdatoms,
642                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
643                      t_nrnb                      * gmx_restrict        nrnb)
644 {
645     int              i_shift_offset,i_coord_offset,j_coord_offset;
646     int              j_index_start,j_index_end;
647     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
648     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
649     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
650     real             *shiftvec,*fshift,*x,*f;
651     int              vdwioffset0;
652     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
653     int              vdwioffset1;
654     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
655     int              vdwioffset2;
656     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
657     int              vdwioffset3;
658     real             ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3;
659     int              vdwjidx0;
660     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
661     int              vdwjidx1;
662     real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
663     int              vdwjidx2;
664     real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
665     int              vdwjidx3;
666     real             jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3;
667     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
668     real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
669     real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
670     real             dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13,cexp1_13,cexp2_13;
671     real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
672     real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
673     real             dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23,cexp1_23,cexp2_23;
674     real             dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31,cexp1_31,cexp2_31;
675     real             dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32,cexp1_32,cexp2_32;
676     real             dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33,cexp1_33,cexp2_33;
677     real             velec,felec,velecsum,facel,crf,krf,krf2;
678     real             *charge;
679     int              nvdwtype;
680     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
681     int              *vdwtype;
682     real             *vdwparam;
683     int              vfitab;
684     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
685     real             *vftab;
686
687     x                = xx[0];
688     f                = ff[0];
689
690     nri              = nlist->nri;
691     iinr             = nlist->iinr;
692     jindex           = nlist->jindex;
693     jjnr             = nlist->jjnr;
694     shiftidx         = nlist->shift;
695     gid              = nlist->gid;
696     shiftvec         = fr->shift_vec[0];
697     fshift           = fr->fshift[0];
698     facel            = fr->epsfac;
699     charge           = mdatoms->chargeA;
700     nvdwtype         = fr->ntype;
701     vdwparam         = fr->nbfp;
702     vdwtype          = mdatoms->typeA;
703
704     vftab            = kernel_data->table_vdw->data;
705     vftabscale       = kernel_data->table_vdw->scale;
706
707     /* Setup water-specific parameters */
708     inr              = nlist->iinr[0];
709     iq1              = facel*charge[inr+1];
710     iq2              = facel*charge[inr+2];
711     iq3              = facel*charge[inr+3];
712     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
713
714     jq1              = charge[inr+1];
715     jq2              = charge[inr+2];
716     jq3              = charge[inr+3];
717     vdwjidx0         = 2*vdwtype[inr+0];
718     c6_00            = vdwparam[vdwioffset0+vdwjidx0];
719     c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
720     qq11             = iq1*jq1;
721     qq12             = iq1*jq2;
722     qq13             = iq1*jq3;
723     qq21             = iq2*jq1;
724     qq22             = iq2*jq2;
725     qq23             = iq2*jq3;
726     qq31             = iq3*jq1;
727     qq32             = iq3*jq2;
728     qq33             = iq3*jq3;
729
730     outeriter        = 0;
731     inneriter        = 0;
732
733     /* Start outer loop over neighborlists */
734     for(iidx=0; iidx<nri; iidx++)
735     {
736         /* Load shift vector for this list */
737         i_shift_offset   = DIM*shiftidx[iidx];
738         shX              = shiftvec[i_shift_offset+XX];
739         shY              = shiftvec[i_shift_offset+YY];
740         shZ              = shiftvec[i_shift_offset+ZZ];
741
742         /* Load limits for loop over neighbors */
743         j_index_start    = jindex[iidx];
744         j_index_end      = jindex[iidx+1];
745
746         /* Get outer coordinate index */
747         inr              = iinr[iidx];
748         i_coord_offset   = DIM*inr;
749
750         /* Load i particle coords and add shift vector */
751         ix0              = shX + x[i_coord_offset+DIM*0+XX];
752         iy0              = shY + x[i_coord_offset+DIM*0+YY];
753         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
754         ix1              = shX + x[i_coord_offset+DIM*1+XX];
755         iy1              = shY + x[i_coord_offset+DIM*1+YY];
756         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
757         ix2              = shX + x[i_coord_offset+DIM*2+XX];
758         iy2              = shY + x[i_coord_offset+DIM*2+YY];
759         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
760         ix3              = shX + x[i_coord_offset+DIM*3+XX];
761         iy3              = shY + x[i_coord_offset+DIM*3+YY];
762         iz3              = shZ + x[i_coord_offset+DIM*3+ZZ];
763
764         fix0             = 0.0;
765         fiy0             = 0.0;
766         fiz0             = 0.0;
767         fix1             = 0.0;
768         fiy1             = 0.0;
769         fiz1             = 0.0;
770         fix2             = 0.0;
771         fiy2             = 0.0;
772         fiz2             = 0.0;
773         fix3             = 0.0;
774         fiy3             = 0.0;
775         fiz3             = 0.0;
776
777         /* Start inner kernel loop */
778         for(jidx=j_index_start; jidx<j_index_end; jidx++)
779         {
780             /* Get j neighbor index, and coordinate index */
781             jnr              = jjnr[jidx];
782             j_coord_offset   = DIM*jnr;
783
784             /* load j atom coordinates */
785             jx0              = x[j_coord_offset+DIM*0+XX];
786             jy0              = x[j_coord_offset+DIM*0+YY];
787             jz0              = x[j_coord_offset+DIM*0+ZZ];
788             jx1              = x[j_coord_offset+DIM*1+XX];
789             jy1              = x[j_coord_offset+DIM*1+YY];
790             jz1              = x[j_coord_offset+DIM*1+ZZ];
791             jx2              = x[j_coord_offset+DIM*2+XX];
792             jy2              = x[j_coord_offset+DIM*2+YY];
793             jz2              = x[j_coord_offset+DIM*2+ZZ];
794             jx3              = x[j_coord_offset+DIM*3+XX];
795             jy3              = x[j_coord_offset+DIM*3+YY];
796             jz3              = x[j_coord_offset+DIM*3+ZZ];
797
798             /* Calculate displacement vector */
799             dx00             = ix0 - jx0;
800             dy00             = iy0 - jy0;
801             dz00             = iz0 - jz0;
802             dx11             = ix1 - jx1;
803             dy11             = iy1 - jy1;
804             dz11             = iz1 - jz1;
805             dx12             = ix1 - jx2;
806             dy12             = iy1 - jy2;
807             dz12             = iz1 - jz2;
808             dx13             = ix1 - jx3;
809             dy13             = iy1 - jy3;
810             dz13             = iz1 - jz3;
811             dx21             = ix2 - jx1;
812             dy21             = iy2 - jy1;
813             dz21             = iz2 - jz1;
814             dx22             = ix2 - jx2;
815             dy22             = iy2 - jy2;
816             dz22             = iz2 - jz2;
817             dx23             = ix2 - jx3;
818             dy23             = iy2 - jy3;
819             dz23             = iz2 - jz3;
820             dx31             = ix3 - jx1;
821             dy31             = iy3 - jy1;
822             dz31             = iz3 - jz1;
823             dx32             = ix3 - jx2;
824             dy32             = iy3 - jy2;
825             dz32             = iz3 - jz2;
826             dx33             = ix3 - jx3;
827             dy33             = iy3 - jy3;
828             dz33             = iz3 - jz3;
829
830             /* Calculate squared distance and things based on it */
831             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
832             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
833             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
834             rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
835             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
836             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
837             rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
838             rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
839             rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
840             rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
841
842             rinv00           = gmx_invsqrt(rsq00);
843             rinv11           = gmx_invsqrt(rsq11);
844             rinv12           = gmx_invsqrt(rsq12);
845             rinv13           = gmx_invsqrt(rsq13);
846             rinv21           = gmx_invsqrt(rsq21);
847             rinv22           = gmx_invsqrt(rsq22);
848             rinv23           = gmx_invsqrt(rsq23);
849             rinv31           = gmx_invsqrt(rsq31);
850             rinv32           = gmx_invsqrt(rsq32);
851             rinv33           = gmx_invsqrt(rsq33);
852
853             rinvsq11         = rinv11*rinv11;
854             rinvsq12         = rinv12*rinv12;
855             rinvsq13         = rinv13*rinv13;
856             rinvsq21         = rinv21*rinv21;
857             rinvsq22         = rinv22*rinv22;
858             rinvsq23         = rinv23*rinv23;
859             rinvsq31         = rinv31*rinv31;
860             rinvsq32         = rinv32*rinv32;
861             rinvsq33         = rinv33*rinv33;
862
863             /**************************
864              * CALCULATE INTERACTIONS *
865              **************************/
866
867             r00              = rsq00*rinv00;
868
869             /* Calculate table index by multiplying r by the table scale and truncating to an integer */
870             rt               = r00*vftabscale;
871             vfitab           = rt;
872             vfeps            = rt-vfitab;
873             vfitab           = 2*4*vfitab;
874
875             /* CUBIC SPLINE TABLE DISPERSION */
876             vfitab          += 0;
877             F                = vftab[vfitab+1];
878             Geps             = vfeps*vftab[vfitab+2];
879             Heps2            = vfeps*vfeps*vftab[vfitab+3];
880             Fp               = F+Geps+Heps2;
881             FF               = Fp+Geps+2.0*Heps2;
882             fvdw6            = c6_00*FF;
883
884             /* CUBIC SPLINE TABLE REPULSION */
885             F                = vftab[vfitab+5];
886             Geps             = vfeps*vftab[vfitab+6];
887             Heps2            = vfeps*vfeps*vftab[vfitab+7];
888             Fp               = F+Geps+Heps2;
889             FF               = Fp+Geps+2.0*Heps2;
890             fvdw12           = c12_00*FF;
891             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
892
893             fscal            = fvdw;
894
895             /* Calculate temporary vectorial force */
896             tx               = fscal*dx00;
897             ty               = fscal*dy00;
898             tz               = fscal*dz00;
899
900             /* Update vectorial force */
901             fix0            += tx;
902             fiy0            += ty;
903             fiz0            += tz;
904             f[j_coord_offset+DIM*0+XX] -= tx;
905             f[j_coord_offset+DIM*0+YY] -= ty;
906             f[j_coord_offset+DIM*0+ZZ] -= tz;
907
908             /**************************
909              * CALCULATE INTERACTIONS *
910              **************************/
911
912             /* COULOMB ELECTROSTATICS */
913             velec            = qq11*rinv11;
914             felec            = velec*rinvsq11;
915
916             fscal            = felec;
917
918             /* Calculate temporary vectorial force */
919             tx               = fscal*dx11;
920             ty               = fscal*dy11;
921             tz               = fscal*dz11;
922
923             /* Update vectorial force */
924             fix1            += tx;
925             fiy1            += ty;
926             fiz1            += tz;
927             f[j_coord_offset+DIM*1+XX] -= tx;
928             f[j_coord_offset+DIM*1+YY] -= ty;
929             f[j_coord_offset+DIM*1+ZZ] -= tz;
930
931             /**************************
932              * CALCULATE INTERACTIONS *
933              **************************/
934
935             /* COULOMB ELECTROSTATICS */
936             velec            = qq12*rinv12;
937             felec            = velec*rinvsq12;
938
939             fscal            = felec;
940
941             /* Calculate temporary vectorial force */
942             tx               = fscal*dx12;
943             ty               = fscal*dy12;
944             tz               = fscal*dz12;
945
946             /* Update vectorial force */
947             fix1            += tx;
948             fiy1            += ty;
949             fiz1            += tz;
950             f[j_coord_offset+DIM*2+XX] -= tx;
951             f[j_coord_offset+DIM*2+YY] -= ty;
952             f[j_coord_offset+DIM*2+ZZ] -= tz;
953
954             /**************************
955              * CALCULATE INTERACTIONS *
956              **************************/
957
958             /* COULOMB ELECTROSTATICS */
959             velec            = qq13*rinv13;
960             felec            = velec*rinvsq13;
961
962             fscal            = felec;
963
964             /* Calculate temporary vectorial force */
965             tx               = fscal*dx13;
966             ty               = fscal*dy13;
967             tz               = fscal*dz13;
968
969             /* Update vectorial force */
970             fix1            += tx;
971             fiy1            += ty;
972             fiz1            += tz;
973             f[j_coord_offset+DIM*3+XX] -= tx;
974             f[j_coord_offset+DIM*3+YY] -= ty;
975             f[j_coord_offset+DIM*3+ZZ] -= tz;
976
977             /**************************
978              * CALCULATE INTERACTIONS *
979              **************************/
980
981             /* COULOMB ELECTROSTATICS */
982             velec            = qq21*rinv21;
983             felec            = velec*rinvsq21;
984
985             fscal            = felec;
986
987             /* Calculate temporary vectorial force */
988             tx               = fscal*dx21;
989             ty               = fscal*dy21;
990             tz               = fscal*dz21;
991
992             /* Update vectorial force */
993             fix2            += tx;
994             fiy2            += ty;
995             fiz2            += tz;
996             f[j_coord_offset+DIM*1+XX] -= tx;
997             f[j_coord_offset+DIM*1+YY] -= ty;
998             f[j_coord_offset+DIM*1+ZZ] -= tz;
999
1000             /**************************
1001              * CALCULATE INTERACTIONS *
1002              **************************/
1003
1004             /* COULOMB ELECTROSTATICS */
1005             velec            = qq22*rinv22;
1006             felec            = velec*rinvsq22;
1007
1008             fscal            = felec;
1009
1010             /* Calculate temporary vectorial force */
1011             tx               = fscal*dx22;
1012             ty               = fscal*dy22;
1013             tz               = fscal*dz22;
1014
1015             /* Update vectorial force */
1016             fix2            += tx;
1017             fiy2            += ty;
1018             fiz2            += tz;
1019             f[j_coord_offset+DIM*2+XX] -= tx;
1020             f[j_coord_offset+DIM*2+YY] -= ty;
1021             f[j_coord_offset+DIM*2+ZZ] -= tz;
1022
1023             /**************************
1024              * CALCULATE INTERACTIONS *
1025              **************************/
1026
1027             /* COULOMB ELECTROSTATICS */
1028             velec            = qq23*rinv23;
1029             felec            = velec*rinvsq23;
1030
1031             fscal            = felec;
1032
1033             /* Calculate temporary vectorial force */
1034             tx               = fscal*dx23;
1035             ty               = fscal*dy23;
1036             tz               = fscal*dz23;
1037
1038             /* Update vectorial force */
1039             fix2            += tx;
1040             fiy2            += ty;
1041             fiz2            += tz;
1042             f[j_coord_offset+DIM*3+XX] -= tx;
1043             f[j_coord_offset+DIM*3+YY] -= ty;
1044             f[j_coord_offset+DIM*3+ZZ] -= tz;
1045
1046             /**************************
1047              * CALCULATE INTERACTIONS *
1048              **************************/
1049
1050             /* COULOMB ELECTROSTATICS */
1051             velec            = qq31*rinv31;
1052             felec            = velec*rinvsq31;
1053
1054             fscal            = felec;
1055
1056             /* Calculate temporary vectorial force */
1057             tx               = fscal*dx31;
1058             ty               = fscal*dy31;
1059             tz               = fscal*dz31;
1060
1061             /* Update vectorial force */
1062             fix3            += tx;
1063             fiy3            += ty;
1064             fiz3            += tz;
1065             f[j_coord_offset+DIM*1+XX] -= tx;
1066             f[j_coord_offset+DIM*1+YY] -= ty;
1067             f[j_coord_offset+DIM*1+ZZ] -= tz;
1068
1069             /**************************
1070              * CALCULATE INTERACTIONS *
1071              **************************/
1072
1073             /* COULOMB ELECTROSTATICS */
1074             velec            = qq32*rinv32;
1075             felec            = velec*rinvsq32;
1076
1077             fscal            = felec;
1078
1079             /* Calculate temporary vectorial force */
1080             tx               = fscal*dx32;
1081             ty               = fscal*dy32;
1082             tz               = fscal*dz32;
1083
1084             /* Update vectorial force */
1085             fix3            += tx;
1086             fiy3            += ty;
1087             fiz3            += tz;
1088             f[j_coord_offset+DIM*2+XX] -= tx;
1089             f[j_coord_offset+DIM*2+YY] -= ty;
1090             f[j_coord_offset+DIM*2+ZZ] -= tz;
1091
1092             /**************************
1093              * CALCULATE INTERACTIONS *
1094              **************************/
1095
1096             /* COULOMB ELECTROSTATICS */
1097             velec            = qq33*rinv33;
1098             felec            = velec*rinvsq33;
1099
1100             fscal            = felec;
1101
1102             /* Calculate temporary vectorial force */
1103             tx               = fscal*dx33;
1104             ty               = fscal*dy33;
1105             tz               = fscal*dz33;
1106
1107             /* Update vectorial force */
1108             fix3            += tx;
1109             fiy3            += ty;
1110             fiz3            += tz;
1111             f[j_coord_offset+DIM*3+XX] -= tx;
1112             f[j_coord_offset+DIM*3+YY] -= ty;
1113             f[j_coord_offset+DIM*3+ZZ] -= tz;
1114
1115             /* Inner loop uses 281 flops */
1116         }
1117         /* End of innermost loop */
1118
1119         tx = ty = tz = 0;
1120         f[i_coord_offset+DIM*0+XX] += fix0;
1121         f[i_coord_offset+DIM*0+YY] += fiy0;
1122         f[i_coord_offset+DIM*0+ZZ] += fiz0;
1123         tx                         += fix0;
1124         ty                         += fiy0;
1125         tz                         += fiz0;
1126         f[i_coord_offset+DIM*1+XX] += fix1;
1127         f[i_coord_offset+DIM*1+YY] += fiy1;
1128         f[i_coord_offset+DIM*1+ZZ] += fiz1;
1129         tx                         += fix1;
1130         ty                         += fiy1;
1131         tz                         += fiz1;
1132         f[i_coord_offset+DIM*2+XX] += fix2;
1133         f[i_coord_offset+DIM*2+YY] += fiy2;
1134         f[i_coord_offset+DIM*2+ZZ] += fiz2;
1135         tx                         += fix2;
1136         ty                         += fiy2;
1137         tz                         += fiz2;
1138         f[i_coord_offset+DIM*3+XX] += fix3;
1139         f[i_coord_offset+DIM*3+YY] += fiy3;
1140         f[i_coord_offset+DIM*3+ZZ] += fiz3;
1141         tx                         += fix3;
1142         ty                         += fiy3;
1143         tz                         += fiz3;
1144         fshift[i_shift_offset+XX]  += tx;
1145         fshift[i_shift_offset+YY]  += ty;
1146         fshift[i_shift_offset+ZZ]  += tz;
1147
1148         /* Increment number of inner iterations */
1149         inneriter                  += j_index_end - j_index_start;
1150
1151         /* Outer loop uses 39 flops */
1152     }
1153
1154     /* Increment number of outer iterations */
1155     outeriter        += nri;
1156
1157     /* Update outer/inner flops */
1158
1159     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_F,outeriter*39 + inneriter*281);
1160 }