f0f2d149fecf032a34c3bd1b385a61e918c3cc8d
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_c / nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_c.c
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*
36  * Note: this file was generated by the GROMACS c kernel generator.
37  */
38 #include "config.h"
39
40 #include <math.h>
41
42 #include "../nb_kernel.h"
43 #include "gromacs/legacyheaders/types/simple.h"
44 #include "gromacs/math/vec.h"
45 #include "gromacs/legacyheaders/nrnb.h"
46
47 /*
48  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
49  * Electrostatics interaction: Coulomb
50  * VdW interaction:            CubicSplineTable
51  * Geometry:                   Water3-Water3
52  * Calculate force/pot:        PotentialAndForce
    *
    * For every i entry of the neighbor list and each of its j neighbors, both
    * molecules are handled as three consecutive sites (0, 1, 2) and the kernel
    * evaluates:
    *   - plain Coulomb (qq*rinv) for all nine site pairs, and
    *   - tabulated cubic-spline dispersion + repulsion for the 0-0 pair only.
    *
    * Parameters (as used by this function):
    *   nlist       - neighbor list; nri, iinr, jindex, jjnr, shift and gid are read.
    *   xx          - coordinates; read only.
    *   ff          - forces; the computed pair forces are accumulated into it.
    *   fr          - shift_vec, epsfac, ntype and nbfp are read; fshift is
    *                 incremented with the summed i-site force of each list entry.
    *   mdatoms     - chargeA and typeA are read.
    *   kernel_data - table_vdw (data, scale) is read; energygrp_elec and
    *                 energygrp_vdw are incremented at index nlist->gid[i].
    *   nrnb        - handed to inc_nrnb() together with the flop estimate.
    *
    * Charges and the c6/c12 pair are read once, from the atoms starting at
    * nlist->iinr[0], and reused for every i and j molecule in the list.
    *
    * NOTE(review): nlist->iinr[0] is read before the outer loop even when
    * nri == 0 - presumably callers only invoke the kernel on non-empty lists;
    * confirm.
    * NOTE(review): kernel_data carries the gmx_unused tag in the signature but
    * is used below (table_vdw, energygrp_elec, energygrp_vdw).
    * NOTE(review): several declared locals are never referenced in this
    * function, e.g. rcutoff, rcutoff2, isai0-2, isaj0-2, crf, krf, krf2,
    * rinvsix, rvdw, br, vvdwexp and sh_vdw_invrcut6.
53  */
54 void
55 nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_c
56                     (t_nblist                    * gmx_restrict       nlist,
57                      rvec                        * gmx_restrict          xx,
58                      rvec                        * gmx_restrict          ff,
59                      t_forcerec                  * gmx_restrict          fr,
60                      t_mdatoms                   * gmx_restrict     mdatoms,
61                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
62                      t_nrnb                      * gmx_restrict        nrnb)
63 {
64     int              i_shift_offset,i_coord_offset,j_coord_offset;
65     int              j_index_start,j_index_end;
66     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
67     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
68     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
69     real             *shiftvec,*fshift,*x,*f;
70     int              vdwioffset0;
71     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
72     int              vdwioffset1;
73     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
74     int              vdwioffset2;
75     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
76     int              vdwjidx0;
77     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
78     int              vdwjidx1;
79     real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
80     int              vdwjidx2;
81     real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
82     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
83     real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
84     real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
85     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
86     real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
87     real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
88     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
89     real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
90     real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
91     real             velec,felec,velecsum,facel,crf,krf,krf2;
92     real             *charge;
93     int              nvdwtype;
94     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
95     int              *vdwtype;
96     real             *vdwparam;
97     int              vfitab;
98     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
99     real             *vftab;
100     /* Use the rvec arrays as flat real arrays: component c of atom a is read/written at index DIM*a + c */
101     x                = xx[0];
102     f                = ff[0];
103
104     nri              = nlist->nri;
105     iinr             = nlist->iinr;
106     jindex           = nlist->jindex;
107     jjnr             = nlist->jjnr;
108     shiftidx         = nlist->shift;
109     gid              = nlist->gid;
110     shiftvec         = fr->shift_vec[0];
111     fshift           = fr->fshift[0];
112     facel            = fr->epsfac;
113     charge           = mdatoms->chargeA;
114     nvdwtype         = fr->ntype;
115     vdwparam         = fr->nbfp;
116     vdwtype          = mdatoms->typeA;
117     /* VdW table data and the scale that turns a distance r into a table coordinate (rt = r*vftabscale below) */
118     vftab            = kernel_data->table_vdw->data;
119     vftabscale       = kernel_data->table_vdw->scale;
120
121     /* Setup water-specific parameters: read once from the first i entry and reused for the whole list */
122     inr              = nlist->iinr[0];
123     iq0              = facel*charge[inr+0];
124     iq1              = facel*charge[inr+1];
125     iq2              = facel*charge[inr+2];
126     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
127     /* j-side values come from the same atoms (charges without facel); vdwparam is indexed as 2*(ntype*itype + jtype), c6 at +0 and c12 at +1 */
128     jq0              = charge[inr+0];
129     jq1              = charge[inr+1];
130     jq2              = charge[inr+2];
131     vdwjidx0         = 2*vdwtype[inr+0];
132     qq00             = iq0*jq0;
133     c6_00            = vdwparam[vdwioffset0+vdwjidx0];
134     c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
135     qq01             = iq0*jq1;
136     qq02             = iq0*jq2;
137     qq10             = iq1*jq0;
138     qq11             = iq1*jq1;
139     qq12             = iq1*jq2;
140     qq20             = iq2*jq0;
141     qq21             = iq2*jq1;
142     qq22             = iq2*jq2;
143
144     outeriter        = 0;
145     inneriter        = 0;
146
147     /* Start outer loop over neighborlists */
148     for(iidx=0; iidx<nri; iidx++)
149     {
150         /* Load shift vector for this list */
151         i_shift_offset   = DIM*shiftidx[iidx];
152         shX              = shiftvec[i_shift_offset+XX];
153         shY              = shiftvec[i_shift_offset+YY];
154         shZ              = shiftvec[i_shift_offset+ZZ];
155
156         /* Load limits for loop over neighbors */
157         j_index_start    = jindex[iidx];
158         j_index_end      = jindex[iidx+1];
159
160         /* Get outer coordinate index */
161         inr              = iinr[iidx];
162         i_coord_offset   = DIM*inr;
163
164         /* Load i particle coords and add shift vector */
165         ix0              = shX + x[i_coord_offset+DIM*0+XX];
166         iy0              = shY + x[i_coord_offset+DIM*0+YY];
167         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
168         ix1              = shX + x[i_coord_offset+DIM*1+XX];
169         iy1              = shY + x[i_coord_offset+DIM*1+YY];
170         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
171         ix2              = shX + x[i_coord_offset+DIM*2+XX];
172         iy2              = shY + x[i_coord_offset+DIM*2+YY];
173         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
174         /* Clear the i-site force accumulators; they are written back to f after the inner loop */
175         fix0             = 0.0;
176         fiy0             = 0.0;
177         fiz0             = 0.0;
178         fix1             = 0.0;
179         fiy1             = 0.0;
180         fiz1             = 0.0;
181         fix2             = 0.0;
182         fiy2             = 0.0;
183         fiz2             = 0.0;
184
185         /* Reset potential sums */
186         velecsum         = 0.0;
187         vvdwsum          = 0.0;
188
189         /* Start inner kernel loop */
190         for(jidx=j_index_start; jidx<j_index_end; jidx++)
191         {
192             /* Get j neighbor index, and coordinate index */
193             jnr              = jjnr[jidx];
194             j_coord_offset   = DIM*jnr;
195
196             /* load j atom coordinates */
197             jx0              = x[j_coord_offset+DIM*0+XX];
198             jy0              = x[j_coord_offset+DIM*0+YY];
199             jz0              = x[j_coord_offset+DIM*0+ZZ];
200             jx1              = x[j_coord_offset+DIM*1+XX];
201             jy1              = x[j_coord_offset+DIM*1+YY];
202             jz1              = x[j_coord_offset+DIM*1+ZZ];
203             jx2              = x[j_coord_offset+DIM*2+XX];
204             jy2              = x[j_coord_offset+DIM*2+YY];
205             jz2              = x[j_coord_offset+DIM*2+ZZ];
206
207             /* Calculate displacement vector */
208             dx00             = ix0 - jx0;
209             dy00             = iy0 - jy0;
210             dz00             = iz0 - jz0;
211             dx01             = ix0 - jx1;
212             dy01             = iy0 - jy1;
213             dz01             = iz0 - jz1;
214             dx02             = ix0 - jx2;
215             dy02             = iy0 - jy2;
216             dz02             = iz0 - jz2;
217             dx10             = ix1 - jx0;
218             dy10             = iy1 - jy0;
219             dz10             = iz1 - jz0;
220             dx11             = ix1 - jx1;
221             dy11             = iy1 - jy1;
222             dz11             = iz1 - jz1;
223             dx12             = ix1 - jx2;
224             dy12             = iy1 - jy2;
225             dz12             = iz1 - jz2;
226             dx20             = ix2 - jx0;
227             dy20             = iy2 - jy0;
228             dz20             = iz2 - jz0;
229             dx21             = ix2 - jx1;
230             dy21             = iy2 - jy1;
231             dz21             = iz2 - jz1;
232             dx22             = ix2 - jx2;
233             dy22             = iy2 - jy2;
234             dz22             = iz2 - jz2;
235
236             /* Calculate squared distance and things based on it */
237             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
238             rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
239             rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
240             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
241             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
242             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
243             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
244             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
245             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
246
247             rinv00           = gmx_invsqrt(rsq00);
248             rinv01           = gmx_invsqrt(rsq01);
249             rinv02           = gmx_invsqrt(rsq02);
250             rinv10           = gmx_invsqrt(rsq10);
251             rinv11           = gmx_invsqrt(rsq11);
252             rinv12           = gmx_invsqrt(rsq12);
253             rinv20           = gmx_invsqrt(rsq20);
254             rinv21           = gmx_invsqrt(rsq21);
255             rinv22           = gmx_invsqrt(rsq22);
256
257             rinvsq00         = rinv00*rinv00;
258             rinvsq01         = rinv01*rinv01;
259             rinvsq02         = rinv02*rinv02;
260             rinvsq10         = rinv10*rinv10;
261             rinvsq11         = rinv11*rinv11;
262             rinvsq12         = rinv12*rinv12;
263             rinvsq20         = rinv20*rinv20;
264             rinvsq21         = rinv21*rinv21;
265             rinvsq22         = rinv22*rinv22;
266
267             /**************************
268              * CALCULATE INTERACTIONS *
269              **************************/
270             /* Site pair 0-0: Coulomb plus tabulated VdW; the distance is recovered as rsq00*rinv00 */
271             r00              = rsq00*rinv00;
272
273             /* Calculate table index by multiplying r with table scale and truncate to integer */
274             rt               = r00*vftabscale;
275             vfitab           = rt;
276             vfeps            = rt-vfitab;
277             vfitab           = 2*4*vfitab;
278             /* vfeps is the fractional part of rt; each table point holds 8 reals: entries 0..3 (dispersion) and 4..7 (repulsion) */
279             /* COULOMB ELECTROSTATICS: velec = qq/r, felec = velec/r^2 is the scalar applied to the displacement */
280             velec            = qq00*rinv00;
281             felec            = velec*rinvsq00;
282
283             /* CUBIC SPLINE TABLE DISPERSION: VV = Y + eps*(F + G*eps + H*eps^2), FF = F + 2*G*eps + 3*H*eps^2 (derivative of VV in eps) */
284             vfitab          += 0; /* no-op: the dispersion entries start at offset 0 of the table point */
285             Y                = vftab[vfitab];
286             F                = vftab[vfitab+1];
287             Geps             = vfeps*vftab[vfitab+2];
288             Heps2            = vfeps*vfeps*vftab[vfitab+3];
289             Fp               = F+Geps+Heps2;
290             VV               = Y+vfeps*Fp;
291             vvdw6            = c6_00*VV;
292             FF               = Fp+Geps+2.0*Heps2;
293             fvdw6            = c6_00*FF;
294
295             /* CUBIC SPLINE TABLE REPULSION: same spline evaluation on entries 4..7, scaled by c12 */
296             Y                = vftab[vfitab+4];
297             F                = vftab[vfitab+5];
298             Geps             = vfeps*vftab[vfitab+6];
299             Heps2            = vfeps*vfeps*vftab[vfitab+7];
300             Fp               = F+Geps+Heps2;
301             VV               = Y+vfeps*Fp;
302             vvdw12           = c12_00*VV;
303             FF               = Fp+Geps+2.0*Heps2;
304             fvdw12           = c12_00*FF;
305             vvdw             = vvdw12+vvdw6;
306             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
307             /* fvdw: table derivative converted to a derivative in r (factor vftabscale), negated, and divided by r so it can scale the displacement */
308             /* Update potential sums from outer loop */
309             velecsum        += velec;
310             vvdwsum         += vvdw;
311
312             fscal            = felec+fvdw;
313
314             /* Calculate temporary vectorial force */
315             tx               = fscal*dx00;
316             ty               = fscal*dy00;
317             tz               = fscal*dz00;
318
319             /* Update vectorial force */
320             fix0            += tx;
321             fiy0            += ty;
322             fiz0            += tz;
323             f[j_coord_offset+DIM*0+XX] -= tx;
324             f[j_coord_offset+DIM*0+YY] -= ty;
325             f[j_coord_offset+DIM*0+ZZ] -= tz;
326
327             /**************************
328              * CALCULATE INTERACTIONS *
329              **************************/
330             /* Site pair 0-1: Coulomb only */
331             /* COULOMB ELECTROSTATICS */
332             velec            = qq01*rinv01;
333             felec            = velec*rinvsq01;
334
335             /* Update potential sums from outer loop */
336             velecsum        += velec;
337
338             fscal            = felec;
339
340             /* Calculate temporary vectorial force */
341             tx               = fscal*dx01;
342             ty               = fscal*dy01;
343             tz               = fscal*dz01;
344
345             /* Update vectorial force */
346             fix0            += tx;
347             fiy0            += ty;
348             fiz0            += tz;
349             f[j_coord_offset+DIM*1+XX] -= tx;
350             f[j_coord_offset+DIM*1+YY] -= ty;
351             f[j_coord_offset+DIM*1+ZZ] -= tz;
352
353             /**************************
354              * CALCULATE INTERACTIONS *
355              **************************/
356             /* Site pair 0-2: Coulomb only */
357             /* COULOMB ELECTROSTATICS */
358             velec            = qq02*rinv02;
359             felec            = velec*rinvsq02;
360
361             /* Update potential sums from outer loop */
362             velecsum        += velec;
363
364             fscal            = felec;
365
366             /* Calculate temporary vectorial force */
367             tx               = fscal*dx02;
368             ty               = fscal*dy02;
369             tz               = fscal*dz02;
370
371             /* Update vectorial force */
372             fix0            += tx;
373             fiy0            += ty;
374             fiz0            += tz;
375             f[j_coord_offset+DIM*2+XX] -= tx;
376             f[j_coord_offset+DIM*2+YY] -= ty;
377             f[j_coord_offset+DIM*2+ZZ] -= tz;
378
379             /**************************
380              * CALCULATE INTERACTIONS *
381              **************************/
382             /* Site pair 1-0: Coulomb only */
383             /* COULOMB ELECTROSTATICS */
384             velec            = qq10*rinv10;
385             felec            = velec*rinvsq10;
386
387             /* Update potential sums from outer loop */
388             velecsum        += velec;
389
390             fscal            = felec;
391
392             /* Calculate temporary vectorial force */
393             tx               = fscal*dx10;
394             ty               = fscal*dy10;
395             tz               = fscal*dz10;
396
397             /* Update vectorial force */
398             fix1            += tx;
399             fiy1            += ty;
400             fiz1            += tz;
401             f[j_coord_offset+DIM*0+XX] -= tx;
402             f[j_coord_offset+DIM*0+YY] -= ty;
403             f[j_coord_offset+DIM*0+ZZ] -= tz;
404
405             /**************************
406              * CALCULATE INTERACTIONS *
407              **************************/
408             /* Site pair 1-1: Coulomb only */
409             /* COULOMB ELECTROSTATICS */
410             velec            = qq11*rinv11;
411             felec            = velec*rinvsq11;
412
413             /* Update potential sums from outer loop */
414             velecsum        += velec;
415
416             fscal            = felec;
417
418             /* Calculate temporary vectorial force */
419             tx               = fscal*dx11;
420             ty               = fscal*dy11;
421             tz               = fscal*dz11;
422
423             /* Update vectorial force */
424             fix1            += tx;
425             fiy1            += ty;
426             fiz1            += tz;
427             f[j_coord_offset+DIM*1+XX] -= tx;
428             f[j_coord_offset+DIM*1+YY] -= ty;
429             f[j_coord_offset+DIM*1+ZZ] -= tz;
430
431             /**************************
432              * CALCULATE INTERACTIONS *
433              **************************/
434             /* Site pair 1-2: Coulomb only */
435             /* COULOMB ELECTROSTATICS */
436             velec            = qq12*rinv12;
437             felec            = velec*rinvsq12;
438
439             /* Update potential sums from outer loop */
440             velecsum        += velec;
441
442             fscal            = felec;
443
444             /* Calculate temporary vectorial force */
445             tx               = fscal*dx12;
446             ty               = fscal*dy12;
447             tz               = fscal*dz12;
448
449             /* Update vectorial force */
450             fix1            += tx;
451             fiy1            += ty;
452             fiz1            += tz;
453             f[j_coord_offset+DIM*2+XX] -= tx;
454             f[j_coord_offset+DIM*2+YY] -= ty;
455             f[j_coord_offset+DIM*2+ZZ] -= tz;
456
457             /**************************
458              * CALCULATE INTERACTIONS *
459              **************************/
460             /* Site pair 2-0: Coulomb only */
461             /* COULOMB ELECTROSTATICS */
462             velec            = qq20*rinv20;
463             felec            = velec*rinvsq20;
464
465             /* Update potential sums from outer loop */
466             velecsum        += velec;
467
468             fscal            = felec;
469
470             /* Calculate temporary vectorial force */
471             tx               = fscal*dx20;
472             ty               = fscal*dy20;
473             tz               = fscal*dz20;
474
475             /* Update vectorial force */
476             fix2            += tx;
477             fiy2            += ty;
478             fiz2            += tz;
479             f[j_coord_offset+DIM*0+XX] -= tx;
480             f[j_coord_offset+DIM*0+YY] -= ty;
481             f[j_coord_offset+DIM*0+ZZ] -= tz;
482
483             /**************************
484              * CALCULATE INTERACTIONS *
485              **************************/
486             /* Site pair 2-1: Coulomb only */
487             /* COULOMB ELECTROSTATICS */
488             velec            = qq21*rinv21;
489             felec            = velec*rinvsq21;
490
491             /* Update potential sums from outer loop */
492             velecsum        += velec;
493
494             fscal            = felec;
495
496             /* Calculate temporary vectorial force */
497             tx               = fscal*dx21;
498             ty               = fscal*dy21;
499             tz               = fscal*dz21;
500
501             /* Update vectorial force */
502             fix2            += tx;
503             fiy2            += ty;
504             fiz2            += tz;
505             f[j_coord_offset+DIM*1+XX] -= tx;
506             f[j_coord_offset+DIM*1+YY] -= ty;
507             f[j_coord_offset+DIM*1+ZZ] -= tz;
508
509             /**************************
510              * CALCULATE INTERACTIONS *
511              **************************/
512             /* Site pair 2-2: Coulomb only */
513             /* COULOMB ELECTROSTATICS */
514             velec            = qq22*rinv22;
515             felec            = velec*rinvsq22;
516
517             /* Update potential sums from outer loop */
518             velecsum        += velec;
519
520             fscal            = felec;
521
522             /* Calculate temporary vectorial force */
523             tx               = fscal*dx22;
524             ty               = fscal*dy22;
525             tz               = fscal*dz22;
526
527             /* Update vectorial force */
528             fix2            += tx;
529             fiy2            += ty;
530             fiz2            += tz;
531             f[j_coord_offset+DIM*2+XX] -= tx;
532             f[j_coord_offset+DIM*2+YY] -= ty;
533             f[j_coord_offset+DIM*2+ZZ] -= tz;
534
535             /* Inner loop uses 277 flops */
536         }
537         /* End of innermost loop */
538         /* Write the accumulated i-site forces to f; their sum over the three sites (tx,ty,tz) goes to this entry's shift-force slot */
539         tx = ty = tz = 0;
540         f[i_coord_offset+DIM*0+XX] += fix0;
541         f[i_coord_offset+DIM*0+YY] += fiy0;
542         f[i_coord_offset+DIM*0+ZZ] += fiz0;
543         tx                         += fix0;
544         ty                         += fiy0;
545         tz                         += fiz0;
546         f[i_coord_offset+DIM*1+XX] += fix1;
547         f[i_coord_offset+DIM*1+YY] += fiy1;
548         f[i_coord_offset+DIM*1+ZZ] += fiz1;
549         tx                         += fix1;
550         ty                         += fiy1;
551         tz                         += fiz1;
552         f[i_coord_offset+DIM*2+XX] += fix2;
553         f[i_coord_offset+DIM*2+YY] += fiy2;
554         f[i_coord_offset+DIM*2+ZZ] += fiz2;
555         tx                         += fix2;
556         ty                         += fiy2;
557         tz                         += fiz2;
558         fshift[i_shift_offset+XX]  += tx;
559         fshift[i_shift_offset+YY]  += ty;
560         fshift[i_shift_offset+ZZ]  += tz;
561
562         ggid                        = gid[iidx];
563         /* Update potential energies: add this entry's sums to the slot selected by its gid value */
564         kernel_data->energygrp_elec[ggid] += velecsum;
565         kernel_data->energygrp_vdw[ggid] += vvdwsum;
566
567         /* Increment number of inner iterations */
568         inneriter                  += j_index_end - j_index_start;
569
570         /* Outer loop uses 32 flops */
571     }
572
573     /* Increment number of outer iterations */
574     outeriter        += nri;
575
576     /* Update outer/inner flops */
577     /* Uses the per-iteration counts stated above: 32 per outer and 277 per inner iteration */
578     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*32 + inneriter*277);
579 }
580 /*
581  * Gromacs nonbonded kernel:   nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
582  * Electrostatics interaction: Coulomb
583  * VdW interaction:            CubicSplineTable
584  * Geometry:                   Water3-Water3
585  * Calculate force/pot:        Force
586  */
587 void
588 nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_c
589                     (t_nblist                    * gmx_restrict       nlist,
590                      rvec                        * gmx_restrict          xx,
591                      rvec                        * gmx_restrict          ff,
592                      t_forcerec                  * gmx_restrict          fr,
593                      t_mdatoms                   * gmx_restrict     mdatoms,
594                      nb_kernel_data_t gmx_unused * gmx_restrict kernel_data,
595                      t_nrnb                      * gmx_restrict        nrnb)
596 {
597     int              i_shift_offset,i_coord_offset,j_coord_offset;
598     int              j_index_start,j_index_end;
599     int              nri,inr,ggid,iidx,jidx,jnr,outeriter,inneriter;
600     real             shX,shY,shZ,tx,ty,tz,fscal,rcutoff,rcutoff2;
601     int              *iinr,*jindex,*jjnr,*shiftidx,*gid;
602     real             *shiftvec,*fshift,*x,*f;
603     int              vdwioffset0;
604     real             ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0;
605     int              vdwioffset1;
606     real             ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1;
607     int              vdwioffset2;
608     real             ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2;
609     int              vdwjidx0;
610     real             jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0;
611     int              vdwjidx1;
612     real             jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1;
613     int              vdwjidx2;
614     real             jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2;
615     real             dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00,cexp1_00,cexp2_00;
616     real             dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01,cexp1_01,cexp2_01;
617     real             dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02,cexp1_02,cexp2_02;
618     real             dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10,cexp1_10,cexp2_10;
619     real             dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11,cexp1_11,cexp2_11;
620     real             dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12,cexp1_12,cexp2_12;
621     real             dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20,cexp1_20,cexp2_20;
622     real             dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21,cexp1_21,cexp2_21;
623     real             dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22,cexp1_22,cexp2_22;
624     real             velec,felec,velecsum,facel,crf,krf,krf2;
625     real             *charge;
626     int              nvdwtype;
627     real             rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,br,vvdwexp,sh_vdw_invrcut6;
628     int              *vdwtype;
629     real             *vdwparam;
630     int              vfitab;
631     real             rt,vfeps,vftabscale,Y,F,Geps,Heps2,Fp,VV,FF;
632     real             *vftab;
633
634     x                = xx[0];
635     f                = ff[0];
636
637     nri              = nlist->nri;
638     iinr             = nlist->iinr;
639     jindex           = nlist->jindex;
640     jjnr             = nlist->jjnr;
641     shiftidx         = nlist->shift;
642     gid              = nlist->gid;
643     shiftvec         = fr->shift_vec[0];
644     fshift           = fr->fshift[0];
645     facel            = fr->epsfac;
646     charge           = mdatoms->chargeA;
647     nvdwtype         = fr->ntype;
648     vdwparam         = fr->nbfp;
649     vdwtype          = mdatoms->typeA;
650
651     vftab            = kernel_data->table_vdw->data;
652     vftabscale       = kernel_data->table_vdw->scale;
653
654     /* Setup water-specific parameters */
655     inr              = nlist->iinr[0];
656     iq0              = facel*charge[inr+0];
657     iq1              = facel*charge[inr+1];
658     iq2              = facel*charge[inr+2];
659     vdwioffset0      = 2*nvdwtype*vdwtype[inr+0];
660
661     jq0              = charge[inr+0];
662     jq1              = charge[inr+1];
663     jq2              = charge[inr+2];
664     vdwjidx0         = 2*vdwtype[inr+0];
665     qq00             = iq0*jq0;
666     c6_00            = vdwparam[vdwioffset0+vdwjidx0];
667     c12_00           = vdwparam[vdwioffset0+vdwjidx0+1];
668     qq01             = iq0*jq1;
669     qq02             = iq0*jq2;
670     qq10             = iq1*jq0;
671     qq11             = iq1*jq1;
672     qq12             = iq1*jq2;
673     qq20             = iq2*jq0;
674     qq21             = iq2*jq1;
675     qq22             = iq2*jq2;
676
677     outeriter        = 0;
678     inneriter        = 0;
679
680     /* Start outer loop over neighborlists */
681     for(iidx=0; iidx<nri; iidx++)
682     {
683         /* Load shift vector for this list */
684         i_shift_offset   = DIM*shiftidx[iidx];
685         shX              = shiftvec[i_shift_offset+XX];
686         shY              = shiftvec[i_shift_offset+YY];
687         shZ              = shiftvec[i_shift_offset+ZZ];
688
689         /* Load limits for loop over neighbors */
690         j_index_start    = jindex[iidx];
691         j_index_end      = jindex[iidx+1];
692
693         /* Get outer coordinate index */
694         inr              = iinr[iidx];
695         i_coord_offset   = DIM*inr;
696
697         /* Load i particle coords and add shift vector */
698         ix0              = shX + x[i_coord_offset+DIM*0+XX];
699         iy0              = shY + x[i_coord_offset+DIM*0+YY];
700         iz0              = shZ + x[i_coord_offset+DIM*0+ZZ];
701         ix1              = shX + x[i_coord_offset+DIM*1+XX];
702         iy1              = shY + x[i_coord_offset+DIM*1+YY];
703         iz1              = shZ + x[i_coord_offset+DIM*1+ZZ];
704         ix2              = shX + x[i_coord_offset+DIM*2+XX];
705         iy2              = shY + x[i_coord_offset+DIM*2+YY];
706         iz2              = shZ + x[i_coord_offset+DIM*2+ZZ];
707
708         fix0             = 0.0;
709         fiy0             = 0.0;
710         fiz0             = 0.0;
711         fix1             = 0.0;
712         fiy1             = 0.0;
713         fiz1             = 0.0;
714         fix2             = 0.0;
715         fiy2             = 0.0;
716         fiz2             = 0.0;
717
718         /* Start inner kernel loop */
719         for(jidx=j_index_start; jidx<j_index_end; jidx++)
720         {
721             /* Get j neighbor index, and coordinate index */
722             jnr              = jjnr[jidx];
723             j_coord_offset   = DIM*jnr;
724
725             /* load j atom coordinates */
726             jx0              = x[j_coord_offset+DIM*0+XX];
727             jy0              = x[j_coord_offset+DIM*0+YY];
728             jz0              = x[j_coord_offset+DIM*0+ZZ];
729             jx1              = x[j_coord_offset+DIM*1+XX];
730             jy1              = x[j_coord_offset+DIM*1+YY];
731             jz1              = x[j_coord_offset+DIM*1+ZZ];
732             jx2              = x[j_coord_offset+DIM*2+XX];
733             jy2              = x[j_coord_offset+DIM*2+YY];
734             jz2              = x[j_coord_offset+DIM*2+ZZ];
735
736             /* Calculate displacement vector */
737             dx00             = ix0 - jx0;
738             dy00             = iy0 - jy0;
739             dz00             = iz0 - jz0;
740             dx01             = ix0 - jx1;
741             dy01             = iy0 - jy1;
742             dz01             = iz0 - jz1;
743             dx02             = ix0 - jx2;
744             dy02             = iy0 - jy2;
745             dz02             = iz0 - jz2;
746             dx10             = ix1 - jx0;
747             dy10             = iy1 - jy0;
748             dz10             = iz1 - jz0;
749             dx11             = ix1 - jx1;
750             dy11             = iy1 - jy1;
751             dz11             = iz1 - jz1;
752             dx12             = ix1 - jx2;
753             dy12             = iy1 - jy2;
754             dz12             = iz1 - jz2;
755             dx20             = ix2 - jx0;
756             dy20             = iy2 - jy0;
757             dz20             = iz2 - jz0;
758             dx21             = ix2 - jx1;
759             dy21             = iy2 - jy1;
760             dz21             = iz2 - jz1;
761             dx22             = ix2 - jx2;
762             dy22             = iy2 - jy2;
763             dz22             = iz2 - jz2;
764
765             /* Calculate squared distance and things based on it */
766             rsq00            = dx00*dx00+dy00*dy00+dz00*dz00;
767             rsq01            = dx01*dx01+dy01*dy01+dz01*dz01;
768             rsq02            = dx02*dx02+dy02*dy02+dz02*dz02;
769             rsq10            = dx10*dx10+dy10*dy10+dz10*dz10;
770             rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
771             rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
772             rsq20            = dx20*dx20+dy20*dy20+dz20*dz20;
773             rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
774             rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
775
776             rinv00           = gmx_invsqrt(rsq00);
777             rinv01           = gmx_invsqrt(rsq01);
778             rinv02           = gmx_invsqrt(rsq02);
779             rinv10           = gmx_invsqrt(rsq10);
780             rinv11           = gmx_invsqrt(rsq11);
781             rinv12           = gmx_invsqrt(rsq12);
782             rinv20           = gmx_invsqrt(rsq20);
783             rinv21           = gmx_invsqrt(rsq21);
784             rinv22           = gmx_invsqrt(rsq22);
785
786             rinvsq00         = rinv00*rinv00;
787             rinvsq01         = rinv01*rinv01;
788             rinvsq02         = rinv02*rinv02;
789             rinvsq10         = rinv10*rinv10;
790             rinvsq11         = rinv11*rinv11;
791             rinvsq12         = rinv12*rinv12;
792             rinvsq20         = rinv20*rinv20;
793             rinvsq21         = rinv21*rinv21;
794             rinvsq22         = rinv22*rinv22;
795
796             /**************************
797              * CALCULATE INTERACTIONS *
798              **************************/
799
800             r00              = rsq00*rinv00;
801
802             /* Calculate table index by multiplying r with table scale and truncate to integer */
803             rt               = r00*vftabscale;
804             vfitab           = rt;
805             vfeps            = rt-vfitab;
806             vfitab           = 2*4*vfitab;
807
808             /* COULOMB ELECTROSTATICS */
809             velec            = qq00*rinv00;
810             felec            = velec*rinvsq00;
811
812             /* CUBIC SPLINE TABLE DISPERSION */
813             vfitab          += 0;
814             F                = vftab[vfitab+1];
815             Geps             = vfeps*vftab[vfitab+2];
816             Heps2            = vfeps*vfeps*vftab[vfitab+3];
817             Fp               = F+Geps+Heps2;
818             FF               = Fp+Geps+2.0*Heps2;
819             fvdw6            = c6_00*FF;
820
821             /* CUBIC SPLINE TABLE REPULSION */
822             F                = vftab[vfitab+5];
823             Geps             = vfeps*vftab[vfitab+6];
824             Heps2            = vfeps*vfeps*vftab[vfitab+7];
825             Fp               = F+Geps+Heps2;
826             FF               = Fp+Geps+2.0*Heps2;
827             fvdw12           = c12_00*FF;
828             fvdw             = -(fvdw6+fvdw12)*vftabscale*rinv00;
829
830             fscal            = felec+fvdw;
831
832             /* Calculate temporary vectorial force */
833             tx               = fscal*dx00;
834             ty               = fscal*dy00;
835             tz               = fscal*dz00;
836
837             /* Update vectorial force */
838             fix0            += tx;
839             fiy0            += ty;
840             fiz0            += tz;
841             f[j_coord_offset+DIM*0+XX] -= tx;
842             f[j_coord_offset+DIM*0+YY] -= ty;
843             f[j_coord_offset+DIM*0+ZZ] -= tz;
844
845             /**************************
846              * CALCULATE INTERACTIONS *
847              **************************/
848
849             /* COULOMB ELECTROSTATICS */
850             velec            = qq01*rinv01;
851             felec            = velec*rinvsq01;
852
853             fscal            = felec;
854
855             /* Calculate temporary vectorial force */
856             tx               = fscal*dx01;
857             ty               = fscal*dy01;
858             tz               = fscal*dz01;
859
860             /* Update vectorial force */
861             fix0            += tx;
862             fiy0            += ty;
863             fiz0            += tz;
864             f[j_coord_offset+DIM*1+XX] -= tx;
865             f[j_coord_offset+DIM*1+YY] -= ty;
866             f[j_coord_offset+DIM*1+ZZ] -= tz;
867
868             /**************************
869              * CALCULATE INTERACTIONS *
870              **************************/
871
872             /* COULOMB ELECTROSTATICS */
873             velec            = qq02*rinv02;
874             felec            = velec*rinvsq02;
875
876             fscal            = felec;
877
878             /* Calculate temporary vectorial force */
879             tx               = fscal*dx02;
880             ty               = fscal*dy02;
881             tz               = fscal*dz02;
882
883             /* Update vectorial force */
884             fix0            += tx;
885             fiy0            += ty;
886             fiz0            += tz;
887             f[j_coord_offset+DIM*2+XX] -= tx;
888             f[j_coord_offset+DIM*2+YY] -= ty;
889             f[j_coord_offset+DIM*2+ZZ] -= tz;
890
891             /**************************
892              * CALCULATE INTERACTIONS *
893              **************************/
894
895             /* COULOMB ELECTROSTATICS */
896             velec            = qq10*rinv10;
897             felec            = velec*rinvsq10;
898
899             fscal            = felec;
900
901             /* Calculate temporary vectorial force */
902             tx               = fscal*dx10;
903             ty               = fscal*dy10;
904             tz               = fscal*dz10;
905
906             /* Update vectorial force */
907             fix1            += tx;
908             fiy1            += ty;
909             fiz1            += tz;
910             f[j_coord_offset+DIM*0+XX] -= tx;
911             f[j_coord_offset+DIM*0+YY] -= ty;
912             f[j_coord_offset+DIM*0+ZZ] -= tz;
913
914             /**************************
915              * CALCULATE INTERACTIONS *
916              **************************/
917
918             /* COULOMB ELECTROSTATICS */
919             velec            = qq11*rinv11;
920             felec            = velec*rinvsq11;
921
922             fscal            = felec;
923
924             /* Calculate temporary vectorial force */
925             tx               = fscal*dx11;
926             ty               = fscal*dy11;
927             tz               = fscal*dz11;
928
929             /* Update vectorial force */
930             fix1            += tx;
931             fiy1            += ty;
932             fiz1            += tz;
933             f[j_coord_offset+DIM*1+XX] -= tx;
934             f[j_coord_offset+DIM*1+YY] -= ty;
935             f[j_coord_offset+DIM*1+ZZ] -= tz;
936
937             /**************************
938              * CALCULATE INTERACTIONS *
939              **************************/
940
941             /* COULOMB ELECTROSTATICS */
942             velec            = qq12*rinv12;
943             felec            = velec*rinvsq12;
944
945             fscal            = felec;
946
947             /* Calculate temporary vectorial force */
948             tx               = fscal*dx12;
949             ty               = fscal*dy12;
950             tz               = fscal*dz12;
951
952             /* Update vectorial force */
953             fix1            += tx;
954             fiy1            += ty;
955             fiz1            += tz;
956             f[j_coord_offset+DIM*2+XX] -= tx;
957             f[j_coord_offset+DIM*2+YY] -= ty;
958             f[j_coord_offset+DIM*2+ZZ] -= tz;
959
960             /**************************
961              * CALCULATE INTERACTIONS *
962              **************************/
963
964             /* COULOMB ELECTROSTATICS */
965             velec            = qq20*rinv20;
966             felec            = velec*rinvsq20;
967
968             fscal            = felec;
969
970             /* Calculate temporary vectorial force */
971             tx               = fscal*dx20;
972             ty               = fscal*dy20;
973             tz               = fscal*dz20;
974
975             /* Update vectorial force */
976             fix2            += tx;
977             fiy2            += ty;
978             fiz2            += tz;
979             f[j_coord_offset+DIM*0+XX] -= tx;
980             f[j_coord_offset+DIM*0+YY] -= ty;
981             f[j_coord_offset+DIM*0+ZZ] -= tz;
982
983             /**************************
984              * CALCULATE INTERACTIONS *
985              **************************/
986
987             /* COULOMB ELECTROSTATICS */
988             velec            = qq21*rinv21;
989             felec            = velec*rinvsq21;
990
991             fscal            = felec;
992
993             /* Calculate temporary vectorial force */
994             tx               = fscal*dx21;
995             ty               = fscal*dy21;
996             tz               = fscal*dz21;
997
998             /* Update vectorial force */
999             fix2            += tx;
1000             fiy2            += ty;
1001             fiz2            += tz;
1002             f[j_coord_offset+DIM*1+XX] -= tx;
1003             f[j_coord_offset+DIM*1+YY] -= ty;
1004             f[j_coord_offset+DIM*1+ZZ] -= tz;
1005
1006             /**************************
1007              * CALCULATE INTERACTIONS *
1008              **************************/
1009
1010             /* COULOMB ELECTROSTATICS */
1011             velec            = qq22*rinv22;
1012             felec            = velec*rinvsq22;
1013
1014             fscal            = felec;
1015
1016             /* Calculate temporary vectorial force */
1017             tx               = fscal*dx22;
1018             ty               = fscal*dy22;
1019             tz               = fscal*dz22;
1020
1021             /* Update vectorial force */
1022             fix2            += tx;
1023             fiy2            += ty;
1024             fiz2            += tz;
1025             f[j_coord_offset+DIM*2+XX] -= tx;
1026             f[j_coord_offset+DIM*2+YY] -= ty;
1027             f[j_coord_offset+DIM*2+ZZ] -= tz;
1028
1029             /* Inner loop uses 260 flops */
1030         }
1031         /* End of innermost loop */
1032
1033         tx = ty = tz = 0;
1034         f[i_coord_offset+DIM*0+XX] += fix0;
1035         f[i_coord_offset+DIM*0+YY] += fiy0;
1036         f[i_coord_offset+DIM*0+ZZ] += fiz0;
1037         tx                         += fix0;
1038         ty                         += fiy0;
1039         tz                         += fiz0;
1040         f[i_coord_offset+DIM*1+XX] += fix1;
1041         f[i_coord_offset+DIM*1+YY] += fiy1;
1042         f[i_coord_offset+DIM*1+ZZ] += fiz1;
1043         tx                         += fix1;
1044         ty                         += fiy1;
1045         tz                         += fiz1;
1046         f[i_coord_offset+DIM*2+XX] += fix2;
1047         f[i_coord_offset+DIM*2+YY] += fiy2;
1048         f[i_coord_offset+DIM*2+ZZ] += fiz2;
1049         tx                         += fix2;
1050         ty                         += fiy2;
1051         tz                         += fiz2;
1052         fshift[i_shift_offset+XX]  += tx;
1053         fshift[i_shift_offset+YY]  += ty;
1054         fshift[i_shift_offset+ZZ]  += tz;
1055
1056         /* Increment number of inner iterations */
1057         inneriter                  += j_index_end - j_index_start;
1058
1059         /* Outer loop uses 30 flops */
1060     }
1061
1062     /* Increment number of outer iterations */
1063     outeriter        += nri;
1064
1065     /* Update outer/inner flops */
1066
1067     inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_F,outeriter*30 + inneriter*260);
1068 }