Merge remote-tracking branch 'gerrit/release-4-6'
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_sse2_single / nb_kernel332_sse2_single.c
1 /*
2  *                This source code is part of
3  *
4  *                 G   R   O   M   A   C   S
5  *
6  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
7  * Copyright (c) 2001-2009, The GROMACS Development Team
8  *
9  * Gromacs is a library for molecular simulation and trajectory analysis,
10  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
11  * a full list of developers and information, check out http://www.gromacs.org
12  *
13  * This program is free software; you can redistribute it and/or modify it under 
14  * the terms of the GNU Lesser General Public License as published by the Free 
15  * Software Foundation; either version 2 of the License, or (at your option) any 
16  * later version.
17  * As a special exception, you may use this file as part of a free software
18  * library without restriction.  Specifically, if other files instantiate
19  * templates or use macros or inline functions from this file, or you compile
20  * this file and link it with other files to produce an executable, this
21  * file does not by itself cause the resulting executable to be covered by
22  * the GNU Lesser General Public License.  
23  *
24  * In plain-speak: do not worry about classes/macros/templates either - only
25  * changes to the library have to be LGPL, not an application linking with it.
26  *
27  * To help fund GROMACS development, we humbly ask that you cite
28  * the papers people have written on it - you can find them on the website!
29  */
30 #if 0
31 #ifdef HAVE_CONFIG_H
32 #include <config.h>
33 #endif
34
35
36 #include <emmintrin.h>
37
38
39 /* #include "sse_common_single.h" */
40
41 /*
42  * Gromacs nonbonded kernel nb_kernel332
43  * Coulomb interaction:     Tabulated
44  * VdW interaction:         Tabulated
45  * water optimization:      pairs of SPC/TIP3P interactions
46  * Calculate forces:        yes
47  */
48 void nb_kernel332_sse2_single(
49                     int *           p_nri,
50                     int *           iinr,
51                     int *           jindex,
52                     int *           jjnr,
53                     int *           shift,
54                     float *         shiftvec,
55                     float *         fshift,
56                     int *           gid,
57                     float *         pos,
58                     float *         faction,
59                     float *         charge,
60                     float *         p_facel,
61                     float *         p_krf,
62                     float *         p_crf,
63                     float *         Vc,
64                     int *           type,
65                     int *           p_ntype,
66                     float *         vdwparam,
67                     float *         Vvdw,
68                     float *         p_tabscale,
69                     float *         VFtab,
70                     float *         invsqrta,
71                     float *         dvda,
72                     float *         p_gbtabscale,
73                     float *         GBtab,
74                     int *           p_nthreads,
75                     int *           count,
76                     void *          mtx,
77                     int *           outeriter,
78                     int *           inneriter,
79                     float *         work)
80 {
81     int           nri,ntype,nthreads;
82     float         facel,krf,crf,tabscale,gbtabscale;
83     int           n,ii,is3,ii3,k,nj0,nj1,jnr,j3,ggid;
84     int           nn0,nn1,nouter,ninner;
85     float         shX,shY,shZ;
86     float         fscal,tx,ty,tz;
87     float         qq,vcoul,vctot;
88     int           tj;
89     float         Vvdw6,Vvdwtot;
90     float         Vvdw12;
91     float         r,rt,eps,eps2;
92     int           n0,nnn;
93     float         Y,F,Geps,Heps2,Fp,VV;
94     float         FF;
95     float         fijC;
96     float         fijD,fijR;
97     float         ix1,iy1,iz1,fix1,fiy1,fiz1;
98     float         ix2,iy2,iz2,fix2,fiy2,fiz2;
99     float         ix3,iy3,iz3,fix3,fiy3,fiz3;
100     float         jx1,jy1,jz1,fjx1,fjy1,fjz1;
101     float         jx2,jy2,jz2,fjx2,fjy2,fjz2;
102     float         jx3,jy3,jz3,fjx3,fjy3,fjz3;
103     float         dx11,dy11,dz11,rsq11,rinv11;
104     float         dx12,dy12,dz12,rsq12,rinv12;
105     float         dx13,dy13,dz13,rsq13,rinv13;
106     float         dx21,dy21,dz21,rsq21,rinv21;
107     float         dx22,dy22,dz22,rsq22,rinv22;
108     float         dx23,dy23,dz23,rsq23,rinv23;
109     float         dx31,dy31,dz31,rsq31,rinv31;
110     float         dx32,dy32,dz32,rsq32,rinv32;
111     float         dx33,dy33,dz33,rsq33,rinv33;
112     float         qO,qH,qqOO,qqOH,qqHH;
113     float         c6,c12;
114
115     nri              = *p_nri;         
116     ntype            = *p_ntype;       
117     nthreads         = *p_nthreads;    
118     facel            = *p_facel;       
119     krf              = *p_krf;         
120     crf              = *p_crf;         
121     tabscale         = *p_tabscale;    
122
123     /* Initialize water data */
124     ii               = iinr[0];        
125     qO               = charge[ii];     
126     qH               = charge[ii+1];   
127     qqOO             = facel*qO*qO;    
128     qqOH             = facel*qO*qH;    
129     qqHH             = facel*qH*qH;    
130     tj               = 2*(ntype+1)*type[ii];
131     c6               = vdwparam[tj];   
132     c12              = vdwparam[tj+1]; 
133
134
135     /* Reset outer and inner iteration counters */
136     nouter           = 0;              
137     ninner           = 0;              
138
139     /* Loop over thread workunits */
140     
141     do
142     {
143 #ifdef GMX_THREAD_SHM_FDECOMP
144         tMPI_Thread_mutex_lock((tMPI_Thread_mutex_t *)mtx);
145         nn0              = *count;         
146                 
147         /* Take successively smaller chunks (at least 10 lists) */
148         nn1              = nn0+(nri-nn0)/(2*nthreads)+10;
149         *count           = nn1;            
150         tMPI_Thread_mutex_unlock((tMPI_Thread_mutex_t *)mtx);
151         if(nn1>nri) nn1=nri;
152 #else
153             nn0 = 0;
154                 nn1 = nri;
155 #endif
156         /* Start outer loop over neighborlists */
157         
158         for(n=nn0; (n<nn1); n++)
159         {
160
161             /* Load shift vector for this list */
162             is3              = 3*shift[n];     
163             shX              = shiftvec[is3];  
164             shY              = shiftvec[is3+1];
165             shZ              = shiftvec[is3+2];
166
167             /* Load limits for loop over neighbors */
168             nj0              = jindex[n];      
169             nj1              = jindex[n+1];    
170
171             /* Get outer coordinate index */
172             ii               = iinr[n];        
173             ii3              = 3*ii;           
174
175             /* Load i atom data, add shift vector */
176             ix1              = shX + pos[ii3+0];
177             iy1              = shY + pos[ii3+1];
178             iz1              = shZ + pos[ii3+2];
179             ix2              = shX + pos[ii3+3];
180             iy2              = shY + pos[ii3+4];
181             iz2              = shZ + pos[ii3+5];
182             ix3              = shX + pos[ii3+6];
183             iy3              = shY + pos[ii3+7];
184             iz3              = shZ + pos[ii3+8];
185
186             /* Zero the potential energy for this list */
187             vctot            = 0;              
188             Vvdwtot          = 0;              
189
190             /* Clear i atom forces */
191             fix1             = 0;              
192             fiy1             = 0;              
193             fiz1             = 0;              
194             fix2             = 0;              
195             fiy2             = 0;              
196             fiz2             = 0;              
197             fix3             = 0;              
198             fiy3             = 0;              
199             fiz3             = 0;              
200             
201             for(k=nj0; (k<nj1); k++)
202             {
203
204                 /* Get j neighbor index, and coordinate index */
205                 jnr              = jjnr[k];        
206                 j3               = 3*jnr;          
207
208                 /* load j atom coordinates */
209                 jx1              = pos[j3+0];      
210                 jy1              = pos[j3+1];      
211                 jz1              = pos[j3+2];      
212                 jx2              = pos[j3+3];      
213                 jy2              = pos[j3+4];      
214                 jz2              = pos[j3+5];      
215                 jx3              = pos[j3+6];      
216                 jy3              = pos[j3+7];      
217                 jz3              = pos[j3+8];      
218
219                 /* Calculate distance */
220                 dx11             = ix1 - jx1;      
221                 dy11             = iy1 - jy1;      
222                 dz11             = iz1 - jz1;      
223                 rsq11            = dx11*dx11+dy11*dy11+dz11*dz11;
224                 dx12             = ix1 - jx2;      
225                 dy12             = iy1 - jy2;      
226                 dz12             = iz1 - jz2;      
227                 rsq12            = dx12*dx12+dy12*dy12+dz12*dz12;
228                 dx13             = ix1 - jx3;      
229                 dy13             = iy1 - jy3;      
230                 dz13             = iz1 - jz3;      
231                 rsq13            = dx13*dx13+dy13*dy13+dz13*dz13;
232                 dx21             = ix2 - jx1;      
233                 dy21             = iy2 - jy1;      
234                 dz21             = iz2 - jz1;      
235                 rsq21            = dx21*dx21+dy21*dy21+dz21*dz21;
236                 dx22             = ix2 - jx2;      
237                 dy22             = iy2 - jy2;      
238                 dz22             = iz2 - jz2;      
239                 rsq22            = dx22*dx22+dy22*dy22+dz22*dz22;
240                 dx23             = ix2 - jx3;      
241                 dy23             = iy2 - jy3;      
242                 dz23             = iz2 - jz3;      
243                 rsq23            = dx23*dx23+dy23*dy23+dz23*dz23;
244                 dx31             = ix3 - jx1;      
245                 dy31             = iy3 - jy1;      
246                 dz31             = iz3 - jz1;      
247                 rsq31            = dx31*dx31+dy31*dy31+dz31*dz31;
248                 dx32             = ix3 - jx2;      
249                 dy32             = iy3 - jy2;      
250                 dz32             = iz3 - jz2;      
251                 rsq32            = dx32*dx32+dy32*dy32+dz32*dz32;
252                 dx33             = ix3 - jx3;      
253                 dy33             = iy3 - jy3;      
254                 dz33             = iz3 - jz3;      
255                 rsq33            = dx33*dx33+dy33*dy33+dz33*dz33;
256
257                 /* Calculate 1/r and 1/r2 */
258                 rinv11           = 1.0/sqrt(rsq11);
259                 rinv12           = 1.0/sqrt(rsq12);
260                 rinv13           = 1.0/sqrt(rsq13);
261                 rinv21           = 1.0/sqrt(rsq21);
262                 rinv22           = 1.0/sqrt(rsq22);
263                 rinv23           = 1.0/sqrt(rsq23);
264                 rinv31           = 1.0/sqrt(rsq31);
265                 rinv32           = 1.0/sqrt(rsq32);
266                 rinv33           = 1.0/sqrt(rsq33);
267
268                 /* Load parameters for j atom */
269                 qq               = qqOO;           
270
271                 /* Calculate table index */
272                 r                = rsq11*rinv11;   
273
274                 /* Calculate table index */
275                 rt               = r*tabscale;     
276                 n0               = rt;             
277                 eps              = rt-n0;          
278                 eps2             = eps*eps;        
279                 nnn              = 12*n0;          
280
281                 /* Tabulated coulomb interaction */
282                 Y                = VFtab[nnn];     
283                 F                = VFtab[nnn+1];   
284                 Geps             = eps*VFtab[nnn+2];
285                 Heps2            = eps2*VFtab[nnn+3];
286                 Fp               = F+Geps+Heps2;   
287                 VV               = Y+eps*Fp;       
288                 FF               = Fp+Geps+2.0*Heps2;
289                 vcoul            = qq*VV;          
290                 fijC             = qq*FF;          
291                 vctot            = vctot + vcoul;  
292
293                 /* Tabulated VdW interaction - dispersion */
294                 nnn              = nnn+4;          
295                 Y                = VFtab[nnn];     
296                 F                = VFtab[nnn+1];   
297                 Geps             = eps*VFtab[nnn+2];
298                 Heps2            = eps2*VFtab[nnn+3];
299                 Fp               = F+Geps+Heps2;   
300                 VV               = Y+eps*Fp;       
301                 FF               = Fp+Geps+2.0*Heps2;
302                 Vvdw6            = c6*VV;          
303                 fijD             = c6*FF;          
304
305                 /* Tabulated VdW interaction - repulsion */
306                 nnn              = nnn+4;          
307                 Y                = VFtab[nnn];     
308                 F                = VFtab[nnn+1];   
309                 Geps             = eps*VFtab[nnn+2];
310                 Heps2            = eps2*VFtab[nnn+3];
311                 Fp               = F+Geps+Heps2;   
312                 VV               = Y+eps*Fp;       
313                 FF               = Fp+Geps+2.0*Heps2;
314                 Vvdw12           = c12*VV;         
315                 fijR             = c12*FF;         
316                 Vvdwtot          = Vvdwtot+ Vvdw6 + Vvdw12;
317                 fscal            = -((fijC+fijD+fijR)*tabscale)*rinv11;
318
319                 /* Calculate temporary vectorial force */
320                 tx               = fscal*dx11;     
321                 ty               = fscal*dy11;     
322                 tz               = fscal*dz11;     
323
324                 /* Increment i atom force */
325                 fix1             = fix1 + tx;      
326                 fiy1             = fiy1 + ty;      
327                 fiz1             = fiz1 + tz;      
328
329                 /* Decrement j atom force */
330                 fjx1             = faction[j3+0] - tx;
331                 fjy1             = faction[j3+1] - ty;
332                 fjz1             = faction[j3+2] - tz;
333
334                 /* Load parameters for j atom */
335                 qq               = qqOH;           
336
337                 /* Calculate table index */
338                 r                = rsq12*rinv12;   
339
340                 /* Calculate table index */
341                 rt               = r*tabscale;     
342                 n0               = rt;             
343                 eps              = rt-n0;          
344                 eps2             = eps*eps;        
345                 nnn              = 12*n0;          
346
347                 /* Tabulated coulomb interaction */
348                 Y                = VFtab[nnn];     
349                 F                = VFtab[nnn+1];   
350                 Geps             = eps*VFtab[nnn+2];
351                 Heps2            = eps2*VFtab[nnn+3];
352                 Fp               = F+Geps+Heps2;   
353                 VV               = Y+eps*Fp;       
354                 FF               = Fp+Geps+2.0*Heps2;
355                 vcoul            = qq*VV;          
356                 fijC             = qq*FF;          
357                 vctot            = vctot + vcoul;  
358                 fscal            = -((fijC)*tabscale)*rinv12;
359
360                 /* Calculate temporary vectorial force */
361                 tx               = fscal*dx12;     
362                 ty               = fscal*dy12;     
363                 tz               = fscal*dz12;     
364
365                 /* Increment i atom force */
366                 fix1             = fix1 + tx;      
367                 fiy1             = fiy1 + ty;      
368                 fiz1             = fiz1 + tz;      
369
370                 /* Decrement j atom force */
371                 fjx2             = faction[j3+3] - tx;
372                 fjy2             = faction[j3+4] - ty;
373                 fjz2             = faction[j3+5] - tz;
374
375                 /* Load parameters for j atom */
376                 qq               = qqOH;           
377
378                 /* Calculate table index */
379                 r                = rsq13*rinv13;   
380
381                 /* Calculate table index */
382                 rt               = r*tabscale;     
383                 n0               = rt;             
384                 eps              = rt-n0;          
385                 eps2             = eps*eps;        
386                 nnn              = 12*n0;          
387
388                 /* Tabulated coulomb interaction */
389                 Y                = VFtab[nnn];     
390                 F                = VFtab[nnn+1];   
391                 Geps             = eps*VFtab[nnn+2];
392                 Heps2            = eps2*VFtab[nnn+3];
393                 Fp               = F+Geps+Heps2;   
394                 VV               = Y+eps*Fp;       
395                 FF               = Fp+Geps+2.0*Heps2;
396                 vcoul            = qq*VV;          
397                 fijC             = qq*FF;          
398                 vctot            = vctot + vcoul;  
399                 fscal            = -((fijC)*tabscale)*rinv13;
400
401                 /* Calculate temporary vectorial force */
402                 tx               = fscal*dx13;     
403                 ty               = fscal*dy13;     
404                 tz               = fscal*dz13;     
405
406                 /* Increment i atom force */
407                 fix1             = fix1 + tx;      
408                 fiy1             = fiy1 + ty;      
409                 fiz1             = fiz1 + tz;      
410
411                 /* Decrement j atom force */
412                 fjx3             = faction[j3+6] - tx;
413                 fjy3             = faction[j3+7] - ty;
414                 fjz3             = faction[j3+8] - tz;
415
416                 /* Load parameters for j atom */
417                 qq               = qqOH;           
418
419                 /* Calculate table index */
420                 r                = rsq21*rinv21;   
421
422                 /* Calculate table index */
423                 rt               = r*tabscale;     
424                 n0               = rt;             
425                 eps              = rt-n0;          
426                 eps2             = eps*eps;        
427                 nnn              = 12*n0;          
428
429                 /* Tabulated coulomb interaction */
430                 Y                = VFtab[nnn];     
431                 F                = VFtab[nnn+1];   
432                 Geps             = eps*VFtab[nnn+2];
433                 Heps2            = eps2*VFtab[nnn+3];
434                 Fp               = F+Geps+Heps2;   
435                 VV               = Y+eps*Fp;       
436                 FF               = Fp+Geps+2.0*Heps2;
437                 vcoul            = qq*VV;          
438                 fijC             = qq*FF;          
439                 vctot            = vctot + vcoul;  
440                 fscal            = -((fijC)*tabscale)*rinv21;
441
442                 /* Calculate temporary vectorial force */
443                 tx               = fscal*dx21;     
444                 ty               = fscal*dy21;     
445                 tz               = fscal*dz21;     
446
447                 /* Increment i atom force */
448                 fix2             = fix2 + tx;      
449                 fiy2             = fiy2 + ty;      
450                 fiz2             = fiz2 + tz;      
451
452                 /* Decrement j atom force */
453                 fjx1             = fjx1 - tx;      
454                 fjy1             = fjy1 - ty;      
455                 fjz1             = fjz1 - tz;      
456
457                 /* Load parameters for j atom */
458                 qq               = qqHH;           
459
460                 /* Calculate table index */
461                 r                = rsq22*rinv22;   
462
463                 /* Calculate table index */
464                 rt               = r*tabscale;     
465                 n0               = rt;             
466                 eps              = rt-n0;          
467                 eps2             = eps*eps;        
468                 nnn              = 12*n0;          
469
470                 /* Tabulated coulomb interaction */
471                 Y                = VFtab[nnn];     
472                 F                = VFtab[nnn+1];   
473                 Geps             = eps*VFtab[nnn+2];
474                 Heps2            = eps2*VFtab[nnn+3];
475                 Fp               = F+Geps+Heps2;   
476                 VV               = Y+eps*Fp;       
477                 FF               = Fp+Geps+2.0*Heps2;
478                 vcoul            = qq*VV;          
479                 fijC             = qq*FF;          
480                 vctot            = vctot + vcoul;  
481                 fscal            = -((fijC)*tabscale)*rinv22;
482
483                 /* Calculate temporary vectorial force */
484                 tx               = fscal*dx22;     
485                 ty               = fscal*dy22;     
486                 tz               = fscal*dz22;     
487
488                 /* Increment i atom force */
489                 fix2             = fix2 + tx;      
490                 fiy2             = fiy2 + ty;      
491                 fiz2             = fiz2 + tz;      
492
493                 /* Decrement j atom force */
494                 fjx2             = fjx2 - tx;      
495                 fjy2             = fjy2 - ty;      
496                 fjz2             = fjz2 - tz;      
497
498                 /* Load parameters for j atom */
499                 qq               = qqHH;           
500
501                 /* Calculate table index */
502                 r                = rsq23*rinv23;   
503
504                 /* Calculate table index */
505                 rt               = r*tabscale;     
506                 n0               = rt;             
507                 eps              = rt-n0;          
508                 eps2             = eps*eps;        
509                 nnn              = 12*n0;          
510
511                 /* Tabulated coulomb interaction */
512                 Y                = VFtab[nnn];     
513                 F                = VFtab[nnn+1];   
514                 Geps             = eps*VFtab[nnn+2];
515                 Heps2            = eps2*VFtab[nnn+3];
516                 Fp               = F+Geps+Heps2;   
517                 VV               = Y+eps*Fp;       
518                 FF               = Fp+Geps+2.0*Heps2;
519                 vcoul            = qq*VV;          
520                 fijC             = qq*FF;          
521                 vctot            = vctot + vcoul;  
522                 fscal            = -((fijC)*tabscale)*rinv23;
523
524                 /* Calculate temporary vectorial force */
525                 tx               = fscal*dx23;     
526                 ty               = fscal*dy23;     
527                 tz               = fscal*dz23;     
528
529                 /* Increment i atom force */
530                 fix2             = fix2 + tx;      
531                 fiy2             = fiy2 + ty;      
532                 fiz2             = fiz2 + tz;      
533
534                 /* Decrement j atom force */
535                 fjx3             = fjx3 - tx;      
536                 fjy3             = fjy3 - ty;      
537                 fjz3             = fjz3 - tz;      
538
539                 /* Load parameters for j atom */
540                 qq               = qqOH;           
541
542                 /* Calculate table index */
543                 r                = rsq31*rinv31;   
544
545                 /* Calculate table index */
546                 rt               = r*tabscale;     
547                 n0               = rt;             
548                 eps              = rt-n0;          
549                 eps2             = eps*eps;        
550                 nnn              = 12*n0;          
551
552                 /* Tabulated coulomb interaction */
553                 Y                = VFtab[nnn];     
554                 F                = VFtab[nnn+1];   
555                 Geps             = eps*VFtab[nnn+2];
556                 Heps2            = eps2*VFtab[nnn+3];
557                 Fp               = F+Geps+Heps2;   
558                 VV               = Y+eps*Fp;       
559                 FF               = Fp+Geps+2.0*Heps2;
560                 vcoul            = qq*VV;          
561                 fijC             = qq*FF;          
562                 vctot            = vctot + vcoul;  
563                 fscal            = -((fijC)*tabscale)*rinv31;
564
565                 /* Calculate temporary vectorial force */
566                 tx               = fscal*dx31;     
567                 ty               = fscal*dy31;     
568                 tz               = fscal*dz31;     
569
570                 /* Increment i atom force */
571                 fix3             = fix3 + tx;      
572                 fiy3             = fiy3 + ty;      
573                 fiz3             = fiz3 + tz;      
574
575                 /* Decrement j atom force */
576                 faction[j3+0]    = fjx1 - tx;      
577                 faction[j3+1]    = fjy1 - ty;      
578                 faction[j3+2]    = fjz1 - tz;      
579
580                 /* Load parameters for j atom */
581                 qq               = qqHH;           
582
583                 /* Calculate table index */
584                 r                = rsq32*rinv32;   
585
586                 /* Calculate table index */
587                 rt               = r*tabscale;     
588                 n0               = rt;             
589                 eps              = rt-n0;          
590                 eps2             = eps*eps;        
591                 nnn              = 12*n0;          
592
593                 /* Tabulated coulomb interaction */
594                 Y                = VFtab[nnn];     
595                 F                = VFtab[nnn+1];   
596                 Geps             = eps*VFtab[nnn+2];
597                 Heps2            = eps2*VFtab[nnn+3];
598                 Fp               = F+Geps+Heps2;   
599                 VV               = Y+eps*Fp;       
600                 FF               = Fp+Geps+2.0*Heps2;
601                 vcoul            = qq*VV;          
602                 fijC             = qq*FF;          
603                 vctot            = vctot + vcoul;  
604                 fscal            = -((fijC)*tabscale)*rinv32;
605
606                 /* Calculate temporary vectorial force */
607                 tx               = fscal*dx32;     
608                 ty               = fscal*dy32;     
609                 tz               = fscal*dz32;     
610
611                 /* Increment i atom force */
612                 fix3             = fix3 + tx;      
613                 fiy3             = fiy3 + ty;      
614                 fiz3             = fiz3 + tz;      
615
616                 /* Decrement j atom force */
617                 faction[j3+3]    = fjx2 - tx;      
618                 faction[j3+4]    = fjy2 - ty;      
619                 faction[j3+5]    = fjz2 - tz;      
620
621                 /* Load parameters for j atom */
622                 qq               = qqHH;           
623
624                 /* Calculate table index */
625                 r                = rsq33*rinv33;   
626
627                 /* Calculate table index */
628                 rt               = r*tabscale;     
629                 n0               = rt;             
630                 eps              = rt-n0;          
631                 eps2             = eps*eps;        
632                 nnn              = 12*n0;          
633
634                 /* Tabulated coulomb interaction */
635                 Y                = VFtab[nnn];     
636                 F                = VFtab[nnn+1];   
637                 Geps             = eps*VFtab[nnn+2];
638                 Heps2            = eps2*VFtab[nnn+3];
639                 Fp               = F+Geps+Heps2;   
640                 VV               = Y+eps*Fp;       
641                 FF               = Fp+Geps+2.0*Heps2;
642                 vcoul            = qq*VV;          
643                 fijC             = qq*FF;          
644                 vctot            = vctot + vcoul;  
645                 fscal            = -((fijC)*tabscale)*rinv33;
646
647                 /* Calculate temporary vectorial force */
648                 tx               = fscal*dx33;     
649                 ty               = fscal*dy33;     
650                 tz               = fscal*dz33;     
651
652                 /* Increment i atom force */
653                 fix3             = fix3 + tx;      
654                 fiy3             = fiy3 + ty;      
655                 fiz3             = fiz3 + tz;      
656
657                 /* Decrement j atom force */
658                 faction[j3+6]    = fjx3 - tx;      
659                 faction[j3+7]    = fjy3 - ty;      
660                 faction[j3+8]    = fjz3 - tz;      
661
662                 /* Inner loop uses 395 flops/iteration */
663             }
664             
665
666             /* Add i forces to mem and shifted force list */
667             faction[ii3+0]   = faction[ii3+0] + fix1;
668             faction[ii3+1]   = faction[ii3+1] + fiy1;
669             faction[ii3+2]   = faction[ii3+2] + fiz1;
670             faction[ii3+3]   = faction[ii3+3] + fix2;
671             faction[ii3+4]   = faction[ii3+4] + fiy2;
672             faction[ii3+5]   = faction[ii3+5] + fiz2;
673             faction[ii3+6]   = faction[ii3+6] + fix3;
674             faction[ii3+7]   = faction[ii3+7] + fiy3;
675             faction[ii3+8]   = faction[ii3+8] + fiz3;
676             fshift[is3]      = fshift[is3]+fix1+fix2+fix3;
677             fshift[is3+1]    = fshift[is3+1]+fiy1+fiy2+fiy3;
678             fshift[is3+2]    = fshift[is3+2]+fiz1+fiz2+fiz3;
679
680             /* Add potential energies to the group for this list */
681             ggid             = gid[n];         
682             Vc[ggid]         = Vc[ggid] + vctot;
683             Vvdw[ggid]       = Vvdw[ggid] + Vvdwtot;
684
685             /* Increment number of inner iterations */
686             ninner           = ninner + nj1 - nj0;
687
688             /* Outer loop uses 29 flops/iteration */
689         }
690         
691
692         /* Increment number of outer iterations */
693         nouter           = nouter + nn1 - nn0;
694     }
695     while (nn1<nri);
696     
697
698     /* Write outer/inner iteration count to pointers */
699     *outeriter       = nouter;         
700     *inneriter       = ninner;         
701 }
702
703 #else
704 int nb_kernel332_sse2_single_dummy;
705 #endif
706
707