Merge gromacs-4-6 into master
[alexxy/gromacs.git] / src / gromacs / gmxlib / nonbonded / nb_kernel_bluegene / nb_kernel_w3w3_bluegene.h
1 #include "types/simple.h"
2
3 #undef FULL_INTERACTION
4 #undef FULL_INTERACTION_
5 #undef COUL_INTERACTION
6 #undef COUL_INTERACTION_
7
8 #ifdef NO_FORCE
9
10  #define FULL_INTERACTION  nfcalc_interaction
11  #define FULL_INTERACTION_ nfcalc_interaction_
12  #define COUL_INTERACTION  nfcalc_coul_interaction
13  #define COUL_INTERACTION_ nfcalc_coul_interaction_
14
15 #else
16
17  #define FULL_INTERACTION  calc_interaction
18  #define FULL_INTERACTION_ calc_interaction_
19  #define COUL_INTERACTION  calc_coul_interaction
20  #define COUL_INTERACTION_ calc_coul_interaction_
21
22 #endif
23
24
25 void NB_KERNEL (
26
27                     int *   p_nri,
28                     int *   iinr,
29                     int *   jindex,
30                     int *   jjnr,
31                     int *   shift,
32                     real *  shiftvec,
33                     real *  fshift,
34                     int *   gid,
35                     real *  pos,
36                     real *  faction,
37                     real *  charge,
38                     real *  p_facel,
39                     real *  p_krf,
40                     real *  p_crf,
41                     real *  Vc,
42                     int *   type,
43                     int *   p_ntype,
44                     real *  vdwparam,
45                     real *  Vvdw,
46                     real *  p_tabscale,
47                     real *  VFtab,
48                     real *  invsqrta,
49                     real *  dvda,
50                     real *  p_gbtabscale,
51                     real *  GBtab,
52                     int *   p_nthreads,
53                     int *   count,
54                     void *  mtx,
55                     int *   outeriter,
56                     int *   inneriter,
57                     real *  work)
58 {
59     double _Complex qqOO_OO,qqOO_OH,qqOH_OH,qqOH_HH,qqHH_HH;
60     double _Complex zero  = __cmplx(0.0,0.0);
61     double _Complex rone  = __cmplx(1.0,0.0);
62     double _Complex two   = __cmplx(2.0,2.0);
63     double _Complex lr    = __cmplx(-1.0,1.0);
64     double _Complex rl    = __cmplx(1.0,-1.0);
65     double _Complex three = __cmplx(3.0,3.0);
66
67     double conv1[2],conv2[2],conv3[2],conv4[2];
68
69     real  _qO,_qH,_qqOO,_qqOH,_qqHH;
70     real  _facel,_tabscale,_gbtabscale,_krf,_crf,_c6,_c12,_cexp1,_cexp2;
71
72     int  nri,nti,ntype,nthreads,n,ii,tj,nj0,nj1;
73
74 #pragma disjoint(*shiftvec,*fshift,*pos,*faction,*charge,*p_facel,*p_krf,*p_crf,*Vc, \
75                  *vdwparam,*Vvdw,*p_tabscale,*VFtab,*invsqrta,*dvda,*p_gbtabscale,*GBtab,*work)
76
77     nri              = *p_nri;         
78     ntype            = *p_ntype;       
79     nthreads         = *p_nthreads;    
80     _facel           = *p_facel;
81 #if (COULOMB == COULOMB_TAB || VDW == VDW_TAB)
82     _tabscale        = *p_tabscale;
83 #else
84     _tabscale        = 0.0;
85 #endif
86 #if COULOMB == REACTION_FIELD
87     _krf             = *p_krf;
88     _crf             = *p_crf;
89 #else
90     _krf             = 0.0;
91     _crf             = 0.0;
92 #endif
93 #if COULOMB == GENERALIZED_BORN
94     _gbtabscale      = *p_gbtabscale;
95 #else
96     _gbtabscale      = 0.0;
97 #endif
98
99     ii               = iinr[0];        
100
101     _qO              = charge[ii];     
102     _qH              = charge[ii+1];   
103     _qqOO            = _facel*_qO*_qO;    
104     _qqOH            = _facel*_qO*_qH;
105     _qqHH            = _facel*_qH*_qH;    
106     qqOO_OO          = __cmplx(_qqOO,_qqOO);
107     qqOH_OH          = __cmplx(_qqOH,_qqOH);
108     qqOO_OH          = __cmplx(_qqOO,_qqOH);
109     qqOH_HH          = __cmplx(_qqOH,_qqHH);
110     qqHH_HH          = __cmplx(_qqHH,_qqHH);
111
112 #if VDW == LENNARD_JONES || VDW == VDW_TAB
113     tj               = 2*(ntype+1)*type[ii];
114     _c6              = vdwparam[tj];   
115     _c12             = vdwparam[tj+1]; 
116 #elif VDW == BUCKINGHAM
117     tj               = 3*(ntype+1)*type[ii];
118     _c6              = vdwparam[tj];
119     _cexp1           = vdwparam[tj+1];
120     _cexp2           = vdwparam[tj+2];
121 #endif
122
123     nj1              = 0;              
124     
125     for(n=0; (n<nri); n++)
126     {
127         double _Complex ix1,ix2,ix3,iy1,iy2,iy3,iz1,iz2,iz3,ix23,iy23,iz23;
128         
129         // initialize potential energies and forces for this paricle
130
131         double _Complex vctot     = zero;
132         double _Complex Vvdwtot   = zero;
133         double _Complex fix1      = zero;
134         double _Complex fix2      = zero;
135         double _Complex fix3      = zero;
136         double _Complex fiy1      = zero;
137         double _Complex fiy2      = zero;
138         double _Complex fiy3      = zero;
139         double _Complex fiz1      = zero;
140         double _Complex fiz2      = zero;
141         double _Complex fiz3      = zero;
142
143         // shift is the position of the n-th water group
144
145         int is3  = 3*shift[n];
146         
147         // shiftvec is the center of a water group
148
149         real _shX  = shiftvec[is3];  
150         real _shY  = shiftvec[is3+1];
151         real _shZ  = shiftvec[is3+2];
152
153         int ii  = iinr[n];        
154         int ii3 = 3*ii;
155         int k,ggid;
156
157         // add the shift vector to all water atoms
158
159         real _ix1 = _shX + pos[ii3+0];
160         real _iy1 = _shY + pos[ii3+1];
161         real _iz1 = _shZ + pos[ii3+2];
162         real _ix2 = _shX + pos[ii3+3];
163         real _iy2 = _shY + pos[ii3+4];
164         real _iz2 = _shZ + pos[ii3+5];
165         real _ix3 = _shX + pos[ii3+6];
166         real _iy3 = _shY + pos[ii3+7];
167         real _iz3 = _shZ + pos[ii3+8];
168
169         // clone all positions in complex variables
170         
171         ix1 = __cmplx(_ix1,_ix1);
172         iy1 = __cmplx(_iy1,_iy1);
173         iz1 = __cmplx(_iz1,_iz1);
174               
175         ix2 = __cmplx(_ix2,_ix2);
176         iy2 = __cmplx(_iy2,_iy2);
177         iz2 = __cmplx(_iz2,_iz2);
178
179         ix3 = __cmplx(_ix3,_ix3);
180         iy3 = __cmplx(_iy3,_iy3);
181         iz3 = __cmplx(_iz3,_iz3);
182
183         nj0 = jindex[n];      
184         nj1 = jindex[n+1];    
185
186         for(k=nj0; (k<nj1-1); k+=2)
187         {
188             double _Complex jx1,jx2,jx3,fjx1,fjx2,fjx3,dx11,dx12,dx13,dx21,dx22,dx23,dx31,dx32,dx33,tx;
189             double _Complex jy1,jy2,jy3,fjy1,fjy2,fjy3,dy11,dy12,dy13,dy21,dy22,dy23,dy31,dy32,dy33,ty;
190             double _Complex jz1,jz2,jz3,fjz1,fjz2,fjz3,dz11,dz12,dz13,dz21,dz22,dz23,dz31,dz32,dz33,tz;
191             double _Complex rsq11,rsq12,rsq13,rsq21,rsq22,rsq23,rsq31,rsq32,rsq33;
192             double _Complex rinv11,rinv12,rinv13,rinv21,rinv22,rinv23,rinv31,rinv32,rinv33;
193             double _Complex rinvsq,krsq,gbscale,fscal;
194             double _Complex rinvsix,Vvdw6,Vvdw12,Vvdwexp;
195             double _Complex rt,r,rti,n0,eps,Y,F,G,H,GHeps,VV,FF,fijC,fijD;
196             
197             int nnn1,nnn2;
198             int jnr1,jnr2,j31,j32;
199
200             jnr1   = jjnr[k];
201             jnr2   = jjnr[k+1];
202
203             j31    = 3*jnr1;        
204             j32    = 3*jnr2;
205
206             // Coulomb and possibly VdW between i-water and Oxygens of j1- and j2-water
207
208             load6_and_merge(pos,j31,j32,jx1,jy1,jz1);
209
210             dx11   = __fpsub(ix1,jx1);
211             dy11   = __fpsub(iy1,jy1);
212             dz11   = __fpsub(iz1,jz1);
213
214             dx21   = __fpsub(ix2,jx1);
215             dy21   = __fpsub(iy2,jy1);
216             dz21   = __fpsub(iz2,jz1);
217
218             dx31   = __fpsub(ix3,jx1);
219             dy31   = __fpsub(iy3,jy1);
220             dz31   = __fpsub(iz3,jz1);
221
222             rsq11  = vdist2(dx11,dy11,dz11);
223             rsq21  = vdist2(dx21,dy21,dz21);
224             rsq31  = vdist2(dx31,dy31,dz31);
225
226             rinv11 = __fprsqrte(rsq11);
227             rinv21 = __fprsqrte(rsq21);
228             rinv31 = __fprsqrte(rsq31);
229
230             rinv11 = sqrt_newton(rinv11,rsq11);
231             rinv21 = sqrt_newton(rinv21,rsq21);
232             rinv31 = sqrt_newton(rinv31,rsq31);
233
234 #ifdef GMX_DOUBLE
235
236             rinv11 = sqrt_newton(rinv11,rsq11);
237             rinv21 = sqrt_newton(rinv21,rsq21);
238             rinv31 = sqrt_newton(rinv31,rsq31);
239
240 #endif
241
242             // only the oxygens will have a LJ-interaction
243
244 #ifndef NO_FORCE
245             load6_and_merge(faction,j31,j32,fjx1,fjy1,fjz1);
246 #endif
247
248             FULL_INTERACTION(qqOO_OO,rinv11,rsq11,conv1,jnr1,jnr2);
249
250 #ifndef NO_FORCE
251             fix1   = __fpnmsub(fix1,dx11,fscal);
252             fiy1   = __fpnmsub(fiy1,dy11,fscal);      
253             fiz1   = __fpnmsub(fiz1,dz11,fscal);
254
255             fjx1   = __fpmadd(fjx1,dx11,fscal);
256             fjy1   = __fpmadd(fjy1,dy11,fscal);
257             fjz1   = __fpmadd(fjz1,dz11,fscal);
258 #endif
259
260             COUL_INTERACTION(qqOH_OH,rinv21,rsq21,conv2,jnr1,jnr2);
261
262 #ifndef NO_FORCE
263             fix2   = __fpnmsub(fix2,dx21,fscal);
264             fiy2   = __fpnmsub(fiy2,dy21,fscal);      
265             fiz2   = __fpnmsub(fiz2,dz21,fscal);
266
267             fjx1   = __fpmadd(fjx1,dx21,fscal);
268             fjy1   = __fpmadd(fjy1,dy21,fscal);
269             fjz1   = __fpmadd(fjz1,dz21,fscal);
270 #endif
271
272             COUL_INTERACTION(qqOH_OH,rinv31,rsq31,conv3,jnr1,jnr2);
273
274 #ifndef NO_FORCE
275             fix3   = __fpnmsub(fix3,dx31,fscal);
276             fiy3   = __fpnmsub(fiy3,dy31,fscal);      
277             fiz3   = __fpnmsub(fiz3,dz31,fscal);
278
279             fjx1   = __fpmadd(fjx1,dx31,fscal);
280             fjy1   = __fpmadd(fjy1,dy31,fscal);
281             fjz1   = __fpmadd(fjz1,dz31,fscal);
282
283             split_and_store6(faction,j31,j32,fjx1,fjy1,fjz1);
284 #endif
285
286             // Coulomb between i-water and first hydrogen in j1- and j2-water
287
288             load6_and_merge(pos,j31+3,j32+3,jx2,jy2,jz2);
289
290             dx12   = __fpsub(ix1,jx2);
291             dy12   = __fpsub(iy1,jy2);
292             dz12   = __fpsub(iz1,jz2);
293             dx22   = __fpsub(ix2,jx2);
294             dy22   = __fpsub(iy2,jy2);
295             dz22   = __fpsub(iz2,jz2);
296             dx32   = __fpsub(ix3,jx2);
297             dy32   = __fpsub(iy3,jy2);
298             dz32   = __fpsub(iz3,jz2);
299
300             rsq12  = vdist2(dx12,dy12,dz12);
301             rsq22  = vdist2(dx22,dy22,dz22);
302             rsq32  = vdist2(dx32,dy32,dz32);
303
304             rinv12 = __fprsqrte(rsq12);
305             rinv22 = __fprsqrte(rsq22);
306             rinv32 = __fprsqrte(rsq32);
307
308             rinv12 = sqrt_newton(rinv12,rsq12);
309             rinv22 = sqrt_newton(rinv22,rsq22);
310             rinv32 = sqrt_newton(rinv32,rsq32);
311
312 #ifdef GMX_DOUBLE
313
314             rinv12 = sqrt_newton(rinv12,rsq12);
315             rinv22 = sqrt_newton(rinv22,rsq22);
316             rinv32 = sqrt_newton(rinv32,rsq32);
317
318 #endif
319
320 #ifndef NO_FORCE
321             load6_and_merge(faction,j31+3,j32+3,fjx2,fjy2,fjz2);
322 #endif
323
324             COUL_INTERACTION(qqOH_OH,rinv12,rsq12,conv1,jnr1,jnr2);
325
326 #ifndef NO_FORCE
327             fix1   = __fpnmsub(fix1,dx12,fscal);
328             fiy1   = __fpnmsub(fiy1,dy12,fscal);      
329             fiz1   = __fpnmsub(fiz1,dz12,fscal);
330
331             fjx2   = __fpmadd(fjx2,dx12,fscal);
332             fjy2   = __fpmadd(fjy2,dy12,fscal);
333             fjz2   = __fpmadd(fjz2,dz12,fscal);
334 #endif
335
336             COUL_INTERACTION(qqHH_HH,rinv22,rsq22,conv2,jnr1,jnr2);
337
338 #ifndef NO_FORCE
339             fix2   = __fpnmsub(fix2,dx22,fscal);
340             fiy2   = __fpnmsub(fiy2,dy22,fscal);      
341             fiz2   = __fpnmsub(fiz2,dz22,fscal);
342
343             fjx2   = __fpmadd(fjx2,dx22,fscal);
344             fjy2   = __fpmadd(fjy2,dy22,fscal);
345             fjz2   = __fpmadd(fjz2,dz22,fscal);
346 #endif
347
348             COUL_INTERACTION(qqHH_HH,rinv32,rsq32,conv3,jnr1,jnr2);
349
350 #ifndef NO_FORCE
351             fix3   = __fpnmsub(fix3,dx32,fscal);
352             fiy3   = __fpnmsub(fiy3,dy32,fscal);      
353             fiz3   = __fpnmsub(fiz3,dz32,fscal);
354
355             fjx2   = __fpmadd(fjx2,dx32,fscal);
356             fjy2   = __fpmadd(fjy2,dy32,fscal);
357             fjz2   = __fpmadd(fjz2,dz32,fscal);
358
359             split_and_store6(faction,j31+3,j32+3,fjx2,fjy2,fjz2);
360 #endif
361
362
363             // Coulomb between i-water and second hydrogen in j1- and j2-water
364
365             load6_and_merge(pos,j31+6,j32+6,jx3,jy3,jz3);
366
367             dx13   = __fpsub(ix1,jx3);
368             dy13   = __fpsub(iy1,jy3);
369             dz13   = __fpsub(iz1,jz3);
370             dx23   = __fpsub(ix2,jx3);
371             dy23   = __fpsub(iy2,jy3);
372             dz23   = __fpsub(iz2,jz3);
373             dx33   = __fpsub(ix3,jx3);
374             dy33   = __fpsub(iy3,jy3);
375             dz33   = __fpsub(iz3,jz3);
376
377             rsq13  = vdist2(dx13,dy13,dz13);
378             rsq23  = vdist2(dx23,dy23,dz23);
379             rsq33  = vdist2(dx33,dy33,dz33);
380
381             rinv13 = __fprsqrte(rsq13);
382             rinv23 = __fprsqrte(rsq23);
383             rinv33 = __fprsqrte(rsq33);
384
385             rinv13 = sqrt_newton(rinv13,rsq13);
386             rinv23 = sqrt_newton(rinv23,rsq23);
387             rinv33 = sqrt_newton(rinv33,rsq33);
388
389 #ifdef GMX_DOUBLE
390
391             rinv13 = sqrt_newton(rinv13,rsq13);
392             rinv23 = sqrt_newton(rinv23,rsq23);
393             rinv33 = sqrt_newton(rinv33,rsq33);
394
395 #endif
396
397 #ifndef NO_FORCE
398             load6_and_merge(faction,j31+6,j32+6,fjx3,fjy3,fjz3);
399 #endif
400
401             COUL_INTERACTION(qqOH_OH,rinv13,rsq13,conv1,jnr1,jnr2);
402
403 #ifndef NO_FORCE
404             fix1   = __fpnmsub(fix1,dx13,fscal);
405             fiy1   = __fpnmsub(fiy1,dy13,fscal);      
406             fiz1   = __fpnmsub(fiz1,dz13,fscal);
407
408             fjx3   = __fpmadd(fjx3,dx13,fscal);
409             fjy3   = __fpmadd(fjy3,dy13,fscal);
410             fjz3   = __fpmadd(fjz3,dz13,fscal);
411 #endif
412
413             COUL_INTERACTION(qqHH_HH,rinv23,rsq23,conv2,jnr1,jnr2);
414
415 #ifndef NO_FORCE
416             fix2   = __fpnmsub(fix2,dx23,fscal);
417             fiy2   = __fpnmsub(fiy2,dy23,fscal);      
418             fiz2   = __fpnmsub(fiz2,dz23,fscal);
419
420             fjx3   = __fpmadd(fjx3,dx23,fscal);
421             fjy3   = __fpmadd(fjy3,dy23,fscal);
422             fjz3   = __fpmadd(fjz3,dz23,fscal);
423 #endif
424
425             COUL_INTERACTION(qqHH_HH,rinv33,rsq33,conv3,jnr1,jnr2);
426
427 #ifndef NO_FORCE
428             fix3   = __fpnmsub(fix3,dx33,fscal);
429             fiy3   = __fpnmsub(fiy3,dy33,fscal);      
430             fiz3   = __fpnmsub(fiz3,dz33,fscal);
431
432             fjx3   = __fpmadd(fjx3,dx33,fscal);
433             fjy3   = __fpmadd(fjy3,dy33,fscal);
434             fjz3   = __fpmadd(fjz3,dz33,fscal);
435
436             split_and_store6(faction,j31+6,j32+6,fjx3,fjy3,fjz3);
437 #endif
438         }
439
440         ix23 = __cmplx(_ix2,_ix3);
441         iy23 = __cmplx(_iy2,_iy3);
442         iz23 = __cmplx(_iz2,_iz3);
443         
444         for(; (k<nj1); k++)
445         {
446             double _Complex dx123,dx223,dx323,dx231,tx;
447             double _Complex dy123,dy223,dy323,dy231,ty;
448             double _Complex dz123,dz223,dz323,dz231,tz;
449             double _Complex jx23,jy23,jz23,jx1,jy1,jz1;
450             double _Complex fjx23,fjy23,fjz23,fjx1,fjy1,fjz1;
451             double _Complex rsq123,rsq223,rsq323,rsq231;
452             double _Complex rinv123,rinv223,rinv323,rinv231;
453             double _Complex rinvsq,krsq,fscal;
454             double _Complex rt,r,rti,eps,YF1,YF2,GH1,GH2,Y,F,G,H,GHeps,VV,FF,fijC,fijD,n0;
455             real _dx11,_dy11,_dz11,_rsq11,_rinv11;
456             real _rinvsq,_krsq,_fscal;
457             real _rt,_r,_eps,_Y,_F,_G,_H,_GHeps,_VV,_FF,_fijC,_fijD,_n0;
458             real _rinvsix,_Vvdw6,_Vvdw12,_Vvdwexp;
459
460             int nnn1,nnn2;
461             int jnr,j3;
462
463             jnr    = jjnr[k];
464             j3     = 3*jnr;
465
466             load3_and_clone(pos,j3,jx1,jy1,jz1);
467
468             _dx11   = ix1 - __creal(jx1);
469             _dy11   = iy1 - __creal(jy1);
470             _dz11   = iz1 - __creal(jz1);
471             dx231   = __fpsub(ix23,jx1);
472             dy231   = __fpsub(iy23,jy1);
473             dz231   = __fpsub(iz23,jz1);
474
475             _rsq11  = _dx11 * _dx11 + _dy11 * _dy11 + _dz11 * _dz11;
476             rsq231  = vdist2(dx231,dy231,dz231);
477             _rinv11 = __frsqrte(_rsq11);
478             rinv231 = __fprsqrte(rsq231);
479
480             _rinv11 = sqrt_newton_scalar(_rinv11,_rsq11);
481             rinv231 = sqrt_newton(rinv231,rsq231);
482
483 #ifdef GMX_DOUBLE
484
485             _rinv11 = sqrt_newton_scalar(_rinv11,_rsq11);
486             rinv231 = sqrt_newton(rinv231,rsq231);
487
488 #endif
489
490             FULL_INTERACTION_(_qqOO,_rinv11,_rsq11,jnr);
491
492 #ifndef NO_FORCE
493
494             load3_real(faction,j3,fjx1,fjy1,fjz1);
495
496             fix1            -= _dx11 * _fscal;
497             fiy1            -= _dy11 * _fscal;
498             fiz1            -= _dz11 * _fscal;
499
500             fjx1            += _dx11 * _fscal;
501             fjy1            += _dy11 * _fscal;
502             fjz1            += _dz11 * _fscal;
503 #endif
504
505             COUL_INTERACTION(qqOH_OH,rinv231,rsq231,conv4,jnr,jnr);
506
507 #ifndef NO_FORCE
508             tx     = __fpmul(dx231,fscal);
509             ty     = __fpmul(dy231,fscal);
510             tz     = __fpmul(dz231,fscal);
511
512             fix2    = __fpnmsub(fix2,rone,tx);      
513             fiy2    = __fpnmsub(fiy2,rone,ty); 
514             fiz2    = __fpnmsub(fiz2,rone,tz);
515             fix3    = __fxnmsub(fix3,rone,tx);      
516             fiy3    = __fxnmsub(fiy3,rone,ty); 
517             fiz3    = __fxnmsub(fiz3,rone,tz);
518
519             fjx1   = __fpadd(fjx1,tx);
520             fjy1   = __fpadd(fjy1,ty);
521             fjz1   = __fpadd(fjz1,tz);
522
523             sum_and_store3(faction,j3,fjx1,fjy1,fjz1);
524 #endif
525
526             load6_and_merge(pos,j3+3,j3+6,jx23,jy23,jz23);
527
528             dx123   = __fpsub(ix1,jx23);
529             dy123   = __fpsub(iy1,jy23);
530             dz123   = __fpsub(iz1,jz23);
531             dx223   = __fpsub(ix2,jx23);
532             dy223   = __fpsub(iy2,jy23);
533             dz223   = __fpsub(iz2,jz23);
534             dx323   = __fpsub(ix3,jx23);
535             dy323   = __fpsub(iy3,jy23);
536             dz323   = __fpsub(iz3,jz23);
537
538             rsq123  = vdist2(dx123,dy123,dz123);
539             rsq223  = vdist2(dx223,dy223,dz223);
540             rsq323  = vdist2(dx323,dy323,dz323);
541
542             rinv123 = __fprsqrte(rsq123);
543             rinv223 = __fprsqrte(rsq223);
544             rinv323 = __fprsqrte(rsq323);
545
546             rinv123 = sqrt_newton(rinv123,rsq123);
547             rinv223 = sqrt_newton(rinv223,rsq223);
548             rinv323 = sqrt_newton(rinv323,rsq323);
549
550 #ifdef GMX_DOUBLE
551
552             rinv123 = sqrt_newton(rinv123,rsq123);
553             rinv223 = sqrt_newton(rinv223,rsq223);
554             rinv323 = sqrt_newton(rinv323,rsq323);
555
556 #endif
557             COUL_INTERACTION(qqOH_OH,rinv123,rsq123,conv1,jnr,jnr);
558
559 #ifndef NO_FORCE
560             load6_and_merge(faction,j3+3,j3+6,fjx23,fjy23,fjz23);
561
562             fix1   = __fpnmsub(fix1,dx123,fscal);
563             fiy1   = __fpnmsub(fiy1,dy123,fscal);      
564             fiz1   = __fpnmsub(fiz1,dz123,fscal);
565
566             fjx23  = __fpmadd(fjx23,dx123,fscal);
567             fjy23  = __fpmadd(fjy23,dy123,fscal);
568             fjz23  = __fpmadd(fjz23,dz123,fscal);
569 #endif
570
571             COUL_INTERACTION(qqHH_HH,rinv223,rsq223,conv2,jnr,jnr);
572
573 #ifndef NO_FORCE
574             fix2   = __fpnmsub(fix2,dx223,fscal);
575             fiy2   = __fpnmsub(fiy2,dy223,fscal);      
576             fiz2   = __fpnmsub(fiz2,dz223,fscal);
577
578             fjx23  = __fpmadd(fjx23,dx223,fscal);
579             fjy23  = __fpmadd(fjy23,dy223,fscal);
580             fjz23  = __fpmadd(fjz23,dz223,fscal);
581 #endif
582
583             COUL_INTERACTION(qqHH_HH,rinv323,rsq323,conv3,jnr,jnr);
584
585 #ifndef NO_FORCE
586             fix3   = __fpnmsub(fix3,dx323,fscal);
587             fiy3   = __fpnmsub(fiy3,dy323,fscal);      
588             fiz3   = __fpnmsub(fiz3,dz323,fscal);
589
590             fjx23  = __fpmadd(fjx23,dx323,fscal);
591             fjy23  = __fpmadd(fjy23,dy323,fscal);
592             fjz23  = __fpmadd(fjz23,dz323,fscal);
593
594             split_and_store6(faction,j3+3,j3+6,fjx23,fjy23,fjz23);
595 #endif
596         }
597
598         ggid = gid[n];
599
600 #ifndef NO_FORCE
601         sum_and_add3(faction,ii3  ,fix1,fiy1,fiz1);
602         sum_and_add3(faction,ii3+3,fix2,fiy2,fiz2);
603         sum_and_add3(faction,ii3+6,fix3,fiy3,fiz3);
604
605         fshift[is3]      = fshift[is3]
606                          + (__creal(fix1) + __cimag(fix1)) 
607                          + (__creal(fix2) + __cimag(fix2))
608                          + (__creal(fix3) + __cimag(fix3));
609         fshift[is3+1]    = fshift[is3+1]
610                          + (__creal(fiy1) + __cimag(fiy1)) 
611                          + (__creal(fiy2) + __cimag(fiy2))
612                          + (__creal(fiy3) + __cimag(fiy3));
613         fshift[is3+2]    = fshift[is3+2]
614                          + (__creal(fiz1) + __cimag(fiz1)) 
615                          + (__creal(fiz2) + __cimag(fiz2))
616                          + (__creal(fiz3) + __cimag(fiz3));
617 #endif
618
619 #if COULOMB != COULOMB_NONE
620         Vc[ggid]         += __creal(vctot)   + __cimag(vctot);
621 #endif
622
623 #if VDW != VDW_NONE
624         Vvdw[ggid]       += __creal(Vvdwtot) + __cimag(Vvdwtot);
625 #endif
626     }
627     
628     *outeriter       = nri;            
629     *inneriter       = nj1;            
630 }