added Verlet scheme and NxN non-bonded functionality
[alexxy/gromacs.git] / src / mdlib / nbnxn_kernels / nbnxn_kernel_ref_inner.h
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  *                This source code is part of
5  *
6  *                 G   R   O   M   A   C   S
7  *
8  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
9  * Copyright (c) 2001-2009, The GROMACS Development Team
10  *
11  * Gromacs is a library for molecular simulation and trajectory analysis,
12  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
13  * a full list of developers and information, check out http://www.gromacs.org
14  *
15  * This program is free software; you can redistribute it and/or modify it under
16  * the terms of the GNU Lesser General Public License as published by the Free
17  * Software Foundation; either version 2 of the License, or (at your option) any
18  * later version.
19  * As a special exception, you may use this file as part of a free software
20  * library without restriction.  Specifically, if other files instantiate
21  * templates or use macros or inline functions from this file, or you compile
22  * this file and link it with other files to produce an executable, this
23  * file does not by itself cause the resulting executable to be covered by
24  * the GNU Lesser General Public License.
25  *
26  * In plain-speak: do not worry about classes/macros/templates either - only
27  * changes to the library have to be LGPL, not an application linking with it.
28  *
29  * To help fund GROMACS development, we humbly ask that you cite
30  * the papers people have written on it - you can find them on the website!
31  */
32
33 /* When calculating RF or Ewald interactions we calculate the electrostatic
34  * forces and energies on excluded atom pairs here in the non-bonded loops.
35  */
36 #if defined CHECK_EXCLS && defined CALC_COULOMB
37 #define EXCL_FORCES
38 #endif
39
40         {
41             int cj;
42 #ifdef ENERGY_GROUPS
43             int egp_cj;
44 #endif
45             int i;
46
47             cj = l_cj[cjind].cj;
48
49 #ifdef ENERGY_GROUPS
50             egp_cj = nbat->energrp[cj];
51 #endif
52             for(i=0; i<UNROLLI; i++)
53             {
54                 int ai;
55                 int type_i_off;
56                 int j;
57
58                 ai = ci*UNROLLI + i;
59
60                 type_i_off = type[ai]*ntype2;
61
62                 for(j=0; j<UNROLLJ; j++)
63                 {
64                     int  aj;
65                     real dx,dy,dz;
66                     real rsq,rinv;
67                     real rinvsq,rinvsix;
68                     real c6,c12;
69                     real FrLJ6=0,FrLJ12=0,VLJ=0;
70 #ifdef CALC_COULOMB
71                     real qq;
72                     real fcoul;
73 #ifdef CALC_COUL_TAB
74                     real rs,frac;
75                     int  ri;
76                     real fexcl;
77 #endif
78 #ifdef CALC_ENERGIES
79                     real vcoul;
80 #endif
81 #endif
82                     real fscal;
83                     real fx,fy,fz;
84
85                     /* A multiply mask used to zero an interaction
86                      * when either the distance cutoff is exceeded, or
87                      * (if appropriate) the i and j indices are
88                      * unsuitable for this kind of inner loop. */
89                     real skipmask;
90 #ifdef VDW_CUTOFF_CHECK
91                     real skipmask_rvdw;
92 #endif
93 #ifdef CHECK_EXCLS
94                     /* A multiply mask used to zero an interaction
95                      * when that interaction should be excluded
96                      * (e.g. because of bonding). */
97                     int interact;
98
99                     interact = ((l_cj[cjind].excl>>(i*UNROLLI + j)) & 1);
100 #ifndef EXCL_FORCES
101                     skipmask = interact;
102 #else
103                     skipmask = !(cj == ci_sh && j <= i);
104 #endif
105 #else
106 #define interact 1.0
107                     skipmask = 1.0;
108 #endif
109
110                     aj = cj*UNROLLJ + j;
111
112                     dx  = xi[i*XI_STRIDE+XX] - x[aj*X_STRIDE+XX];
113                     dy  = xi[i*XI_STRIDE+YY] - x[aj*X_STRIDE+YY];
114                     dz  = xi[i*XI_STRIDE+ZZ] - x[aj*X_STRIDE+ZZ];
115
116                     rsq = dx*dx + dy*dy + dz*dz;
117
118                     /* Prepare to enforce the cut-off. */
119                     skipmask = (rsq >= rcut2) ? 0 : skipmask;
120                     /* 9 flops for r^2 + cut-off check */
121
122 #ifdef CHECK_EXCLS
123                     /* Excluded atoms are allowed to be on top of each other.
124                      * To avoid overflow of rinv, rinvsq and rinvsix
125                      * we add a small number to rsq for excluded pairs only.
126                      */
127                     rsq += (1 - interact)*NBNXN_AVOID_SING_R2_INC;
128 #endif
129
130 #ifdef COUNT_PAIRS
131                     npair++;
132 #endif
133
134                     rinv = gmx_invsqrt(rsq);
135                     /* 5 flops for invsqrt */
136
137                     /* Partially enforce the cut-off (and perhaps
138                      * exclusions) to avoid possible overflow of
139                      * rinvsix when computing LJ, and/or overflowing
140                      * the Coulomb table during lookup. */
141                     rinv = rinv * skipmask;
142
143                     rinvsq  = rinv*rinv;
144
145 #ifdef HALF_LJ
146                     if (i < UNROLLI/2)
147 #endif
148                     {
149                         rinvsix = interact*rinvsq*rinvsq*rinvsq;
150
151 #ifdef VDW_CUTOFF_CHECK
152                         skipmask_rvdw = (rsq < rvdw2);
153                         rinvsix *= skipmask_rvdw;
154 #endif
155
156                         c6      = nbfp[type_i_off+type[aj]*2  ];
157                         c12     = nbfp[type_i_off+type[aj]*2+1];
158                         FrLJ6   = c6*rinvsix;
159                         FrLJ12  = c12*rinvsix*rinvsix;
160                         /* 6 flops for r^-2 + LJ force */
161 #ifdef CALC_ENERGIES
162                         VLJ     = (FrLJ12 - c12*sh_invrc6*sh_invrc6)/12 -
163                                   (FrLJ6 - c6*sh_invrc6)/6;
164                         /* Need to zero the interaction if r >= rcut
165                          * or there should be exclusion. */
166                         VLJ     = VLJ * skipmask * interact;
167                         /* 9 flops for LJ energy */
168 #ifdef VDW_CUTOFF_CHECK
169                         VLJ    *= skipmask_rvdw;
170 #endif
171 #ifdef ENERGY_GROUPS
172                         Vvdw[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += VLJ;
173 #else
174                         Vvdw_ci += VLJ;
175                         /* 1 flop for LJ energy addition */
176 #endif
177 #endif
178                     }
179
180 #ifdef CALC_COULOMB
181                     /* Enforce the cut-off and perhaps exclusions. In
182                      * those cases, rinv is zero because of skipmask,
183                      * but fcoul and vcoul will later be non-zero (in
184                      * both RF and table cases) because of the
185                      * contributions that do not depend on rinv. These
186                      * contributions cannot be allowed to accumulate
187                      * to the force and potential, and the easiest way
188                      * to do this is to zero the charges in
189                      * advance. */
190                     qq = skipmask * qi[i] * q[aj];
191
192 #ifdef CALC_COUL_RF
193                     fcoul  = qq*(interact*rinv*rinvsq - k_rf2);
194                     /* 4 flops for RF force */
195 #ifdef CALC_ENERGIES
196                     vcoul  = qq*(interact*rinv + k_rf*rsq - c_rf);
197                     /* 4 flops for RF energy */
198 #endif
199 #endif
200
201 #ifdef CALC_COUL_TAB
202                     rs     = rsq*rinv*ic->tabq_scale;
203                     ri     = (int)rs;
204                     frac   = rs - ri;
205 #ifndef GMX_DOUBLE
206                     /* fexcl = F_i + frac * (F_(i+1)-F_i) */
207                     fexcl  = tab_coul_FDV0[ri*4] + frac*tab_coul_FDV0[ri*4+1];
208 #else
209                     /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
210                     fexcl  = (1 - frac)*tab_coul_F[ri] + frac*tab_coul_F[ri+1];
211 #endif
212                     fcoul  = interact*rinvsq - fexcl;
213                     /* 7 flops for float 1/r-table force */
214 #ifdef CALC_ENERGIES
215 #ifndef GMX_DOUBLE
216                     vcoul  = qq*(interact*(rinv - ic->sh_ewald)
217                                  -(tab_coul_FDV0[ri*4+2]
218                                    -halfsp*frac*(tab_coul_FDV0[ri*4] + fexcl)));
219                     /* 7 flops for float 1/r-table energy (8 with excls) */
220 #else
221                     vcoul  = qq*(interact*(rinv - ic->sh_ewald)
222                                  -(tab_coul_V[ri]
223                                    -halfsp*frac*(tab_coul_F[ri] + fexcl)));
224 #endif
225 #endif
226                     fcoul *= qq*rinv;
227 #endif
228
229 #ifdef CALC_ENERGIES
230 #ifdef ENERGY_GROUPS
231                     Vc[egp_sh_i[i]+((egp_cj>>(nbat->neg_2log*j)) & egp_mask)] += vcoul;
232 #else
233                     Vc_ci += vcoul;
234                     /* 1 flop for Coulomb energy addition */
235 #endif
236 #endif
237 #endif
238
239 #ifdef CALC_COULOMB
240 #ifdef HALF_LJ
241                     if (i < UNROLLI/2)
242 #endif
243                     {
244                         fscal = (FrLJ12 - FrLJ6)*rinvsq + fcoul;
245                         /* 3 flops for scalar LJ+Coulomb force */
246                     }
247 #ifdef HALF_LJ
248                     else
249                     {
250                         fscal = fcoul;
251                     }
252 #endif
253 #else
254                     fscal = (FrLJ12 - FrLJ6)*rinvsq;
255 #endif
256                     fx = fscal*dx;
257                     fy = fscal*dy;
258                     fz = fscal*dz;
259
260                     /* Increment i-atom force */
261                     fi[i*FI_STRIDE+XX] += fx;
262                     fi[i*FI_STRIDE+YY] += fy;
263                     fi[i*FI_STRIDE+ZZ] += fz;
264                     /* Decrement j-atom force */
265                     f[aj*F_STRIDE+XX]  -= fx;
266                     f[aj*F_STRIDE+YY]  -= fy;
267                     f[aj*F_STRIDE+ZZ]  -= fz;
268                     /* 9 flops for force addition */
269                 }
270             }
271         }
272
273 #undef interact
274 #undef EXCL_FORCES