File: | gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c |
Location: | line 261, column 33 |
Description: | Value stored to 'fscal' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by |
5 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
6 | * and including many others, as listed in the AUTHORS file in the |
7 | * top-level source directory and at http://www.gromacs.org. |
8 | * |
9 | * GROMACS is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Lesser General Public License |
11 | * as published by the Free Software Foundation; either version 2.1 |
12 | * of the License, or (at your option) any later version. |
13 | * |
14 | * GROMACS is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Lesser General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Lesser General Public |
20 | * License along with GROMACS; if not, see |
21 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
22 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
23 | * |
24 | * If you want to redistribute modifications to GROMACS, please |
25 | * consider that scientific software is very special. Version |
26 | * control is crucial - bugs must be traceable. We will be happy to |
27 | * consider code for inclusion in the official distribution, but |
28 | * derived work must not be called official GROMACS. Details are found |
29 | * in the README & COPYING files - if they are missing, get the |
30 | * official version at http://www.gromacs.org. |
31 | * |
32 | * To help us fund GROMACS development, we humbly ask that you cite |
33 | * the research papers on the package. Check out http://www.gromacs.org. |
34 | */ |
35 | #ifdef HAVE_CONFIG_H1 |
36 | #include <config.h> |
37 | #endif |
38 | |
39 | #include <math.h> |
40 | |
41 | #include "types/simple.h" |
42 | #include "gromacs/math/utilities.h" |
43 | #include "gromacs/math/vec.h" |
44 | #include "typedefs.h" |
45 | #include "force.h" |
46 | #include "nbnxn_kernel_gpu_ref.h" |
47 | #include "../nbnxn_consts.h" |
48 | #include "nbnxn_kernel_common.h" |
49 | |
50 | #define NCL_PER_SUPERCL((2*2*2)) (NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER(2*2*2)) |
51 | #define CL_SIZE(8) (NBNXN_GPU_CLUSTER_SIZE8) |
52 | |
53 | void |
54 | nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t *nbl, |
55 | const nbnxn_atomdata_t *nbat, |
56 | const interaction_const_t *iconst, |
57 | rvec *shift_vec, |
58 | int force_flags, |
59 | int clearF, |
60 | real * f, |
61 | real * fshift, |
62 | real * Vc, |
63 | real * Vvdw) |
64 | { |
65 | const nbnxn_sci_t *nbln; |
66 | const real *x; |
67 | gmx_bool bEner; |
68 | gmx_bool bEwald; |
69 | const real *Ftab = NULL((void*)0); |
70 | real rcut2, rvdw2, rlist2; |
71 | int ntype; |
72 | real facel; |
73 | int n; |
74 | int ish3; |
75 | int sci; |
76 | int cj4_ind0, cj4_ind1, cj4_ind; |
77 | int ci, cj; |
78 | int ic, jc, ia, ja, is, ifs, js, jfs, im, jm; |
79 | int n0; |
80 | int ggid; |
81 | real shX, shY, shZ; |
82 | real fscal, tx, ty, tz; |
83 | real rinvsq; |
84 | real iq; |
85 | real qq, vcoul = 0, krsq, vctot; |
86 | int nti; |
87 | int tj; |
88 | real rt, r, eps; |
89 | real rinvsix; |
90 | real Vvdwtot; |
91 | real Vvdw_rep, Vvdw_disp; |
92 | real ix, iy, iz, fix, fiy, fiz; |
93 | real jx, jy, jz; |
94 | real dx, dy, dz, rsq, rinv; |
95 | int int_bit; |
96 | real fexcl; |
97 | real c6, c12, cexp1, cexp2, br; |
98 | const real * shiftvec; |
99 | real * vdwparam; |
100 | int * shift; |
101 | int * type; |
102 | const nbnxn_excl_t *excl[2]; |
103 | |
104 | int npair_tot, npair; |
105 | int nhwu, nhwu_pruned; |
106 | |
107 | if (nbl->na_ci != CL_SIZE(8)) |
108 | { |
109 | gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c" , 109, "The neighborlist cluster size in the GPU reference kernel is %d, expected it to be %d", nbl->na_ci, CL_SIZE(8)); |
110 | } |
111 | |
112 | if (clearF == enbvClearFYes) |
113 | { |
114 | clear_f(nbat, 0, f); |
115 | } |
116 | |
117 | bEner = (force_flags & GMX_FORCE_ENERGY(1<<9)); |
118 | |
119 | bEwald = EEL_FULL(iconst->eeltype)((((iconst->eeltype) == eelPME || (iconst->eeltype) == eelPMESWITCH || (iconst->eeltype) == eelPMEUSER || (iconst->eeltype ) == eelPMEUSERSWITCH || (iconst->eeltype) == eelP3M_AD) || (iconst->eeltype) == eelEWALD) || (iconst->eeltype) == eelPOISSON); |
120 | if (bEwald) |
121 | { |
122 | Ftab = iconst->tabq_coul_F; |
123 | } |
124 | |
125 | rcut2 = iconst->rcoulomb*iconst->rcoulomb; |
126 | rvdw2 = iconst->rvdw*iconst->rvdw; |
127 | |
128 | rlist2 = nbl->rlist*nbl->rlist; |
129 | |
130 | type = nbat->type; |
131 | facel = iconst->epsfac; |
132 | shiftvec = shift_vec[0]; |
133 | vdwparam = nbat->nbfp; |
134 | ntype = nbat->ntype; |
135 | |
136 | x = nbat->x; |
137 | |
138 | npair_tot = 0; |
139 | nhwu = 0; |
140 | nhwu_pruned = 0; |
141 | |
142 | for (n = 0; n < nbl->nsci; n++) |
143 | { |
144 | nbln = &nbl->sci[n]; |
145 | |
146 | ish3 = 3*nbln->shift; |
147 | shX = shiftvec[ish3]; |
148 | shY = shiftvec[ish3+1]; |
149 | shZ = shiftvec[ish3+2]; |
150 | cj4_ind0 = nbln->cj4_ind_start; |
151 | cj4_ind1 = nbln->cj4_ind_end; |
152 | sci = nbln->sci; |
153 | vctot = 0; |
154 | Vvdwtot = 0; |
155 | |
156 | if (nbln->shift == CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2) && |
157 | nbl->cj4[cj4_ind0].cj[0] == sci*NCL_PER_SUPERCL((2*2*2))) |
158 | { |
159 | /* we have the diagonal: |
160 | * add the charge self interaction energy term |
161 | */ |
162 | for (im = 0; im < NCL_PER_SUPERCL((2*2*2)); im++) |
163 | { |
164 | ci = sci*NCL_PER_SUPERCL((2*2*2)) + im; |
165 | for (ic = 0; ic < CL_SIZE(8); ic++) |
166 | { |
167 | ia = ci*CL_SIZE(8) + ic; |
168 | iq = x[ia*nbat->xstride+3]; |
169 | vctot += iq*iq; |
170 | } |
171 | } |
172 | if (!bEwald) |
173 | { |
174 | vctot *= -facel*0.5*iconst->c_rf; |
175 | } |
176 | else |
177 | { |
178 | /* last factor 1/sqrt(pi) */ |
179 | vctot *= -facel*iconst->ewaldcoeff_q*M_1_SQRTPI0.564189583547756; |
180 | } |
181 | } |
182 | |
183 | for (cj4_ind = cj4_ind0; (cj4_ind < cj4_ind1); cj4_ind++) |
184 | { |
185 | excl[0] = &nbl->excl[nbl->cj4[cj4_ind].imei[0].excl_ind]; |
186 | excl[1] = &nbl->excl[nbl->cj4[cj4_ind].imei[1].excl_ind]; |
187 | |
188 | for (jm = 0; jm < NBNXN_GPU_JGROUP_SIZE4; jm++) |
189 | { |
190 | cj = nbl->cj4[cj4_ind].cj[jm]; |
191 | |
192 | for (im = 0; im < NCL_PER_SUPERCL((2*2*2)); im++) |
193 | { |
194 | /* We're only using the first imask, |
195 | * but here imei[1].imask is identical. |
196 | */ |
197 | if ((nbl->cj4[cj4_ind].imei[0].imask >> (jm*NCL_PER_SUPERCL((2*2*2))+im)) & 1) |
198 | { |
199 | gmx_bool within_rlist; |
200 | |
201 | ci = sci*NCL_PER_SUPERCL((2*2*2)) + im; |
202 | |
203 | within_rlist = FALSE0; |
204 | npair = 0; |
205 | for (ic = 0; ic < CL_SIZE(8); ic++) |
206 | { |
207 | ia = ci*CL_SIZE(8) + ic; |
208 | |
209 | is = ia*nbat->xstride; |
210 | ifs = ia*nbat->fstride; |
211 | ix = shX + x[is+0]; |
212 | iy = shY + x[is+1]; |
213 | iz = shZ + x[is+2]; |
214 | iq = facel*x[is+3]; |
215 | nti = ntype*2*type[ia]; |
216 | |
217 | fix = 0; |
218 | fiy = 0; |
219 | fiz = 0; |
220 | |
221 | for (jc = 0; jc < CL_SIZE(8); jc++) |
222 | { |
223 | ja = cj*CL_SIZE(8) + jc; |
224 | |
225 | if (nbln->shift == CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2) && |
226 | ci == cj && ja <= ia) |
227 | { |
228 | continue; |
229 | } |
230 | |
231 | int_bit = ((excl[jc>>2]->pair[(jc & 3)*CL_SIZE(8)+ic] >> (jm*NCL_PER_SUPERCL((2*2*2))+im)) & 1); |
232 | |
233 | js = ja*nbat->xstride; |
234 | jfs = ja*nbat->fstride; |
235 | jx = x[js+0]; |
236 | jy = x[js+1]; |
237 | jz = x[js+2]; |
238 | dx = ix - jx; |
239 | dy = iy - jy; |
240 | dz = iz - jz; |
241 | rsq = dx*dx + dy*dy + dz*dz; |
242 | if (rsq < rlist2) |
243 | { |
244 | within_rlist = TRUE1; |
245 | } |
246 | if (rsq >= rcut2) |
247 | { |
248 | continue; |
249 | } |
250 | |
251 | if (type[ia] != ntype-1 && type[ja] != ntype-1) |
252 | { |
253 | npair++; |
254 | } |
255 | |
256 | /* avoid NaN for excluded pairs at r=0 */ |
257 | rsq += (1.0 - int_bit)*NBNXN_AVOID_SING_R2_INC1.0e-12f; |
258 | |
259 | rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq); |
260 | rinvsq = rinv*rinv; |
261 | fscal = 0; |
Value stored to 'fscal' is never read | |
262 | |
263 | qq = iq*x[js+3]; |
264 | if (!bEwald) |
265 | { |
266 | /* Reaction-field */ |
267 | krsq = iconst->k_rf*rsq; |
268 | fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq; |
269 | if (bEner) |
270 | { |
271 | vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf); |
272 | } |
273 | } |
274 | else |
275 | { |
276 | r = rsq*rinv; |
277 | rt = r*iconst->tabq_scale; |
278 | n0 = rt; |
279 | eps = rt - n0; |
280 | |
281 | fexcl = (1 - eps)*Ftab[n0] + eps*Ftab[n0+1]; |
282 | |
283 | fscal = qq*(int_bit*rinvsq - fexcl)*rinv; |
284 | |
285 | if (bEner) |
286 | { |
287 | vcoul = qq*((int_bit - gmx_erf(iconst->ewaldcoeff_q*r)gmx_erff(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald); |
288 | } |
289 | } |
290 | |
291 | if (rsq < rvdw2) |
292 | { |
293 | tj = nti + 2*type[ja]; |
294 | |
295 | /* Vanilla Lennard-Jones cutoff */ |
296 | c6 = vdwparam[tj]; |
297 | c12 = vdwparam[tj+1]; |
298 | |
299 | rinvsix = int_bit*rinvsq*rinvsq*rinvsq; |
300 | Vvdw_disp = c6*rinvsix; |
301 | Vvdw_rep = c12*rinvsix*rinvsix; |
302 | fscal += (Vvdw_rep - Vvdw_disp)*rinvsq; |
303 | |
304 | if (bEner) |
305 | { |
306 | vctot += vcoul; |
307 | |
308 | Vvdwtot += |
309 | (Vvdw_rep - int_bit*c12*iconst->sh_invrc6*iconst->sh_invrc6)/12 - |
310 | (Vvdw_disp - int_bit*c6*iconst->sh_invrc6)/6; |
311 | } |
312 | } |
313 | |
314 | tx = fscal*dx; |
315 | ty = fscal*dy; |
316 | tz = fscal*dz; |
317 | fix = fix + tx; |
318 | fiy = fiy + ty; |
319 | fiz = fiz + tz; |
320 | f[jfs+0] -= tx; |
321 | f[jfs+1] -= ty; |
322 | f[jfs+2] -= tz; |
323 | } |
324 | |
325 | f[ifs+0] += fix; |
326 | f[ifs+1] += fiy; |
327 | f[ifs+2] += fiz; |
328 | fshift[ish3] = fshift[ish3] + fix; |
329 | fshift[ish3+1] = fshift[ish3+1] + fiy; |
330 | fshift[ish3+2] = fshift[ish3+2] + fiz; |
331 | |
332 | /* Count in half work-units. |
333 | * In CUDA one work-unit is 2 warps. |
334 | */ |
335 | if ((ic+1) % (CL_SIZE(8)/2) == 0) |
336 | { |
337 | npair_tot += npair; |
338 | |
339 | nhwu++; |
340 | if (within_rlist) |
341 | { |
342 | nhwu_pruned++; |
343 | } |
344 | |
345 | within_rlist = FALSE0; |
346 | npair = 0; |
347 | } |
348 | } |
349 | } |
350 | } |
351 | } |
352 | } |
353 | |
354 | if (bEner) |
355 | { |
356 | ggid = 0; |
357 | Vc[ggid] = Vc[ggid] + vctot; |
358 | Vvdw[ggid] = Vvdw[ggid] + Vvdwtot; |
359 | } |
360 | } |
361 | |
362 | if (debug) |
363 | { |
364 | fprintf(debug, "number of half %dx%d atom pairs: %d after pruning: %d fraction %4.2f\n", |
365 | nbl->na_ci, nbl->na_ci, |
366 | nhwu, nhwu_pruned, nhwu_pruned/(double)nhwu); |
367 | fprintf(debug, "generic kernel pair interactions: %d\n", |
368 | nhwu*nbl->na_ci/2*nbl->na_ci); |
369 | fprintf(debug, "generic kernel post-prune pair interactions: %d\n", |
370 | nhwu_pruned*nbl->na_ci/2*nbl->na_ci); |
371 | fprintf(debug, "generic kernel non-zero pair interactions: %d\n", |
372 | npair_tot); |
373 | fprintf(debug, "ratio non-zero/post-prune pair interactions: %4.2f\n", |
374 | npair_tot/(double)(nhwu_pruned*nbl->na_ci/2*nbl->na_ci)); |
375 | } |
376 | } |