Bug Summary

File:gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
Location:line 261, column 33
Description:Value stored to 'fscal' is never read

Annotated Source Code

1/*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
8 *
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
13 *
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23 *
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
31 *
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
34 */
35#ifdef HAVE_CONFIG_H1
36#include <config.h>
37#endif
38
39#include <math.h>
40
41#include "types/simple.h"
42#include "gromacs/math/utilities.h"
43#include "gromacs/math/vec.h"
44#include "typedefs.h"
45#include "force.h"
46#include "nbnxn_kernel_gpu_ref.h"
47#include "../nbnxn_consts.h"
48#include "nbnxn_kernel_common.h"
49
50#define NCL_PER_SUPERCL((2*2*2)) (NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER(2*2*2))
51#define CL_SIZE(8) (NBNXN_GPU_CLUSTER_SIZE8)
52
53void
54nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t *nbl,
55 const nbnxn_atomdata_t *nbat,
56 const interaction_const_t *iconst,
57 rvec *shift_vec,
58 int force_flags,
59 int clearF,
60 real * f,
61 real * fshift,
62 real * Vc,
63 real * Vvdw)
64{
65 const nbnxn_sci_t *nbln;
66 const real *x;
67 gmx_bool bEner;
68 gmx_bool bEwald;
69 const real *Ftab = NULL((void*)0);
70 real rcut2, rvdw2, rlist2;
71 int ntype;
72 real facel;
73 int n;
74 int ish3;
75 int sci;
76 int cj4_ind0, cj4_ind1, cj4_ind;
77 int ci, cj;
78 int ic, jc, ia, ja, is, ifs, js, jfs, im, jm;
79 int n0;
80 int ggid;
81 real shX, shY, shZ;
82 real fscal, tx, ty, tz;
83 real rinvsq;
84 real iq;
85 real qq, vcoul = 0, krsq, vctot;
86 int nti;
87 int tj;
88 real rt, r, eps;
89 real rinvsix;
90 real Vvdwtot;
91 real Vvdw_rep, Vvdw_disp;
92 real ix, iy, iz, fix, fiy, fiz;
93 real jx, jy, jz;
94 real dx, dy, dz, rsq, rinv;
95 int int_bit;
96 real fexcl;
97 real c6, c12, cexp1, cexp2, br;
98 const real * shiftvec;
99 real * vdwparam;
100 int * shift;
101 int * type;
102 const nbnxn_excl_t *excl[2];
103
104 int npair_tot, npair;
105 int nhwu, nhwu_pruned;
106
107 if (nbl->na_ci != CL_SIZE(8))
108 {
109 gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c"
, 109
, "The neighborlist cluster size in the GPU reference kernel is %d, expected it to be %d", nbl->na_ci, CL_SIZE(8));
110 }
111
112 if (clearF == enbvClearFYes)
113 {
114 clear_f(nbat, 0, f);
115 }
116
117 bEner = (force_flags & GMX_FORCE_ENERGY(1<<9));
118
119 bEwald = EEL_FULL(iconst->eeltype)((((iconst->eeltype) == eelPME || (iconst->eeltype) == eelPMESWITCH
|| (iconst->eeltype) == eelPMEUSER || (iconst->eeltype
) == eelPMEUSERSWITCH || (iconst->eeltype) == eelP3M_AD) ||
(iconst->eeltype) == eelEWALD) || (iconst->eeltype) ==
eelPOISSON)
;
120 if (bEwald)
121 {
122 Ftab = iconst->tabq_coul_F;
123 }
124
125 rcut2 = iconst->rcoulomb*iconst->rcoulomb;
126 rvdw2 = iconst->rvdw*iconst->rvdw;
127
128 rlist2 = nbl->rlist*nbl->rlist;
129
130 type = nbat->type;
131 facel = iconst->epsfac;
132 shiftvec = shift_vec[0];
133 vdwparam = nbat->nbfp;
134 ntype = nbat->ntype;
135
136 x = nbat->x;
137
138 npair_tot = 0;
139 nhwu = 0;
140 nhwu_pruned = 0;
141
142 for (n = 0; n < nbl->nsci; n++)
143 {
144 nbln = &nbl->sci[n];
145
146 ish3 = 3*nbln->shift;
147 shX = shiftvec[ish3];
148 shY = shiftvec[ish3+1];
149 shZ = shiftvec[ish3+2];
150 cj4_ind0 = nbln->cj4_ind_start;
151 cj4_ind1 = nbln->cj4_ind_end;
152 sci = nbln->sci;
153 vctot = 0;
154 Vvdwtot = 0;
155
156 if (nbln->shift == CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2) &&
157 nbl->cj4[cj4_ind0].cj[0] == sci*NCL_PER_SUPERCL((2*2*2)))
158 {
159 /* we have the diagonal:
160 * add the charge self interaction energy term
161 */
162 for (im = 0; im < NCL_PER_SUPERCL((2*2*2)); im++)
163 {
164 ci = sci*NCL_PER_SUPERCL((2*2*2)) + im;
165 for (ic = 0; ic < CL_SIZE(8); ic++)
166 {
167 ia = ci*CL_SIZE(8) + ic;
168 iq = x[ia*nbat->xstride+3];
169 vctot += iq*iq;
170 }
171 }
172 if (!bEwald)
173 {
174 vctot *= -facel*0.5*iconst->c_rf;
175 }
176 else
177 {
178 /* last factor 1/sqrt(pi) */
179 vctot *= -facel*iconst->ewaldcoeff_q*M_1_SQRTPI0.564189583547756;
180 }
181 }
182
183 for (cj4_ind = cj4_ind0; (cj4_ind < cj4_ind1); cj4_ind++)
184 {
185 excl[0] = &nbl->excl[nbl->cj4[cj4_ind].imei[0].excl_ind];
186 excl[1] = &nbl->excl[nbl->cj4[cj4_ind].imei[1].excl_ind];
187
188 for (jm = 0; jm < NBNXN_GPU_JGROUP_SIZE4; jm++)
189 {
190 cj = nbl->cj4[cj4_ind].cj[jm];
191
192 for (im = 0; im < NCL_PER_SUPERCL((2*2*2)); im++)
193 {
194 /* We're only using the first imask,
195 * but here imei[1].imask is identical.
196 */
197 if ((nbl->cj4[cj4_ind].imei[0].imask >> (jm*NCL_PER_SUPERCL((2*2*2))+im)) & 1)
198 {
199 gmx_bool within_rlist;
200
201 ci = sci*NCL_PER_SUPERCL((2*2*2)) + im;
202
203 within_rlist = FALSE0;
204 npair = 0;
205 for (ic = 0; ic < CL_SIZE(8); ic++)
206 {
207 ia = ci*CL_SIZE(8) + ic;
208
209 is = ia*nbat->xstride;
210 ifs = ia*nbat->fstride;
211 ix = shX + x[is+0];
212 iy = shY + x[is+1];
213 iz = shZ + x[is+2];
214 iq = facel*x[is+3];
215 nti = ntype*2*type[ia];
216
217 fix = 0;
218 fiy = 0;
219 fiz = 0;
220
221 for (jc = 0; jc < CL_SIZE(8); jc++)
222 {
223 ja = cj*CL_SIZE(8) + jc;
224
225 if (nbln->shift == CENTRAL(((2*1 +1)*(2*1 +1)*(2*2 +1))/2) &&
226 ci == cj && ja <= ia)
227 {
228 continue;
229 }
230
231 int_bit = ((excl[jc>>2]->pair[(jc & 3)*CL_SIZE(8)+ic] >> (jm*NCL_PER_SUPERCL((2*2*2))+im)) & 1);
232
233 js = ja*nbat->xstride;
234 jfs = ja*nbat->fstride;
235 jx = x[js+0];
236 jy = x[js+1];
237 jz = x[js+2];
238 dx = ix - jx;
239 dy = iy - jy;
240 dz = iz - jz;
241 rsq = dx*dx + dy*dy + dz*dz;
242 if (rsq < rlist2)
243 {
244 within_rlist = TRUE1;
245 }
246 if (rsq >= rcut2)
247 {
248 continue;
249 }
250
251 if (type[ia] != ntype-1 && type[ja] != ntype-1)
252 {
253 npair++;
254 }
255
256 /* avoid NaN for excluded pairs at r=0 */
257 rsq += (1.0 - int_bit)*NBNXN_AVOID_SING_R2_INC1.0e-12f;
258
259 rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq);
260 rinvsq = rinv*rinv;
261 fscal = 0;
Value stored to 'fscal' is never read
262
263 qq = iq*x[js+3];
264 if (!bEwald)
265 {
266 /* Reaction-field */
267 krsq = iconst->k_rf*rsq;
268 fscal = qq*(int_bit*rinv - 2*krsq)*rinvsq;
269 if (bEner)
270 {
271 vcoul = qq*(int_bit*rinv + krsq - iconst->c_rf);
272 }
273 }
274 else
275 {
276 r = rsq*rinv;
277 rt = r*iconst->tabq_scale;
278 n0 = rt;
279 eps = rt - n0;
280
281 fexcl = (1 - eps)*Ftab[n0] + eps*Ftab[n0+1];
282
283 fscal = qq*(int_bit*rinvsq - fexcl)*rinv;
284
285 if (bEner)
286 {
287 vcoul = qq*((int_bit - gmx_erf(iconst->ewaldcoeff_q*r)gmx_erff(iconst->ewaldcoeff_q*r))*rinv - int_bit*iconst->sh_ewald);
288 }
289 }
290
291 if (rsq < rvdw2)
292 {
293 tj = nti + 2*type[ja];
294
295 /* Vanilla Lennard-Jones cutoff */
296 c6 = vdwparam[tj];
297 c12 = vdwparam[tj+1];
298
299 rinvsix = int_bit*rinvsq*rinvsq*rinvsq;
300 Vvdw_disp = c6*rinvsix;
301 Vvdw_rep = c12*rinvsix*rinvsix;
302 fscal += (Vvdw_rep - Vvdw_disp)*rinvsq;
303
304 if (bEner)
305 {
306 vctot += vcoul;
307
308 Vvdwtot +=
309 (Vvdw_rep - int_bit*c12*iconst->sh_invrc6*iconst->sh_invrc6)/12 -
310 (Vvdw_disp - int_bit*c6*iconst->sh_invrc6)/6;
311 }
312 }
313
314 tx = fscal*dx;
315 ty = fscal*dy;
316 tz = fscal*dz;
317 fix = fix + tx;
318 fiy = fiy + ty;
319 fiz = fiz + tz;
320 f[jfs+0] -= tx;
321 f[jfs+1] -= ty;
322 f[jfs+2] -= tz;
323 }
324
325 f[ifs+0] += fix;
326 f[ifs+1] += fiy;
327 f[ifs+2] += fiz;
328 fshift[ish3] = fshift[ish3] + fix;
329 fshift[ish3+1] = fshift[ish3+1] + fiy;
330 fshift[ish3+2] = fshift[ish3+2] + fiz;
331
332 /* Count in half work-units.
333 * In CUDA one work-unit is 2 warps.
334 */
335 if ((ic+1) % (CL_SIZE(8)/2) == 0)
336 {
337 npair_tot += npair;
338
339 nhwu++;
340 if (within_rlist)
341 {
342 nhwu_pruned++;
343 }
344
345 within_rlist = FALSE0;
346 npair = 0;
347 }
348 }
349 }
350 }
351 }
352 }
353
354 if (bEner)
355 {
356 ggid = 0;
357 Vc[ggid] = Vc[ggid] + vctot;
358 Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
359 }
360 }
361
362 if (debug)
363 {
364 fprintf(debug, "number of half %dx%d atom pairs: %d after pruning: %d fraction %4.2f\n",
365 nbl->na_ci, nbl->na_ci,
366 nhwu, nhwu_pruned, nhwu_pruned/(double)nhwu);
367 fprintf(debug, "generic kernel pair interactions: %d\n",
368 nhwu*nbl->na_ci/2*nbl->na_ci);
369 fprintf(debug, "generic kernel post-prune pair interactions: %d\n",
370 nhwu_pruned*nbl->na_ci/2*nbl->na_ci);
371 fprintf(debug, "generic kernel non-zero pair interactions: %d\n",
372 npair_tot);
373 fprintf(debug, "ratio non-zero/post-prune pair interactions: %4.2f\n",
374 npair_tot/(double)(nhwu_pruned*nbl->na_ci/2*nbl->na_ci));
375 }
376}