/home/alexxy/Develop/gromacs/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu

Bug Summary

File:	gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
Location:	line 261, column 33
Description:	Value stored to 'fscal' is never read

Annotated Source Code

1	/*
2	* This file is part of the GROMACS molecular simulation package.
3	*
4	* Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5	* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6	* and including many others, as listed in the AUTHORS file in the
7	* top-level source directory and at http://www.gromacs.org.
8	*
9	* GROMACS is free software; you can redistribute it and/or
10	* modify it under the terms of the GNU Lesser General Public License
11	* as published by the Free Software Foundation; either version 2.1
12	* of the License, or (at your option) any later version.
13	*
14	* GROMACS is distributed in the hope that it will be useful,
15	* but WITHOUT ANY WARRANTY; without even the implied warranty of
16	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17	* Lesser General Public License for more details.
18	*
19	* You should have received a copy of the GNU Lesser General Public
20	* License along with GROMACS; if not, see
21	* http://www.gnu.org/licenses, or write to the Free Software Foundation,
22	* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
23	*
24	* If you want to redistribute modifications to GROMACS, please
25	* consider that scientific software is very special. Version
26	* control is crucial - bugs must be traceable. We will be happy to
27	* consider code for inclusion in the official distribution, but
28	* derived work must not be called official GROMACS. Details are found
29	* in the README & COPYING files - if they are missing, get the
30	* official version at http://www.gromacs.org.
31	*
32	* To help us fund GROMACS development, we humbly ask that you cite
33	* the research papers on the package. Check out http://www.gromacs.org.
34	*/
35	#ifdef HAVE_CONFIG_H1
36	#include <config.h>
37	#endif
38
39	#include <math.h>
40
41	#include "types/simple.h"
42	#include "gromacs/math/utilities.h"
43	#include "gromacs/math/vec.h"
44	#include "typedefs.h"
45	#include "force.h"
46	#include "nbnxn_kernel_gpu_ref.h"
47	#include "../nbnxn_consts.h"
48	#include "nbnxn_kernel_common.h"
49
50	#define NCL_PER_SUPERCL((222)) (NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER(222))
51	#define CL_SIZE(8) (NBNXN_GPU_CLUSTER_SIZE8)
52
53	void
54	nbnxn_kernel_gpu_ref(const nbnxn_pairlist_t *nbl,
55	const nbnxn_atomdata_t *nbat,
56	const interaction_const_t *iconst,
57	rvec *shift_vec,
58	int force_flags,
59	int clearF,
60	real * f,
61	real * fshift,
62	real * Vc,
63	real * Vvdw)
64	{
65	const nbnxn_sci_t *nbln;
66	const real *x;
67	gmx_bool bEner;
68	gmx_bool bEwald;
69	const real Ftab = NULL((void)0);
70	real rcut2, rvdw2, rlist2;
71	int ntype;
72	real facel;
73	int n;
74	int ish3;
75	int sci;
76	int cj4_ind0, cj4_ind1, cj4_ind;
77	int ci, cj;
78	int ic, jc, ia, ja, is, ifs, js, jfs, im, jm;
79	int n0;
80	int ggid;
81	real shX, shY, shZ;
82	real fscal, tx, ty, tz;
83	real rinvsq;
84	real iq;
85	real qq, vcoul = 0, krsq, vctot;
86	int nti;
87	int tj;
88	real rt, r, eps;
89	real rinvsix;
90	real Vvdwtot;
91	real Vvdw_rep, Vvdw_disp;
92	real ix, iy, iz, fix, fiy, fiz;
93	real jx, jy, jz;
94	real dx, dy, dz, rsq, rinv;
95	int int_bit;
96	real fexcl;
97	real c6, c12, cexp1, cexp2, br;
98	const real * shiftvec;
99	real * vdwparam;
100	int * shift;
101	int * type;
102	const nbnxn_excl_t *excl[2];
103
104	int npair_tot, npair;
105	int nhwu, nhwu_pruned;
106
107	if (nbl->na_ci != CL_SIZE(8))
108	{
109	gmx_fatal(FARGS0, "/home/alexxy/Develop/gromacs/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c" , 109, "The neighborlist cluster size in the GPU reference kernel is %d, expected it to be %d", nbl->na_ci, CL_SIZE(8));
110	}
111
112	if (clearF == enbvClearFYes)
113	{
114	clear_f(nbat, 0, f);
115	}
116
117	bEner = (force_flags & GMX_FORCE_ENERGY(1<<9));
118
119	bEwald = EEL_FULL(iconst->eeltype)((((iconst->eeltype) == eelPME \|\| (iconst->eeltype) == eelPMESWITCH \|\| (iconst->eeltype) == eelPMEUSER \|\| (iconst->eeltype ) == eelPMEUSERSWITCH \|\| (iconst->eeltype) == eelP3M_AD) \|\| (iconst->eeltype) == eelEWALD) \|\| (iconst->eeltype) == eelPOISSON);
120	if (bEwald)
121	{
122	Ftab = iconst->tabq_coul_F;
123	}
124
125	rcut2 = iconst->rcoulomb*iconst->rcoulomb;
126	rvdw2 = iconst->rvdw*iconst->rvdw;
127
128	rlist2 = nbl->rlist*nbl->rlist;
129
130	type = nbat->type;
131	facel = iconst->epsfac;
132	shiftvec = shift_vec[0];
133	vdwparam = nbat->nbfp;
134	ntype = nbat->ntype;
135
136	x = nbat->x;
137
138	npair_tot = 0;
139	nhwu = 0;
140	nhwu_pruned = 0;
141
142	for (n = 0; n < nbl->nsci; n++)
143	{
144	nbln = &nbl->sci[n];
145
146	ish3 = 3*nbln->shift;
147	shX = shiftvec[ish3];
148	shY = shiftvec[ish3+1];
149	shZ = shiftvec[ish3+2];
150	cj4_ind0 = nbln->cj4_ind_start;
151	cj4_ind1 = nbln->cj4_ind_end;
152	sci = nbln->sci;
153	vctot = 0;
154	Vvdwtot = 0;
155
156	if (nbln->shift == CENTRAL(((21 +1)(21 +1)(2*2 +1))/2) &&
157	nbl->cj4[cj4_ind0].cj[0] == sciNCL_PER_SUPERCL((22*2)))
158	{
159	/* we have the diagonal:
160	* add the charge self interaction energy term
161	*/
162	for (im = 0; im < NCL_PER_SUPERCL((222)); im++)
163	{
164	ci = sciNCL_PER_SUPERCL((22*2)) + im;
165	for (ic = 0; ic < CL_SIZE(8); ic++)
166	{
167	ia = ci*CL_SIZE(8) + ic;
168	iq = x[ia*nbat->xstride+3];
169	vctot += iq*iq;
170	}
171	}
172	if (!bEwald)
173	{
174	vctot = -facel0.5*iconst->c_rf;
175	}
176	else
177	{
178	/* last factor 1/sqrt(pi) */
179	vctot = -faceliconst->ewaldcoeff_q*M_1_SQRTPI0.564189583547756;
180	}
181	}
182
183	for (cj4_ind = cj4_ind0; (cj4_ind < cj4_ind1); cj4_ind++)
184	{
185	excl[0] = &nbl->excl[nbl->cj4[cj4_ind].imei[0].excl_ind];
186	excl[1] = &nbl->excl[nbl->cj4[cj4_ind].imei[1].excl_ind];
187
188	for (jm = 0; jm < NBNXN_GPU_JGROUP_SIZE4; jm++)
189	{
190	cj = nbl->cj4[cj4_ind].cj[jm];
191
192	for (im = 0; im < NCL_PER_SUPERCL((222)); im++)
193	{
194	/* We're only using the first imask,
195	* but here imei[1].imask is identical.
196	*/
197	if ((nbl->cj4[cj4_ind].imei[0].imask >> (jmNCL_PER_SUPERCL((22*2))+im)) & 1)
198	{
199	gmx_bool within_rlist;
200
201	ci = sciNCL_PER_SUPERCL((22*2)) + im;
202
203	within_rlist = FALSE0;
204	npair = 0;
205	for (ic = 0; ic < CL_SIZE(8); ic++)
206	{
207	ia = ci*CL_SIZE(8) + ic;
208
209	is = ia*nbat->xstride;
210	ifs = ia*nbat->fstride;
211	ix = shX + x[is+0];
212	iy = shY + x[is+1];
213	iz = shZ + x[is+2];
214	iq = facel*x[is+3];
215	nti = ntype2type[ia];
216
217	fix = 0;
218	fiy = 0;
219	fiz = 0;
220
221	for (jc = 0; jc < CL_SIZE(8); jc++)
222	{
223	ja = cj*CL_SIZE(8) + jc;
224
225	if (nbln->shift == CENTRAL(((21 +1)(21 +1)(2*2 +1))/2) &&
226	ci == cj && ja <= ia)
227	{
228	continue;
229	}
230
231	int_bit = ((excl[jc>>2]->pair[(jc & 3)CL_SIZE(8)+ic] >> (jmNCL_PER_SUPERCL((222))+im)) & 1);
232
233	js = ja*nbat->xstride;
234	jfs = ja*nbat->fstride;
235	jx = x[js+0];
236	jy = x[js+1];
237	jz = x[js+2];
238	dx = ix - jx;
239	dy = iy - jy;
240	dz = iz - jz;
241	rsq = dxdx + dydy + dz*dz;
242	if (rsq < rlist2)
243	{
244	within_rlist = TRUE1;
245	}
246	if (rsq >= rcut2)
247	{
248	continue;
249	}
250
251	if (type[ia] != ntype-1 && type[ja] != ntype-1)
252	{
253	npair++;
254	}
255
256	/* avoid NaN for excluded pairs at r=0 */
257	rsq += (1.0 - int_bit)*NBNXN_AVOID_SING_R2_INC1.0e-12f;
258
259	rinv = gmx_invsqrt(rsq)gmx_software_invsqrt(rsq);
260	rinvsq = rinv*rinv;
261	fscal = 0;
	Value stored to 'fscal' is never read
262
263	qq = iq*x[js+3];
264	if (!bEwald)
265	{
266	/* Reaction-field */
267	krsq = iconst->k_rf*rsq;
268	fscal = qq(int_bitrinv - 2krsq)rinvsq;
269	if (bEner)
270	{
271	vcoul = qq(int_bitrinv + krsq - iconst->c_rf);
272	}
273	}
274	else
275	{
276	r = rsq*rinv;
277	rt = r*iconst->tabq_scale;
278	n0 = rt;
279	eps = rt - n0;
280
281	fexcl = (1 - eps)Ftab[n0] + epsFtab[n0+1];
282
283	fscal = qq(int_bitrinvsq - fexcl)*rinv;
284
285	if (bEner)
286	{
287	vcoul = qq((int_bit - gmx_erf(iconst->ewaldcoeff_qr)gmx_erff(iconst->ewaldcoeff_qr))rinv - int_bit*iconst->sh_ewald);
288	}
289	}
290
291	if (rsq < rvdw2)
292	{
293	tj = nti + 2*type[ja];
294
295	/* Vanilla Lennard-Jones cutoff */
296	c6 = vdwparam[tj];
297	c12 = vdwparam[tj+1];
298
299	rinvsix = int_bitrinvsqrinvsq*rinvsq;
300	Vvdw_disp = c6*rinvsix;
301	Vvdw_rep = c12rinvsixrinvsix;
302	fscal += (Vvdw_rep - Vvdw_disp)*rinvsq;
303
304	if (bEner)
305	{
306	vctot += vcoul;
307
308	Vvdwtot +=
309	(Vvdw_rep - int_bitc12iconst->sh_invrc6*iconst->sh_invrc6)/12 -
310	(Vvdw_disp - int_bitc6iconst->sh_invrc6)/6;
311	}
312	}
313
314	tx = fscal*dx;
315	ty = fscal*dy;
316	tz = fscal*dz;
317	fix = fix + tx;
318	fiy = fiy + ty;
319	fiz = fiz + tz;
320	f[jfs+0] -= tx;
321	f[jfs+1] -= ty;
322	f[jfs+2] -= tz;
323	}
324
325	f[ifs+0] += fix;
326	f[ifs+1] += fiy;
327	f[ifs+2] += fiz;
328	fshift[ish3] = fshift[ish3] + fix;
329	fshift[ish3+1] = fshift[ish3+1] + fiy;
330	fshift[ish3+2] = fshift[ish3+2] + fiz;
331
332	/* Count in half work-units.
333	* In CUDA one work-unit is 2 warps.
334	*/
335	if ((ic+1) % (CL_SIZE(8)/2) == 0)
336	{
337	npair_tot += npair;
338
339	nhwu++;
340	if (within_rlist)
341	{
342	nhwu_pruned++;
343	}
344
345	within_rlist = FALSE0;
346	npair = 0;
347	}
348	}
349	}
350	}
351	}
352	}
353
354	if (bEner)
355	{
356	ggid = 0;
357	Vc[ggid] = Vc[ggid] + vctot;
358	Vvdw[ggid] = Vvdw[ggid] + Vvdwtot;
359	}
360	}
361
362	if (debug)
363	{
364	fprintf(debug, "number of half %dx%d atom pairs: %d after pruning: %d fraction %4.2f\n",
365	nbl->na_ci, nbl->na_ci,
366	nhwu, nhwu_pruned, nhwu_pruned/(double)nhwu);
367	fprintf(debug, "generic kernel pair interactions: %d\n",
368	nhwunbl->na_ci/2nbl->na_ci);
369	fprintf(debug, "generic kernel post-prune pair interactions: %d\n",
370	nhwu_prunednbl->na_ci/2nbl->na_ci);
371	fprintf(debug, "generic kernel non-zero pair interactions: %d\n",
372	npair_tot);
373	fprintf(debug, "ratio non-zero/post-prune pair interactions: %4.2f\n",
374	npair_tot/(double)(nhwu_prunednbl->na_ci/2nbl->na_ci));
375	}
376	}