/home/alexxy/Develop/gromacs/src/gromacs/mdlib/perf

Bug Summary

File:	gromacs/mdlib/perf_est.c
Location:	line 299, column 9
Description:	Value stored to 'a' is never read

Annotated Source Code

1	/*
2	* This file is part of the GROMACS molecular simulation package.
3	*
4	* Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5	* Copyright (c) 2001-2008, The GROMACS development team.
6	* Copyright (c) 2012,2014, by the GROMACS development team, led by
7	* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8	* and including many others, as listed in the AUTHORS file in the
9	* top-level source directory and at http://www.gromacs.org.
10	*
11	* GROMACS is free software; you can redistribute it and/or
12	* modify it under the terms of the GNU Lesser General Public License
13	* as published by the Free Software Foundation; either version 2.1
14	* of the License, or (at your option) any later version.
15	*
16	* GROMACS is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19	* Lesser General Public License for more details.
20	*
21	* You should have received a copy of the GNU Lesser General Public
22	* License along with GROMACS; if not, see
23	* http://www.gnu.org/licenses, or write to the Free Software Foundation,
24	* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25	*
26	* If you want to redistribute modifications to GROMACS, please
27	* consider that scientific software is very special. Version
28	* control is crucial - bugs must be traceable. We will be happy to
29	* consider code for inclusion in the official distribution, but
30	* derived work must not be called official GROMACS. Details are found
31	* in the README & COPYING files - if they are missing, get the
32	* official version at http://www.gromacs.org.
33	*
34	* To help us fund GROMACS development, we humbly ask that you cite
35	* the research papers on the package. Check out http://www.gromacs.org.
36	*/
37	#ifdef HAVE_CONFIG_H1
38	#include <config.h>
39	#endif
40
41	#include <math.h>
42
43	#include "perf_est.h"
44	#include "physics.h"
45	#include "gromacs/math/vec.h"
46	#include "mtop_util.h"
47	#include "types/commrec.h"
48	#include "nbnxn_search.h"
49	#include "nbnxn_consts.h"
50
51
52	/* Computational cost of bonded, non-bonded and PME calculations.
53	* This will be machine dependent.
54	* The numbers here are accurate for Intel Core2 and AMD Athlon 64
55	* in single precision. In double precision PME mesh is slightly cheaper,
56	* although not so much that the numbers need to be adjusted.
57	*/
58
59	/* Cost of a pair interaction in the "group" cut-off scheme */
60	#define C_GR_FQ1.5 1.5
61	#define C_GR_QLJ_CUT1.5 1.5
62	#define C_GR_QLJ_TAB2.0 2.0
63	#define C_GR_LJ_CUT1.0 1.0
64	#define C_GR_LJ_TAB1.75 1.75
65	/* Cost of 1 water with one Q/LJ atom */
66	#define C_GR_QLJW_CUT2.0 2.0
67	#define C_GR_QLJW_TAB2.25 2.25
68	/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
69	#define C_GR_QW1.75 1.75
70
71	/* Cost of a pair interaction in the "Verlet" cut-off scheme, QEXP is Ewald */
72	#define C_VT_LJ0.30 0.30
73	#define C_VT_QRF_LJ0.40 0.40
74	#define C_VT_QRF0.30 0.30
75	#define C_VT_QEXP_LJ0.55 0.55
76	#define C_VT_QEXP0.50 0.50
77	/* Extra cost for expensive LJ interaction, e.g. pot-switch or LJ-PME */
78	#define C_VT_LJEXP_ADD0.20 0.20
79
80	/* Cost of PME, with all components running with SSE instructions */
81	/* Cost of particle reordering and redistribution */
82	#define C_PME_REDIST12.0 12.0
83	/* Cost of q spreading and force interpolation per charge (mainly memory) */
84	#define C_PME_SPREAD0.30 0.30
85	/* Cost of fft's, will be multiplied with N log(N) */
86	#define C_PME_FFT0.20 0.20
87	/* Cost of pme_solve, will be multiplied with N */
88	#define C_PME_SOLVE0.50 0.50
89
90	/* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
91	#define C_BOND5.0 5.0
92
93	int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)
94	{
95	int mb, nmol, ftype, ndxb, ndx_excl;
96	int ndx;
97	gmx_moltype_t *molt;
98
99	/* Count the number of pbc_rvec_sub calls required for bonded interactions.
100	* This number is also roughly proportional to the computational cost.
101	*/
102	ndx = 0;
103	ndx_excl = 0;
104	for (mb = 0; mb < mtop->nmolblock; mb++)
105	{
106	molt = &mtop->moltype[mtop->molblock[mb].type];
107	nmol = mtop->molblock[mb].nmol;
108	for (ftype = 0; ftype < F_NRE; ftype++)
109	{
110	if (interaction_function[ftype].flags & IF_BOND1)
111	{
112	switch (ftype)
113	{
114	case F_POSRES:
115	case F_FBPOSRES: ndxb = 1; break;
116	case F_CONNBONDS: ndxb = 0; break;
117	default: ndxb = NRAL(ftype)(interaction_function[(ftype)].nratoms) - 1; break;
118	}
119	ndx += nmolndxbmolt->ilist[ftype].nr/(1 + NRAL(ftype)(interaction_function[(ftype)].nratoms));
120	}
121	}
122	if (bExcl)
123	{
124	ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
125	}
126	else
127	{
128	ndx_excl = 0;
129	}
130	}
131
132	if (debug)
133	{
134	fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);
135	}
136
137	ndx += ndx_excl;
138
139	return ndx;
140	}
141
142	static void pp_group_load(gmx_mtop_t mtop, t_inputrec ir, matrix box,
143	int nq_tot, int nlj_tot,
144	double *cost_pp,
145	gmx_bool bChargePerturbed, gmx_bool bTypePerturbed)
146	{
147	t_atom *atom;
148	int mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;
149	gmx_bool bBHAM, bLJcut, bWater, bQ, bLJ;
150	int nw, nqlj, nq, nlj;
151	float fq, fqlj, flj, fljtab, fqljw, fqw;
152	t_iparams *iparams;
153	gmx_moltype_t *molt;
154
155	bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
156
157	bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
158
159	/* Computational cost of bonded, non-bonded and PME calculations.
160	* This will be machine dependent.
161	* The numbers here are accurate for Intel Core2 and AMD Athlon 64
162	* in single precision. In double precision PME mesh is slightly cheaper,
163	* although not so much that the numbers need to be adjusted.
164	*/
165	fq = C_GR_FQ1.5;
166	fqlj = (bLJcut ? C_GR_QLJ_CUT1.5 : C_GR_QLJ_TAB2.0);
167	flj = (bLJcut ? C_GR_LJ_CUT1.0 : C_GR_LJ_TAB1.75);
168	/* Cost of 1 water with one Q/LJ atom */
169	fqljw = (bLJcut ? C_GR_QLJW_CUT2.0 : C_GR_QLJW_TAB2.25);
170	/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
171	fqw = C_GR_QW1.75;
172
173	iparams = mtop->ffparams.iparams;
174	atnr = mtop->ffparams.atnr;
175	nw = 0;
176	nqlj = 0;
177	nq = 0;
178	nlj = 0;
179	*bChargePerturbed = FALSE0;
180	for (mb = 0; mb < mtop->nmolblock; mb++)
181	{
182	molt = &mtop->moltype[mtop->molblock[mb].type];
183	atom = molt->atoms.atom;
184	nmol = mtop->molblock[mb].nmol;
185	a = 0;
186	for (cg = 0; cg < molt->cgs.nr; cg++)
187	{
188	bWater = !bBHAM;
189	ncqlj = 0;
190	ncq = 0;
191	nclj = 0;
192	a0 = a;
193	while (a < molt->cgs.index[cg+1])
194	{
195	bQ = (atom[a].q != 0 \|\| atom[a].qB != 0);
196	bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 \|\|
197	iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
198	if (atom[a].q != atom[a].qB)
199	{
200	*bChargePerturbed = TRUE1;
201	}
202	if (atom[a].type != atom[a].typeB)
203	{
204	*bTypePerturbed = TRUE1;
205	}
206	/* This if this atom fits into water optimization */
207	if (!((a == a0 && bQ && bLJ) \|\|
208	(a == a0+1 && bQ && !bLJ) \|\|
209	(a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) \|\|
210	(a == a0+3 && !bQ && bLJ)))
211	{
212	bWater = FALSE0;
213	}
214	if (bQ && bLJ)
215	{
216	ncqlj++;
217	}
218	else
219	{
220	if (bQ)
221	{
222	ncq++;
223	}
224	if (bLJ)
225	{
226	nclj++;
227	}
228	}
229	a++;
230	}
231	if (bWater)
232	{
233	nw += nmol;
234	}
235	else
236	{
237	nqlj += nmol*ncqlj;
238	nq += nmol*ncq;
239	nlj += nmol*nclj;
240	}
241	}
242	}
243
244	nq_tot = nq + nqlj + nw3;
245	*nlj_tot = nlj + nqlj + nw;
246
247	if (debug)
248	{
249	fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);
250	}
251
252	/* For the PP non-bonded cost it is (unrealistically) assumed
253	* that all atoms are distributed homogeneously in space.
254	* Factor 3 is used because a water molecule has 3 atoms
255	* (and TIP4P effectively has 3 interactions with (water) atoms)).
256	*/
257	cost_pp = 0.5(fqljwnwnqlj +
258	fqw nw(3*nw + nq) +
259	fqlj nqljnqlj +
260	fq nq(3*nw + nqlj + nq) +
261	flj nlj(nw + nqlj + nlj))
262	4/3M_PI3.14159265358979323846ir->rlistir->rlist*ir->rlist/det(box);
263	}
264
265	static void pp_verlet_load(gmx_mtop_t mtop, t_inputrec ir, matrix box,
266	int nq_tot, int nlj_tot,
267	double *cost_pp,
268	gmx_bool bChargePerturbed, gmx_bool bTypePerturbed)
269	{
270	t_atom *atom;
271	int mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;
272	gmx_bool bQRF;
273	t_iparams *iparams;
274	gmx_moltype_t *molt;
275	real r_eff;
276	double c_qlj, c_q, c_lj;
277	double nat;
278	/* Conversion factor for reference vs SIMD kernel performance.
279	* The factor is about right for SSE2/4, but should be 2 higher for AVX256.
280	*/
281	#ifdef GMX_DOUBLE
282	const real nbnxn_refkernel_fac = 4.0;
283	#else
284	const real nbnxn_refkernel_fac = 8.0;
285	#endif
286
287	bQRF = (EEL_RF(ir->coulombtype)((ir->coulombtype) == eelRF \|\| (ir->coulombtype) == eelGRF \|\| (ir->coulombtype) == eelRF_NEC \|\| (ir->coulombtype) == eelRF_ZERO ) \|\| ir->coulombtype == eelCUT);
288
289	iparams = mtop->ffparams.iparams;
290	atnr = mtop->ffparams.atnr;
291	nqlj = 0;
292	nq = 0;
293	*bChargePerturbed = FALSE0;
294	for (mb = 0; mb < mtop->nmolblock; mb++)
295	{
296	molt = &mtop->moltype[mtop->molblock[mb].type];
297	atom = molt->atoms.atom;
298	nmol = mtop->molblock[mb].nmol;
299	a = 0;
	Value stored to 'a' is never read
300	for (a = 0; a < molt->atoms.nr; a++)
301	{
302	if (atom[a].q != 0 \|\| atom[a].qB != 0)
303	{
304	if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 \|\|
305	iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
306	{
307	nqlj += nmol;
308	}
309	else
310	{
311	nq += nmol;
312	}
313	}
314	if (atom[a].q != atom[a].qB)
315	{
316	*bChargePerturbed = TRUE1;
317	}
318	if (atom[a].type != atom[a].typeB)
319	{
320	*bTypePerturbed = TRUE1;
321	}
322	}
323	}
324
325	nlj = mtop->natoms - nqlj - nq;
326
327	*nq_tot = nqlj + nq;
328	*nlj_tot = nqlj + nlj;
329
330	/* Effective cut-off for cluster pair list of 4x4 atoms */
331	r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE4, mtop->natoms/det(box));
332
333	if (debug)
334	{
335	fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
336	nqlj, nq, nlj, ir->rlist, r_eff);
337	}
338
339	/* Determine the cost per pair interaction */
340	c_qlj = (bQRF ? C_VT_QRF_LJ0.40 : C_VT_QEXP_LJ0.55);
341	c_q = (bQRF ? C_VT_QRF0.30 : C_VT_QEXP0.50);
342	c_lj = C_VT_LJ0.30;
343	if (ir->vdw_modifier == eintmodPOTSWITCH \|\| EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))
344	{
345	c_qlj += C_VT_LJEXP_ADD0.20;
346	c_lj += C_VT_LJEXP_ADD0.20;
347	}
348	if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME) && ir->ljpme_combination_rule == eljpmeLB)
349	{
350	/* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */
351	c_qlj *= nbnxn_refkernel_fac;
352	c_q *= nbnxn_refkernel_fac;
353	c_lj *= nbnxn_refkernel_fac;
354	}
355
356	/* For the PP non-bonded cost it is (unrealistically) assumed
357	* that all atoms are distributed homogeneously in space.
358	*/
359	/* Convert mtop->natoms to double to avoid int overflow */
360	nat = mtop->natoms;
361	cost_pp = 0.5nat(nqljc_qlj + nqc_q + nljc_lj)
362	4/3M_PI3.14159265358979323846r_effr_eff*r_eff/det(box);
363	}
364
365	float pme_load_estimate(gmx_mtop_t mtop, t_inputrec ir, matrix box)
366	{
367	t_atom *atom;
368	int mb, nmol, atnr, cg, a, a0, nq_tot, nlj_tot, f;
369	gmx_bool bBHAM, bLJcut, bChargePerturbed, bTypePerturbed;
370	gmx_bool bWater, bQ, bLJ;
371	double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
372	float ratio;
373	t_iparams *iparams;
374	gmx_moltype_t *molt;
375
376	/* Computational cost of bonded, non-bonded and PME calculations.
377	* This will be machine dependent.
378	* The numbers here are accurate for Intel Core2 and AMD Athlon 64
379	* in single precision. In double precision PME mesh is slightly cheaper,
380	* although not so much that the numbers need to be adjusted.
381	*/
382
383	iparams = mtop->ffparams.iparams;
384	atnr = mtop->ffparams.atnr;
385
386	cost_bond = C_BOND5.0*n_bonded_dx(mtop, TRUE1);
387
388	if (ir->cutoff_scheme == ecutsGROUP)
389	{
390	pp_group_load(mtop, ir, box,
391	&nq_tot, &nlj_tot, &cost_pp,
392	&bChargePerturbed, &bTypePerturbed);
393	}
394	else
395	{
396	pp_verlet_load(mtop, ir, box,
397	&nq_tot, &nlj_tot, &cost_pp,
398	&bChargePerturbed, &bTypePerturbed);
399	}
400
401	cost_redist = 0;
402	cost_spread = 0;
403	cost_fft = 0;
404	cost_solve = 0;
405
406	if (EEL_PME(ir->coulombtype)((ir->coulombtype) == eelPME \|\| (ir->coulombtype) == eelPMESWITCH \|\| (ir->coulombtype) == eelPMEUSER \|\| (ir->coulombtype ) == eelPMEUSERSWITCH \|\| (ir->coulombtype) == eelP3M_AD))
407	{
408	f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
409	cost_redist += C_PME_REDIST12.0*nq_tot;
410	cost_spread += fC_PME_SPREAD0.30nq_tot*pow(ir->pme_order, 3);
411	cost_fft += fC_PME_FFT0.20ir->nkxir->nkyir->nkzlog(ir->nkxir->nky*ir->nkz);
412	cost_solve += fC_PME_SOLVE0.50ir->nkxir->nkyir->nkz;
413	}
414
415	if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))
416	{
417	f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
418	if (ir->ljpme_combination_rule == eljpmeLB)
419	{
420	/* LB combination rule: we have 7 mesh terms */
421	f *= 7;
422	}
423	cost_redist += C_PME_REDIST12.0*nlj_tot;
424	cost_spread += fC_PME_SPREAD0.30nlj_tot*pow(ir->pme_order, 3);
425	cost_fft += fC_PME_FFT0.20ir->nkxir->nkyir->nkzlog(ir->nkxir->nky*ir->nkz);
426	cost_solve += fC_PME_SOLVE0.50ir->nkxir->nkyir->nkz;
427	}
428
429	cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
430
431	ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
432
433	if (debug)
434	{
435	fprintf(debug,
436	"cost_bond %f\n"
437	"cost_pp %f\n"
438	"cost_redist %f\n"
439	"cost_spread %f\n"
440	"cost_fft %f\n"
441	"cost_solve %f\n",
442	cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);
443
444	fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
445	}
446
447	return ratio;
448	}