Bug Summary

File:gromacs/mdlib/perf_est.c
Location:line 299, column 9
Description:Value stored to 'a' is never read

Annotated Source Code

1/*
2 * This file is part of the GROMACS molecular simulation package.
3 *
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2008, The GROMACS development team.
6 * Copyright (c) 2012,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
10 *
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
15 *
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 *
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
33 *
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
36 */
37#ifdef HAVE_CONFIG_H1
38#include <config.h>
39#endif
40
41#include <math.h>
42
43#include "perf_est.h"
44#include "physics.h"
45#include "gromacs/math/vec.h"
46#include "mtop_util.h"
47#include "types/commrec.h"
48#include "nbnxn_search.h"
49#include "nbnxn_consts.h"
50
51
52/* Computational cost of bonded, non-bonded and PME calculations.
53 * This will be machine dependent.
54 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
55 * in single precision. In double precision PME mesh is slightly cheaper,
56 * although not so much that the numbers need to be adjusted.
57 */
58
59/* Cost of a pair interaction in the "group" cut-off scheme */
60#define C_GR_FQ1.5 1.5
61#define C_GR_QLJ_CUT1.5 1.5
62#define C_GR_QLJ_TAB2.0 2.0
63#define C_GR_LJ_CUT1.0 1.0
64#define C_GR_LJ_TAB1.75 1.75
65/* Cost of 1 water with one Q/LJ atom */
66#define C_GR_QLJW_CUT2.0 2.0
67#define C_GR_QLJW_TAB2.25 2.25
68/* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
69#define C_GR_QW1.75 1.75
70
71/* Cost of a pair interaction in the "Verlet" cut-off scheme, QEXP is Ewald */
72#define C_VT_LJ0.30 0.30
73#define C_VT_QRF_LJ0.40 0.40
74#define C_VT_QRF0.30 0.30
75#define C_VT_QEXP_LJ0.55 0.55
76#define C_VT_QEXP0.50 0.50
77/* Extra cost for expensive LJ interaction, e.g. pot-switch or LJ-PME */
78#define C_VT_LJEXP_ADD0.20 0.20
79
80/* Cost of PME, with all components running with SSE instructions */
81/* Cost of particle reordering and redistribution */
82#define C_PME_REDIST12.0 12.0
83/* Cost of q spreading and force interpolation per charge (mainly memory) */
84#define C_PME_SPREAD0.30 0.30
85/* Cost of fft's, will be multiplied with N log(N) */
86#define C_PME_FFT0.20 0.20
87/* Cost of pme_solve, will be multiplied with N */
88#define C_PME_SOLVE0.50 0.50
89
90/* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
91#define C_BOND5.0 5.0
92
93int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)
94{
95 int mb, nmol, ftype, ndxb, ndx_excl;
96 int ndx;
97 gmx_moltype_t *molt;
98
99 /* Count the number of pbc_rvec_sub calls required for bonded interactions.
100 * This number is also roughly proportional to the computational cost.
101 */
102 ndx = 0;
103 ndx_excl = 0;
104 for (mb = 0; mb < mtop->nmolblock; mb++)
105 {
106 molt = &mtop->moltype[mtop->molblock[mb].type];
107 nmol = mtop->molblock[mb].nmol;
108 for (ftype = 0; ftype < F_NRE; ftype++)
109 {
110 if (interaction_function[ftype].flags & IF_BOND1)
111 {
112 switch (ftype)
113 {
114 case F_POSRES:
115 case F_FBPOSRES: ndxb = 1; break;
116 case F_CONNBONDS: ndxb = 0; break;
117 default: ndxb = NRAL(ftype)(interaction_function[(ftype)].nratoms) - 1; break;
118 }
119 ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype)(interaction_function[(ftype)].nratoms));
120 }
121 }
122 if (bExcl)
123 {
124 ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
125 }
126 else
127 {
128 ndx_excl = 0;
129 }
130 }
131
132 if (debug)
133 {
134 fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);
135 }
136
137 ndx += ndx_excl;
138
139 return ndx;
140}
141
142static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
143 int *nq_tot, int *nlj_tot,
144 double *cost_pp,
145 gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed)
146{
147 t_atom *atom;
148 int mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;
149 gmx_bool bBHAM, bLJcut, bWater, bQ, bLJ;
150 int nw, nqlj, nq, nlj;
151 float fq, fqlj, flj, fljtab, fqljw, fqw;
152 t_iparams *iparams;
153 gmx_moltype_t *molt;
154
155 bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
156
157 bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
158
159 /* Computational cost of bonded, non-bonded and PME calculations.
160 * This will be machine dependent.
161 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
162 * in single precision. In double precision PME mesh is slightly cheaper,
163 * although not so much that the numbers need to be adjusted.
164 */
165 fq = C_GR_FQ1.5;
166 fqlj = (bLJcut ? C_GR_QLJ_CUT1.5 : C_GR_QLJ_TAB2.0);
167 flj = (bLJcut ? C_GR_LJ_CUT1.0 : C_GR_LJ_TAB1.75);
168 /* Cost of 1 water with one Q/LJ atom */
169 fqljw = (bLJcut ? C_GR_QLJW_CUT2.0 : C_GR_QLJW_TAB2.25);
170 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
171 fqw = C_GR_QW1.75;
172
173 iparams = mtop->ffparams.iparams;
174 atnr = mtop->ffparams.atnr;
175 nw = 0;
176 nqlj = 0;
177 nq = 0;
178 nlj = 0;
179 *bChargePerturbed = FALSE0;
180 for (mb = 0; mb < mtop->nmolblock; mb++)
181 {
182 molt = &mtop->moltype[mtop->molblock[mb].type];
183 atom = molt->atoms.atom;
184 nmol = mtop->molblock[mb].nmol;
185 a = 0;
186 for (cg = 0; cg < molt->cgs.nr; cg++)
187 {
188 bWater = !bBHAM;
189 ncqlj = 0;
190 ncq = 0;
191 nclj = 0;
192 a0 = a;
193 while (a < molt->cgs.index[cg+1])
194 {
195 bQ = (atom[a].q != 0 || atom[a].qB != 0);
196 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
197 iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
198 if (atom[a].q != atom[a].qB)
199 {
200 *bChargePerturbed = TRUE1;
201 }
202 if (atom[a].type != atom[a].typeB)
203 {
204 *bTypePerturbed = TRUE1;
205 }
206 /* This if this atom fits into water optimization */
207 if (!((a == a0 && bQ && bLJ) ||
208 (a == a0+1 && bQ && !bLJ) ||
209 (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||
210 (a == a0+3 && !bQ && bLJ)))
211 {
212 bWater = FALSE0;
213 }
214 if (bQ && bLJ)
215 {
216 ncqlj++;
217 }
218 else
219 {
220 if (bQ)
221 {
222 ncq++;
223 }
224 if (bLJ)
225 {
226 nclj++;
227 }
228 }
229 a++;
230 }
231 if (bWater)
232 {
233 nw += nmol;
234 }
235 else
236 {
237 nqlj += nmol*ncqlj;
238 nq += nmol*ncq;
239 nlj += nmol*nclj;
240 }
241 }
242 }
243
244 *nq_tot = nq + nqlj + nw*3;
245 *nlj_tot = nlj + nqlj + nw;
246
247 if (debug)
248 {
249 fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);
250 }
251
252 /* For the PP non-bonded cost it is (unrealistically) assumed
253 * that all atoms are distributed homogeneously in space.
254 * Factor 3 is used because a water molecule has 3 atoms
255 * (and TIP4P effectively has 3 interactions with (water) atoms)).
256 */
257 *cost_pp = 0.5*(fqljw*nw*nqlj +
258 fqw *nw*(3*nw + nq) +
259 fqlj *nqlj*nqlj +
260 fq *nq*(3*nw + nqlj + nq) +
261 flj *nlj*(nw + nqlj + nlj))
262 *4/3*M_PI3.14159265358979323846*ir->rlist*ir->rlist*ir->rlist/det(box);
263}
264
265static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
266 int *nq_tot, int *nlj_tot,
267 double *cost_pp,
268 gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed)
269{
270 t_atom *atom;
271 int mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;
272 gmx_bool bQRF;
273 t_iparams *iparams;
274 gmx_moltype_t *molt;
275 real r_eff;
276 double c_qlj, c_q, c_lj;
277 double nat;
278 /* Conversion factor for reference vs SIMD kernel performance.
279 * The factor is about right for SSE2/4, but should be 2 higher for AVX256.
280 */
281#ifdef GMX_DOUBLE
282 const real nbnxn_refkernel_fac = 4.0;
283#else
284 const real nbnxn_refkernel_fac = 8.0;
285#endif
286
287 bQRF = (EEL_RF(ir->coulombtype)((ir->coulombtype) == eelRF || (ir->coulombtype) == eelGRF
|| (ir->coulombtype) == eelRF_NEC || (ir->coulombtype)
== eelRF_ZERO )
|| ir->coulombtype == eelCUT);
288
289 iparams = mtop->ffparams.iparams;
290 atnr = mtop->ffparams.atnr;
291 nqlj = 0;
292 nq = 0;
293 *bChargePerturbed = FALSE0;
294 for (mb = 0; mb < mtop->nmolblock; mb++)
295 {
296 molt = &mtop->moltype[mtop->molblock[mb].type];
297 atom = molt->atoms.atom;
298 nmol = mtop->molblock[mb].nmol;
299 a = 0;
Value stored to 'a' is never read
300 for (a = 0; a < molt->atoms.nr; a++)
301 {
302 if (atom[a].q != 0 || atom[a].qB != 0)
303 {
304 if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
305 iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
306 {
307 nqlj += nmol;
308 }
309 else
310 {
311 nq += nmol;
312 }
313 }
314 if (atom[a].q != atom[a].qB)
315 {
316 *bChargePerturbed = TRUE1;
317 }
318 if (atom[a].type != atom[a].typeB)
319 {
320 *bTypePerturbed = TRUE1;
321 }
322 }
323 }
324
325 nlj = mtop->natoms - nqlj - nq;
326
327 *nq_tot = nqlj + nq;
328 *nlj_tot = nqlj + nlj;
329
330 /* Effective cut-off for cluster pair list of 4x4 atoms */
331 r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE4, mtop->natoms/det(box));
332
333 if (debug)
334 {
335 fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
336 nqlj, nq, nlj, ir->rlist, r_eff);
337 }
338
339 /* Determine the cost per pair interaction */
340 c_qlj = (bQRF ? C_VT_QRF_LJ0.40 : C_VT_QEXP_LJ0.55);
341 c_q = (bQRF ? C_VT_QRF0.30 : C_VT_QEXP0.50);
342 c_lj = C_VT_LJ0.30;
343 if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))
344 {
345 c_qlj += C_VT_LJEXP_ADD0.20;
346 c_lj += C_VT_LJEXP_ADD0.20;
347 }
348 if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME) && ir->ljpme_combination_rule == eljpmeLB)
349 {
350 /* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */
351 c_qlj *= nbnxn_refkernel_fac;
352 c_q *= nbnxn_refkernel_fac;
353 c_lj *= nbnxn_refkernel_fac;
354 }
355
356 /* For the PP non-bonded cost it is (unrealistically) assumed
357 * that all atoms are distributed homogeneously in space.
358 */
359 /* Convert mtop->natoms to double to avoid int overflow */
360 nat = mtop->natoms;
361 *cost_pp = 0.5*nat*(nqlj*c_qlj + nq*c_q + nlj*c_lj)
362 *4/3*M_PI3.14159265358979323846*r_eff*r_eff*r_eff/det(box);
363}
364
365float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
366{
367 t_atom *atom;
368 int mb, nmol, atnr, cg, a, a0, nq_tot, nlj_tot, f;
369 gmx_bool bBHAM, bLJcut, bChargePerturbed, bTypePerturbed;
370 gmx_bool bWater, bQ, bLJ;
371 double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
372 float ratio;
373 t_iparams *iparams;
374 gmx_moltype_t *molt;
375
376 /* Computational cost of bonded, non-bonded and PME calculations.
377 * This will be machine dependent.
378 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
379 * in single precision. In double precision PME mesh is slightly cheaper,
380 * although not so much that the numbers need to be adjusted.
381 */
382
383 iparams = mtop->ffparams.iparams;
384 atnr = mtop->ffparams.atnr;
385
386 cost_bond = C_BOND5.0*n_bonded_dx(mtop, TRUE1);
387
388 if (ir->cutoff_scheme == ecutsGROUP)
389 {
390 pp_group_load(mtop, ir, box,
391 &nq_tot, &nlj_tot, &cost_pp,
392 &bChargePerturbed, &bTypePerturbed);
393 }
394 else
395 {
396 pp_verlet_load(mtop, ir, box,
397 &nq_tot, &nlj_tot, &cost_pp,
398 &bChargePerturbed, &bTypePerturbed);
399 }
400
401 cost_redist = 0;
402 cost_spread = 0;
403 cost_fft = 0;
404 cost_solve = 0;
405
406 if (EEL_PME(ir->coulombtype)((ir->coulombtype) == eelPME || (ir->coulombtype) == eelPMESWITCH
|| (ir->coulombtype) == eelPMEUSER || (ir->coulombtype
) == eelPMEUSERSWITCH || (ir->coulombtype) == eelP3M_AD)
)
407 {
408 f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1);
409 cost_redist += C_PME_REDIST12.0*nq_tot;
410 cost_spread += f*C_PME_SPREAD0.30*nq_tot*pow(ir->pme_order, 3);
411 cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
412 cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz;
413 }
414
415 if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME))
416 {
417 f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1);
418 if (ir->ljpme_combination_rule == eljpmeLB)
419 {
420 /* LB combination rule: we have 7 mesh terms */
421 f *= 7;
422 }
423 cost_redist += C_PME_REDIST12.0*nlj_tot;
424 cost_spread += f*C_PME_SPREAD0.30*nlj_tot*pow(ir->pme_order, 3);
425 cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
426 cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz;
427 }
428
429 cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
430
431 ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
432
433 if (debug)
434 {
435 fprintf(debug,
436 "cost_bond %f\n"
437 "cost_pp %f\n"
438 "cost_redist %f\n"
439 "cost_spread %f\n"
440 "cost_fft %f\n"
441 "cost_solve %f\n",
442 cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);
443
444 fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
445 }
446
447 return ratio;
448}