File: | gromacs/mdlib/perf_est.c |
Location: | line 299, column 9 |
Description: | Value stored to 'a' is never read |
1 | /* |
2 | * This file is part of the GROMACS molecular simulation package. |
3 | * |
4 | * Copyright (c) 1991-2000, University of Groningen, The Netherlands. |
5 | * Copyright (c) 2001-2008, The GROMACS development team. |
6 | * Copyright (c) 2012,2014, by the GROMACS development team, led by |
7 | * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, |
8 | * and including many others, as listed in the AUTHORS file in the |
9 | * top-level source directory and at http://www.gromacs.org. |
10 | * |
11 | * GROMACS is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public License |
13 | * as published by the Free Software Foundation; either version 2.1 |
14 | * of the License, or (at your option) any later version. |
15 | * |
16 | * GROMACS is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with GROMACS; if not, see |
23 | * http://www.gnu.org/licenses, or write to the Free Software Foundation, |
24 | * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
25 | * |
26 | * If you want to redistribute modifications to GROMACS, please |
27 | * consider that scientific software is very special. Version |
28 | * control is crucial - bugs must be traceable. We will be happy to |
29 | * consider code for inclusion in the official distribution, but |
30 | * derived work must not be called official GROMACS. Details are found |
31 | * in the README & COPYING files - if they are missing, get the |
32 | * official version at http://www.gromacs.org. |
33 | * |
34 | * To help us fund GROMACS development, we humbly ask that you cite |
35 | * the research papers on the package. Check out http://www.gromacs.org. |
36 | */ |
37 | #ifdef HAVE_CONFIG_H1 |
38 | #include <config.h> |
39 | #endif |
40 | |
41 | #include <math.h> |
42 | |
43 | #include "perf_est.h" |
44 | #include "physics.h" |
45 | #include "gromacs/math/vec.h" |
46 | #include "mtop_util.h" |
47 | #include "types/commrec.h" |
48 | #include "nbnxn_search.h" |
49 | #include "nbnxn_consts.h" |
50 | |
51 | |
52 | /* Computational cost of bonded, non-bonded and PME calculations. |
53 | * This will be machine dependent. |
54 | * The numbers here are accurate for Intel Core2 and AMD Athlon 64 |
55 | * in single precision. In double precision PME mesh is slightly cheaper, |
56 | * although not so much that the numbers need to be adjusted. |
57 | */ |
58 | |
59 | /* Cost of a pair interaction in the "group" cut-off scheme */ |
60 | #define C_GR_FQ1.5 1.5 |
61 | #define C_GR_QLJ_CUT1.5 1.5 |
62 | #define C_GR_QLJ_TAB2.0 2.0 |
63 | #define C_GR_LJ_CUT1.0 1.0 |
64 | #define C_GR_LJ_TAB1.75 1.75 |
65 | /* Cost of 1 water with one Q/LJ atom */ |
66 | #define C_GR_QLJW_CUT2.0 2.0 |
67 | #define C_GR_QLJW_TAB2.25 2.25 |
68 | /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */ |
69 | #define C_GR_QW1.75 1.75 |
70 | |
71 | /* Cost of a pair interaction in the "Verlet" cut-off scheme, QEXP is Ewald */ |
72 | #define C_VT_LJ0.30 0.30 |
73 | #define C_VT_QRF_LJ0.40 0.40 |
74 | #define C_VT_QRF0.30 0.30 |
75 | #define C_VT_QEXP_LJ0.55 0.55 |
76 | #define C_VT_QEXP0.50 0.50 |
77 | /* Extra cost for expensive LJ interaction, e.g. pot-switch or LJ-PME */ |
78 | #define C_VT_LJEXP_ADD0.20 0.20 |
79 | |
80 | /* Cost of PME, with all components running with SSE instructions */ |
81 | /* Cost of particle reordering and redistribution */ |
82 | #define C_PME_REDIST12.0 12.0 |
83 | /* Cost of q spreading and force interpolation per charge (mainly memory) */ |
84 | #define C_PME_SPREAD0.30 0.30 |
85 | /* Cost of fft's, will be multiplied with N log(N) */ |
86 | #define C_PME_FFT0.20 0.20 |
87 | /* Cost of pme_solve, will be multiplied with N */ |
88 | #define C_PME_SOLVE0.50 0.50 |
89 | |
90 | /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */ |
91 | #define C_BOND5.0 5.0 |
92 | |
93 | int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl) |
94 | { |
95 | int mb, nmol, ftype, ndxb, ndx_excl; |
96 | int ndx; |
97 | gmx_moltype_t *molt; |
98 | |
99 | /* Count the number of pbc_rvec_sub calls required for bonded interactions. |
100 | * This number is also roughly proportional to the computational cost. |
101 | */ |
102 | ndx = 0; |
103 | ndx_excl = 0; |
104 | for (mb = 0; mb < mtop->nmolblock; mb++) |
105 | { |
106 | molt = &mtop->moltype[mtop->molblock[mb].type]; |
107 | nmol = mtop->molblock[mb].nmol; |
108 | for (ftype = 0; ftype < F_NRE; ftype++) |
109 | { |
110 | if (interaction_function[ftype].flags & IF_BOND1) |
111 | { |
112 | switch (ftype) |
113 | { |
114 | case F_POSRES: |
115 | case F_FBPOSRES: ndxb = 1; break; |
116 | case F_CONNBONDS: ndxb = 0; break; |
117 | default: ndxb = NRAL(ftype)(interaction_function[(ftype)].nratoms) - 1; break; |
118 | } |
119 | ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype)(interaction_function[(ftype)].nratoms)); |
120 | } |
121 | } |
122 | if (bExcl) |
123 | { |
124 | ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2; |
125 | } |
126 | else |
127 | { |
128 | ndx_excl = 0; |
129 | } |
130 | } |
131 | |
132 | if (debug) |
133 | { |
134 | fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl); |
135 | } |
136 | |
137 | ndx += ndx_excl; |
138 | |
139 | return ndx; |
140 | } |
141 | |
142 | static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box, |
143 | int *nq_tot, int *nlj_tot, |
144 | double *cost_pp, |
145 | gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed) |
146 | { |
147 | t_atom *atom; |
148 | int mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj; |
149 | gmx_bool bBHAM, bLJcut, bWater, bQ, bLJ; |
150 | int nw, nqlj, nq, nlj; |
151 | float fq, fqlj, flj, fljtab, fqljw, fqw; |
152 | t_iparams *iparams; |
153 | gmx_moltype_t *molt; |
154 | |
155 | bBHAM = (mtop->ffparams.functype[0] == F_BHAM); |
156 | |
157 | bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM); |
158 | |
159 | /* Computational cost of bonded, non-bonded and PME calculations. |
160 | * This will be machine dependent. |
161 | * The numbers here are accurate for Intel Core2 and AMD Athlon 64 |
162 | * in single precision. In double precision PME mesh is slightly cheaper, |
163 | * although not so much that the numbers need to be adjusted. |
164 | */ |
165 | fq = C_GR_FQ1.5; |
166 | fqlj = (bLJcut ? C_GR_QLJ_CUT1.5 : C_GR_QLJ_TAB2.0); |
167 | flj = (bLJcut ? C_GR_LJ_CUT1.0 : C_GR_LJ_TAB1.75); |
168 | /* Cost of 1 water with one Q/LJ atom */ |
169 | fqljw = (bLJcut ? C_GR_QLJW_CUT2.0 : C_GR_QLJW_TAB2.25); |
170 | /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */ |
171 | fqw = C_GR_QW1.75; |
172 | |
173 | iparams = mtop->ffparams.iparams; |
174 | atnr = mtop->ffparams.atnr; |
175 | nw = 0; |
176 | nqlj = 0; |
177 | nq = 0; |
178 | nlj = 0; |
179 | *bChargePerturbed = FALSE0; |
180 | for (mb = 0; mb < mtop->nmolblock; mb++) |
181 | { |
182 | molt = &mtop->moltype[mtop->molblock[mb].type]; |
183 | atom = molt->atoms.atom; |
184 | nmol = mtop->molblock[mb].nmol; |
185 | a = 0; |
186 | for (cg = 0; cg < molt->cgs.nr; cg++) |
187 | { |
188 | bWater = !bBHAM; |
189 | ncqlj = 0; |
190 | ncq = 0; |
191 | nclj = 0; |
192 | a0 = a; |
193 | while (a < molt->cgs.index[cg+1]) |
194 | { |
195 | bQ = (atom[a].q != 0 || atom[a].qB != 0); |
196 | bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 || |
197 | iparams[(atnr+1)*atom[a].type].lj.c12 != 0); |
198 | if (atom[a].q != atom[a].qB) |
199 | { |
200 | *bChargePerturbed = TRUE1; |
201 | } |
202 | if (atom[a].type != atom[a].typeB) |
203 | { |
204 | *bTypePerturbed = TRUE1; |
205 | } |
206 | /* This if this atom fits into water optimization */ |
207 | if (!((a == a0 && bQ && bLJ) || |
208 | (a == a0+1 && bQ && !bLJ) || |
209 | (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) || |
210 | (a == a0+3 && !bQ && bLJ))) |
211 | { |
212 | bWater = FALSE0; |
213 | } |
214 | if (bQ && bLJ) |
215 | { |
216 | ncqlj++; |
217 | } |
218 | else |
219 | { |
220 | if (bQ) |
221 | { |
222 | ncq++; |
223 | } |
224 | if (bLJ) |
225 | { |
226 | nclj++; |
227 | } |
228 | } |
229 | a++; |
230 | } |
231 | if (bWater) |
232 | { |
233 | nw += nmol; |
234 | } |
235 | else |
236 | { |
237 | nqlj += nmol*ncqlj; |
238 | nq += nmol*ncq; |
239 | nlj += nmol*nclj; |
240 | } |
241 | } |
242 | } |
243 | |
244 | *nq_tot = nq + nqlj + nw*3; |
245 | *nlj_tot = nlj + nqlj + nw; |
246 | |
247 | if (debug) |
248 | { |
249 | fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj); |
250 | } |
251 | |
252 | /* For the PP non-bonded cost it is (unrealistically) assumed |
253 | * that all atoms are distributed homogeneously in space. |
254 | * Factor 3 is used because a water molecule has 3 atoms |
255 | * (and TIP4P effectively has 3 interactions with (water) atoms)). |
256 | */ |
257 | *cost_pp = 0.5*(fqljw*nw*nqlj + |
258 | fqw *nw*(3*nw + nq) + |
259 | fqlj *nqlj*nqlj + |
260 | fq *nq*(3*nw + nqlj + nq) + |
261 | flj *nlj*(nw + nqlj + nlj)) |
262 | *4/3*M_PI3.14159265358979323846*ir->rlist*ir->rlist*ir->rlist/det(box); |
263 | } |
264 | |
265 | static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box, |
266 | int *nq_tot, int *nlj_tot, |
267 | double *cost_pp, |
268 | gmx_bool *bChargePerturbed, gmx_bool *bTypePerturbed) |
269 | { |
270 | t_atom *atom; |
271 | int mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj; |
272 | gmx_bool bQRF; |
273 | t_iparams *iparams; |
274 | gmx_moltype_t *molt; |
275 | real r_eff; |
276 | double c_qlj, c_q, c_lj; |
277 | double nat; |
278 | /* Conversion factor for reference vs SIMD kernel performance. |
279 | * The factor is about right for SSE2/4, but should be 2 higher for AVX256. |
280 | */ |
281 | #ifdef GMX_DOUBLE |
282 | const real nbnxn_refkernel_fac = 4.0; |
283 | #else |
284 | const real nbnxn_refkernel_fac = 8.0; |
285 | #endif |
286 | |
287 | bQRF = (EEL_RF(ir->coulombtype)((ir->coulombtype) == eelRF || (ir->coulombtype) == eelGRF || (ir->coulombtype) == eelRF_NEC || (ir->coulombtype) == eelRF_ZERO ) || ir->coulombtype == eelCUT); |
288 | |
289 | iparams = mtop->ffparams.iparams; |
290 | atnr = mtop->ffparams.atnr; |
291 | nqlj = 0; |
292 | nq = 0; |
293 | *bChargePerturbed = FALSE0; |
294 | for (mb = 0; mb < mtop->nmolblock; mb++) |
295 | { |
296 | molt = &mtop->moltype[mtop->molblock[mb].type]; |
297 | atom = molt->atoms.atom; |
298 | nmol = mtop->molblock[mb].nmol; |
299 | a = 0; |
Value stored to 'a' is never read | |
300 | for (a = 0; a < molt->atoms.nr; a++) |
301 | { |
302 | if (atom[a].q != 0 || atom[a].qB != 0) |
303 | { |
304 | if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 || |
305 | iparams[(atnr+1)*atom[a].type].lj.c12 != 0) |
306 | { |
307 | nqlj += nmol; |
308 | } |
309 | else |
310 | { |
311 | nq += nmol; |
312 | } |
313 | } |
314 | if (atom[a].q != atom[a].qB) |
315 | { |
316 | *bChargePerturbed = TRUE1; |
317 | } |
318 | if (atom[a].type != atom[a].typeB) |
319 | { |
320 | *bTypePerturbed = TRUE1; |
321 | } |
322 | } |
323 | } |
324 | |
325 | nlj = mtop->natoms - nqlj - nq; |
326 | |
327 | *nq_tot = nqlj + nq; |
328 | *nlj_tot = nqlj + nlj; |
329 | |
330 | /* Effective cut-off for cluster pair list of 4x4 atoms */ |
331 | r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE4, mtop->natoms/det(box)); |
332 | |
333 | if (debug) |
334 | { |
335 | fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n", |
336 | nqlj, nq, nlj, ir->rlist, r_eff); |
337 | } |
338 | |
339 | /* Determine the cost per pair interaction */ |
340 | c_qlj = (bQRF ? C_VT_QRF_LJ0.40 : C_VT_QEXP_LJ0.55); |
341 | c_q = (bQRF ? C_VT_QRF0.30 : C_VT_QEXP0.50); |
342 | c_lj = C_VT_LJ0.30; |
343 | if (ir->vdw_modifier == eintmodPOTSWITCH || EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME)) |
344 | { |
345 | c_qlj += C_VT_LJEXP_ADD0.20; |
346 | c_lj += C_VT_LJEXP_ADD0.20; |
347 | } |
348 | if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME) && ir->ljpme_combination_rule == eljpmeLB) |
349 | { |
350 | /* We don't have LJ-PME LB comb. rule kernels, we use slow kernels */ |
351 | c_qlj *= nbnxn_refkernel_fac; |
352 | c_q *= nbnxn_refkernel_fac; |
353 | c_lj *= nbnxn_refkernel_fac; |
354 | } |
355 | |
356 | /* For the PP non-bonded cost it is (unrealistically) assumed |
357 | * that all atoms are distributed homogeneously in space. |
358 | */ |
359 | /* Convert mtop->natoms to double to avoid int overflow */ |
360 | nat = mtop->natoms; |
361 | *cost_pp = 0.5*nat*(nqlj*c_qlj + nq*c_q + nlj*c_lj) |
362 | *4/3*M_PI3.14159265358979323846*r_eff*r_eff*r_eff/det(box); |
363 | } |
364 | |
365 | float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box) |
366 | { |
367 | t_atom *atom; |
368 | int mb, nmol, atnr, cg, a, a0, nq_tot, nlj_tot, f; |
369 | gmx_bool bBHAM, bLJcut, bChargePerturbed, bTypePerturbed; |
370 | gmx_bool bWater, bQ, bLJ; |
371 | double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme; |
372 | float ratio; |
373 | t_iparams *iparams; |
374 | gmx_moltype_t *molt; |
375 | |
376 | /* Computational cost of bonded, non-bonded and PME calculations. |
377 | * This will be machine dependent. |
378 | * The numbers here are accurate for Intel Core2 and AMD Athlon 64 |
379 | * in single precision. In double precision PME mesh is slightly cheaper, |
380 | * although not so much that the numbers need to be adjusted. |
381 | */ |
382 | |
383 | iparams = mtop->ffparams.iparams; |
384 | atnr = mtop->ffparams.atnr; |
385 | |
386 | cost_bond = C_BOND5.0*n_bonded_dx(mtop, TRUE1); |
387 | |
388 | if (ir->cutoff_scheme == ecutsGROUP) |
389 | { |
390 | pp_group_load(mtop, ir, box, |
391 | &nq_tot, &nlj_tot, &cost_pp, |
392 | &bChargePerturbed, &bTypePerturbed); |
393 | } |
394 | else |
395 | { |
396 | pp_verlet_load(mtop, ir, box, |
397 | &nq_tot, &nlj_tot, &cost_pp, |
398 | &bChargePerturbed, &bTypePerturbed); |
399 | } |
400 | |
401 | cost_redist = 0; |
402 | cost_spread = 0; |
403 | cost_fft = 0; |
404 | cost_solve = 0; |
405 | |
406 | if (EEL_PME(ir->coulombtype)((ir->coulombtype) == eelPME || (ir->coulombtype) == eelPMESWITCH || (ir->coulombtype) == eelPMEUSER || (ir->coulombtype ) == eelPMEUSERSWITCH || (ir->coulombtype) == eelP3M_AD)) |
407 | { |
408 | f = ((ir->efep != efepNO && bChargePerturbed) ? 2 : 1); |
409 | cost_redist += C_PME_REDIST12.0*nq_tot; |
410 | cost_spread += f*C_PME_SPREAD0.30*nq_tot*pow(ir->pme_order, 3); |
411 | cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz); |
412 | cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz; |
413 | } |
414 | |
415 | if (EVDW_PME(ir->vdwtype)((ir->vdwtype) == evdwPME)) |
416 | { |
417 | f = ((ir->efep != efepNO && bTypePerturbed) ? 2 : 1); |
418 | if (ir->ljpme_combination_rule == eljpmeLB) |
419 | { |
420 | /* LB combination rule: we have 7 mesh terms */ |
421 | f *= 7; |
422 | } |
423 | cost_redist += C_PME_REDIST12.0*nlj_tot; |
424 | cost_spread += f*C_PME_SPREAD0.30*nlj_tot*pow(ir->pme_order, 3); |
425 | cost_fft += f*C_PME_FFT0.20*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz); |
426 | cost_solve += f*C_PME_SOLVE0.50*ir->nkx*ir->nky*ir->nkz; |
427 | } |
428 | |
429 | cost_pme = cost_redist + cost_spread + cost_fft + cost_solve; |
430 | |
431 | ratio = cost_pme/(cost_bond + cost_pp + cost_pme); |
432 | |
433 | if (debug) |
434 | { |
435 | fprintf(debug, |
436 | "cost_bond %f\n" |
437 | "cost_pp %f\n" |
438 | "cost_redist %f\n" |
439 | "cost_spread %f\n" |
440 | "cost_fft %f\n" |
441 | "cost_solve %f\n", |
442 | cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve); |
443 | |
444 | fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio); |
445 | } |
446 | |
447 | return ratio; |
448 | } |