1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
44 #include "mtop_util.h"
45 #include "types/commrec.h"
46 #include "nbnxn_search.h"
47 #include "nbnxn_consts.h"
50 /* Computational cost of bonded, non-bonded and PME calculations.
51 * This will be machine dependent.
52 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
53 * in single precision. In double precision PME mesh is slightly cheaper,
54 * although not so much that the numbers need to be adjusted.
57 /* Cost of a pair interaction in the "group" cut-off scheme" */
59 #define C_GR_QLJ_CUT 1.5
60 #define C_GR_QLJ_TAB 2.0
61 #define C_GR_LJ_CUT 1.0
62 #define C_GR_LJ_TAB 1.75
63 /* Cost of 1 water with one Q/LJ atom */
64 #define C_GR_QLJW_CUT 2.0
65 #define C_GR_QLJW_TAB 2.25
66 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
69 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
71 #define C_VT_QLJ_RF 0.40
72 #define C_VT_Q_RF 0.30
73 #define C_VT_QLJ_TAB 0.55
74 #define C_VT_Q_TAB 0.50
76 /* Cost of PME, with all components running with SSE instructions */
77 /* Cost of particle reordering and redistribution */
78 #define C_PME_REDIST 12.0
79 /* Cost of q spreading and force interpolation per charge (mainly memory) */
80 #define C_PME_SPREAD 0.30
81 /* Cost of fft's, will be multiplied with N log(N) */
82 #define C_PME_FFT 0.20
83 /* Cost of pme_solve, will be multiplied with N */
84 #define C_PME_SOLVE 0.50
86 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
89 int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)
91 int mb, nmol, ftype, ndxb, ndx_excl;
95 /* Count the number of pbc_rvec_sub calls required for bonded interactions.
96 * This number is also roughly proportional to the computational cost.
100 for (mb = 0; mb < mtop->nmolblock; mb++)
102 molt = &mtop->moltype[mtop->molblock[mb].type];
103 nmol = mtop->molblock[mb].nmol;
104 for (ftype = 0; ftype < F_NRE; ftype++)
106 if (interaction_function[ftype].flags & IF_BOND)
111 case F_FBPOSRES: ndxb = 1; break;
112 case F_CONNBONDS: ndxb = 0; break;
113 default: ndxb = NRAL(ftype) - 1; break;
115 ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
120 ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
130 fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);
138 static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
141 gmx_bool *bChargePerturbed)
144 int mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;
145 gmx_bool bBHAM, bLJcut, bWater, bQ, bLJ;
146 int nw, nqlj, nq, nlj;
147 float fq, fqlj, flj, fljtab, fqljw, fqw;
151 bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
153 bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
155 /* Computational cost of bonded, non-bonded and PME calculations.
156 * This will be machine dependent.
157 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
158 * in single precision. In double precision PME mesh is slightly cheaper,
159 * although not so much that the numbers need to be adjusted.
162 fqlj = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
163 flj = (bLJcut ? C_GR_LJ_CUT : C_GR_LJ_TAB);
164 /* Cost of 1 water with one Q/LJ atom */
165 fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
166 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
169 iparams = mtop->ffparams.iparams;
170 atnr = mtop->ffparams.atnr;
175 *bChargePerturbed = FALSE;
176 for (mb = 0; mb < mtop->nmolblock; mb++)
178 molt = &mtop->moltype[mtop->molblock[mb].type];
179 atom = molt->atoms.atom;
180 nmol = mtop->molblock[mb].nmol;
182 for (cg = 0; cg < molt->cgs.nr; cg++)
189 while (a < molt->cgs.index[cg+1])
191 bQ = (atom[a].q != 0 || atom[a].qB != 0);
192 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
193 iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
194 if (atom[a].q != atom[a].qB)
196 *bChargePerturbed = TRUE;
198 /* This if this atom fits into water optimization */
199 if (!((a == a0 && bQ && bLJ) ||
200 (a == a0+1 && bQ && !bLJ) ||
201 (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||
202 (a == a0+3 && !bQ && bLJ)))
236 *nq_tot = nq + nqlj + nw*3;
240 fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);
243 /* For the PP non-bonded cost it is (unrealistically) assumed
244 * that all atoms are distributed homogeneously in space.
245 * Factor 3 is used because a water molecule has 3 atoms
246 * (and TIP4P effectively has 3 interactions with (water) atoms)).
248 *cost_pp = 0.5*(fqljw*nw*nqlj +
249 fqw *nw*(3*nw + nq) +
251 fq *nq*(3*nw + nqlj + nq) +
252 flj *nlj*(nw + nqlj + nlj))
253 *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
256 static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
259 gmx_bool *bChargePerturbed)
262 int mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;
269 bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
271 iparams = mtop->ffparams.iparams;
272 atnr = mtop->ffparams.atnr;
275 *bChargePerturbed = FALSE;
276 for (mb = 0; mb < mtop->nmolblock; mb++)
278 molt = &mtop->moltype[mtop->molblock[mb].type];
279 atom = molt->atoms.atom;
280 nmol = mtop->molblock[mb].nmol;
282 for (a = 0; a < molt->atoms.nr; a++)
284 if (atom[a].q != 0 || atom[a].qB != 0)
286 if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
287 iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
296 if (atom[a].q != atom[a].qB)
298 *bChargePerturbed = TRUE;
303 nlj = mtop->natoms - nqlj - nq;
307 /* Effective cut-off for cluster pair list of 4x4 atoms */
308 r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE, mtop->natoms/det(box));
312 fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
313 nqlj, nq, nlj, ir->rlist, r_eff);
316 /* For the PP non-bonded cost it is (unrealistically) assumed
317 * that all atoms are distributed homogeneously in space.
319 /* Convert mtop->natoms to double to avoid int overflow */
321 *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
322 nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
324 *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
327 float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
330 int mb, nmol, atnr, cg, a, a0, nq_tot;
331 gmx_bool bBHAM, bLJcut, bChargePerturbed, bWater, bQ, bLJ;
332 double cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
337 /* Computational cost of bonded, non-bonded and PME calculations.
338 * This will be machine dependent.
339 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
340 * in single precision. In double precision PME mesh is slightly cheaper,
341 * although not so much that the numbers need to be adjusted.
344 iparams = mtop->ffparams.iparams;
345 atnr = mtop->ffparams.atnr;
347 cost_bond = C_BOND*n_bonded_dx(mtop, TRUE);
349 if (ir->cutoff_scheme == ecutsGROUP)
351 pp_group_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
355 pp_verlet_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
358 cost_redist = C_PME_REDIST*nq_tot;
359 cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order, 3);
360 cost_fft = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
361 cost_solve = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
363 if (ir->efep != efepNO && bChargePerturbed)
365 /* All PME work, except redist & spline coefficient calculation, doubles */
371 cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
373 ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
384 cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);
386 fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);