2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2008, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "mtop_util.h"
48 #include "types/commrec.h"
49 #include "nbnxn_search.h"
50 #include "nbnxn_consts.h"
53 /* Computational cost of bonded, non-bonded and PME calculations.
54 * This will be machine dependent.
55 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
56 * in single precision. In double precision PME mesh is slightly cheaper,
57 * although not so much that the numbers need to be adjusted.
60 /* Cost of a pair interaction in the "group" cut-off scheme" */
62 #define C_GR_QLJ_CUT 1.5
63 #define C_GR_QLJ_TAB 2.0
64 #define C_GR_LJ_CUT 1.0
65 #define C_GR_LJ_TAB 1.75
66 /* Cost of 1 water with one Q/LJ atom */
67 #define C_GR_QLJW_CUT 2.0
68 #define C_GR_QLJW_TAB 2.25
69 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
72 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
74 #define C_VT_QLJ_RF 0.40
75 #define C_VT_Q_RF 0.30
76 #define C_VT_QLJ_TAB 0.55
77 #define C_VT_Q_TAB 0.50
79 /* Cost of PME, with all components running with SSE instructions */
80 /* Cost of particle reordering and redistribution */
81 #define C_PME_REDIST 12.0
82 /* Cost of q spreading and force interpolation per charge (mainly memory) */
83 #define C_PME_SPREAD 0.30
84 /* Cost of fft's, will be multiplied with N log(N) */
85 #define C_PME_FFT 0.20
86 /* Cost of pme_solve, will be multiplied with N */
87 #define C_PME_SOLVE 0.50
89 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
92 int n_bonded_dx(gmx_mtop_t *mtop,gmx_bool bExcl)
94 int mb,nmol,ftype,ndxb,ndx_excl;
98 /* Count the number of pbc_rvec_sub calls required for bonded interactions.
99 * This number is also roughly proportional to the computational cost.
103 for(mb=0; mb<mtop->nmolblock; mb++) {
104 molt = &mtop->moltype[mtop->molblock[mb].type];
105 nmol = mtop->molblock[mb].nmol;
106 for(ftype=0; ftype<F_NRE; ftype++) {
107 if (interaction_function[ftype].flags & IF_BOND) {
109 case F_POSRES: ndxb = 1; break;
110 case F_CONNBONDS: ndxb = 0; break;
111 default: ndxb = NRAL(ftype) - 1; break;
113 ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
117 ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
124 fprintf(debug,"ndx bonded %d exclusions %d\n",ndx,ndx_excl);
131 static void pp_group_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
134 gmx_bool *bChargePerturbed)
137 int mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj;
138 gmx_bool bBHAM,bLJcut,bWater,bQ,bLJ;
140 float fq,fqlj,flj,fljtab,fqljw,fqw;
144 bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
146 bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
148 /* Computational cost of bonded, non-bonded and PME calculations.
149 * This will be machine dependent.
150 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
151 * in single precision. In double precision PME mesh is slightly cheaper,
152 * although not so much that the numbers need to be adjusted.
155 fqlj = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
156 flj = (bLJcut ? C_GR_LJ_CUT : C_GR_LJ_TAB);
157 /* Cost of 1 water with one Q/LJ atom */
158 fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
159 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
162 iparams = mtop->ffparams.iparams;
163 atnr = mtop->ffparams.atnr;
168 *bChargePerturbed = FALSE;
169 for(mb=0; mb<mtop->nmolblock; mb++)
171 molt = &mtop->moltype[mtop->molblock[mb].type];
172 atom = molt->atoms.atom;
173 nmol = mtop->molblock[mb].nmol;
175 for(cg=0; cg<molt->cgs.nr; cg++)
182 while (a < molt->cgs.index[cg+1])
184 bQ = (atom[a].q != 0 || atom[a].qB != 0);
185 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
186 iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
187 if (atom[a].q != atom[a].qB)
189 *bChargePerturbed = TRUE;
191 /* This if this atom fits into water optimization */
192 if (!((a == a0 && bQ && bLJ) ||
193 (a == a0+1 && bQ && !bLJ) ||
194 (a == a0+2 && bQ && !bLJ && atom[a].q == atom[a-1].q) ||
195 (a == a0+3 && !bQ && bLJ)))
227 *nq_tot = nq + nqlj + nw*3;
231 fprintf(debug,"nw %d nqlj %d nq %d nlj %d\n",nw,nqlj,nq,nlj);
234 /* For the PP non-bonded cost it is (unrealistically) assumed
235 * that all atoms are distributed homogeneously in space.
236 * Factor 3 is used because a water molecule has 3 atoms
237 * (and TIP4P effectively has 3 interactions with (water) atoms)).
239 *cost_pp = 0.5*(fqljw*nw*nqlj +
240 fqw *nw*(3*nw + nq) +
242 fq *nq*(3*nw + nqlj + nq) +
243 flj *nlj*(nw + nqlj + nlj))
244 *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
247 static void pp_verlet_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
250 gmx_bool *bChargePerturbed)
253 int mb,nmol,atnr,cg,a,a0,nqlj,nq,nlj;
260 bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
262 iparams = mtop->ffparams.iparams;
263 atnr = mtop->ffparams.atnr;
266 *bChargePerturbed = FALSE;
267 for(mb=0; mb<mtop->nmolblock; mb++)
269 molt = &mtop->moltype[mtop->molblock[mb].type];
270 atom = molt->atoms.atom;
271 nmol = mtop->molblock[mb].nmol;
273 for(a=0; a<molt->atoms.nr; a++)
275 if (atom[a].q != 0 || atom[a].qB != 0)
277 if (iparams[(atnr+1)*atom[a].type].lj.c6 != 0 ||
278 iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
287 if (atom[a].q != atom[a].qB)
289 *bChargePerturbed = TRUE;
294 nlj = mtop->natoms - nqlj - nq;
298 /* Effective cut-off for cluster pair list of 4x4 atoms */
299 r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE,mtop->natoms/det(box));
303 fprintf(debug,"nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
304 nqlj,nq,nlj,ir->rlist,r_eff);
307 /* For the PP non-bonded cost it is (unrealistically) assumed
308 * that all atoms are distributed homogeneously in space.
310 /* Convert mtop->natoms to double to avoid int overflow */
312 *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
313 nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
315 *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
318 float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box)
321 int mb,nmol,atnr,cg,a,a0,nq_tot;
322 gmx_bool bBHAM,bLJcut,bChargePerturbed,bWater,bQ,bLJ;
323 double cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve,cost_pme;
328 /* Computational cost of bonded, non-bonded and PME calculations.
329 * This will be machine dependent.
330 * The numbers here are accurate for Intel Core2 and AMD Athlon 64
331 * in single precision. In double precision PME mesh is slightly cheaper,
332 * although not so much that the numbers need to be adjusted.
335 iparams = mtop->ffparams.iparams;
336 atnr = mtop->ffparams.atnr;
338 cost_bond = C_BOND*n_bonded_dx(mtop,TRUE);
340 if (ir->cutoff_scheme == ecutsGROUP)
342 pp_group_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
346 pp_verlet_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
349 cost_redist = C_PME_REDIST*nq_tot;
350 cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order,3);
351 cost_fft = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
352 cost_solve = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
354 if (ir->efep != efepNO && bChargePerturbed) {
355 /* All PME work, except redist & spline coefficient calculation, doubles */
361 cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
363 ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
373 cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve);
375 fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio);