src/mdlib/perf_est.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2008, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <math.h>
  43
  44 #include "perf_est.h"
  45 #include "physics.h"
  46 #include "vec.h"
  47 #include "mtop_util.h"
  48 #include "types/commrec.h"
  49 #include "nbnxn_search.h"
  50 #include "nbnxn_consts.h"
  51
  52
  53 /* Computational cost of bonded, non-bonded and PME calculations.
  54  * This will be machine dependent.
  55  * The numbers here are accurate for Intel Core2 and AMD Athlon 64
  56  * in single precision. In double precision PME mesh is slightly cheaper,
  57  * although not so much that the numbers need to be adjusted.
  58  */
  59
  60 /* Cost of a pair interaction in the "group" cut-off scheme" */
  61 #define C_GR_FQ       1.5
  62 #define C_GR_QLJ_CUT  1.5
  63 #define C_GR_QLJ_TAB  2.0
  64 #define C_GR_LJ_CUT   1.0
  65 #define C_GR_LJ_TAB   1.75
  66 /* Cost of 1 water with one Q/LJ atom */
  67 #define C_GR_QLJW_CUT 2.0
  68 #define C_GR_QLJW_TAB 2.25
  69 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
  70 #define C_GR_QW       1.75
  71
  72 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
  73 #define C_VT_LJ       0.30
  74 #define C_VT_QLJ_RF   0.40
  75 #define C_VT_Q_RF     0.30
  76 #define C_VT_QLJ_TAB  0.55
  77 #define C_VT_Q_TAB    0.50
  78
  79 /* Cost of PME, with all components running with SSE instructions */
  80 /* Cost of particle reordering and redistribution */
  81 #define C_PME_REDIST  12.0
  82 /* Cost of q spreading and force interpolation per charge (mainly memory) */
  83 #define C_PME_SPREAD  0.30
  84 /* Cost of fft's, will be multiplied with N log(N) */
  85 #define C_PME_FFT     0.20
  86 /* Cost of pme_solve, will be multiplied with N */
  87 #define C_PME_SOLVE   0.50
  88
  89 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
  90 #define C_BOND        5.0
  91
  92 int n_bonded_dx(gmx_mtop_t *mtop,gmx_bool bExcl)
  93 {
  94   int mb,nmol,ftype,ndxb,ndx_excl;
  95   int ndx;
  96   gmx_moltype_t *molt;
  97
  98   /* Count the number of pbc_rvec_sub calls required for bonded interactions.
  99    * This number is also roughly proportional to the computational cost.
 100    */
 101   ndx = 0;
 102   ndx_excl = 0;
 103   for(mb=0; mb<mtop->nmolblock; mb++) {
 104     molt = &mtop->moltype[mtop->molblock[mb].type];
 105     nmol = mtop->molblock[mb].nmol;
 106     for(ftype=0; ftype<F_NRE; ftype++) {
 107       if (interaction_function[ftype].flags & IF_BOND) {
 108         switch (ftype) {
 109                 case F_POSRES:    ndxb = 1; break;
 110                 case F_CONNBONDS: ndxb = 0; break;
 111                 default:     ndxb = NRAL(ftype) - 1; break;
 112         }
 113         ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
 114       }
 115     }
 116     if (bExcl) {
 117       ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
 118     } else {
 119       ndx_excl = 0;
 120     }
 121   }
 122
 123   if (debug)
 124     fprintf(debug,"ndx bonded %d exclusions %d\n",ndx,ndx_excl);
 125
 126   ndx += ndx_excl;
 127
 128   return ndx;
 129 }
 130
 131 static void pp_group_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
 132                           int *nq_tot,
 133                           double *cost_pp,
 134                           gmx_bool *bChargePerturbed)
 135 {
 136     t_atom *atom;
 137     int  mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj;
 138     gmx_bool bBHAM,bLJcut,bWater,bQ,bLJ;
 139     int nw,nqlj,nq,nlj;
 140     float fq,fqlj,flj,fljtab,fqljw,fqw;
 141     t_iparams *iparams;
 142     gmx_moltype_t *molt;
 143
 144     bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 145
 146     bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
 147
 148     /* Computational cost of bonded, non-bonded and PME calculations.
 149      * This will be machine dependent.
 150      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 151      * in single precision. In double precision PME mesh is slightly cheaper,
 152      * although not so much that the numbers need to be adjusted.
 153      */
 154     fq    = C_GR_FQ;
 155     fqlj  = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
 156     flj   = (bLJcut ? C_GR_LJ_CUT  : C_GR_LJ_TAB);
 157     /* Cost of 1 water with one Q/LJ atom */
 158     fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
 159     /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
 160     fqw   = C_GR_QW;
 161
 162     iparams = mtop->ffparams.iparams;
 163     atnr = mtop->ffparams.atnr;
 164     nw   = 0;
 165     nqlj = 0;
 166     nq   = 0;
 167     nlj  = 0;
 168     *bChargePerturbed = FALSE;
 169     for(mb=0; mb<mtop->nmolblock; mb++)
 170         {
 171         molt = &mtop->moltype[mtop->molblock[mb].type];
 172         atom = molt->atoms.atom;
 173         nmol = mtop->molblock[mb].nmol;
 174         a = 0;
 175         for(cg=0; cg<molt->cgs.nr; cg++)
 176         {
 177             bWater = !bBHAM;
 178             ncqlj = 0;
 179             ncq   = 0;
 180             nclj  = 0;
 181             a0    = a;
 182             while (a < molt->cgs.index[cg+1])
 183             {
 184                 bQ  = (atom[a].q != 0 || atom[a].qB != 0);
 185                 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 186                        iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
 187                 if (atom[a].q != atom[a].qB)
 188                 {
 189                     *bChargePerturbed = TRUE;
 190                 }
 191                 /* This if this atom fits into water optimization */
 192                 if (!((a == a0   &&  bQ &&  bLJ) ||
 193                       (a == a0+1 &&  bQ && !bLJ) ||
 194                       (a == a0+2 &&  bQ && !bLJ && atom[a].q == atom[a-1].q) ||
 195                       (a == a0+3 && !bQ &&  bLJ)))
 196                     bWater = FALSE;
 197                 if (bQ && bLJ)
 198                 {
 199                     ncqlj++;
 200                 }
 201                 else
 202                 {
 203                     if (bQ)
 204                     {
 205                         ncq++;
 206                     }
 207                     if (bLJ)
 208                     {
 209                         nclj++;
 210                     }
 211                 }
 212                 a++;
 213             }
 214             if (bWater)
 215             {
 216                 nw   += nmol;
 217             }
 218             else
 219             {
 220                 nqlj += nmol*ncqlj;
 221                 nq   += nmol*ncq;
 222                 nlj  += nmol*nclj;
 223             }
 224         }
 225     }
 226
 227     *nq_tot = nq + nqlj + nw*3;
 228
 229     if (debug)
 230     {
 231       fprintf(debug,"nw %d nqlj %d nq %d nlj %d\n",nw,nqlj,nq,nlj);
 232     }
 233
 234     /* For the PP non-bonded cost it is (unrealistically) assumed
 235      * that all atoms are distributed homogeneously in space.
 236      * Factor 3 is used because a water molecule has 3 atoms
 237      * (and TIP4P effectively has 3 interactions with (water) atoms)).
 238      */
 239     *cost_pp = 0.5*(fqljw*nw*nqlj +
 240                     fqw  *nw*(3*nw + nq) +
 241                     fqlj *nqlj*nqlj +
 242                     fq   *nq*(3*nw + nqlj + nq) +
 243                     flj  *nlj*(nw + nqlj + nlj))
 244         *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
 245 }
 246
 247 static void pp_verlet_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
 248                            int *nq_tot,
 249                            double *cost_pp,
 250                            gmx_bool *bChargePerturbed)
 251 {
 252     t_atom *atom;
 253     int  mb,nmol,atnr,cg,a,a0,nqlj,nq,nlj;
 254     gmx_bool bQRF;
 255     t_iparams *iparams;
 256     gmx_moltype_t *molt;
 257     float r_eff;
 258     double nat;
 259
 260     bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
 261
 262     iparams = mtop->ffparams.iparams;
 263     atnr = mtop->ffparams.atnr;
 264     nqlj = 0;
 265     nq   = 0;
 266     *bChargePerturbed = FALSE;
 267     for(mb=0; mb<mtop->nmolblock; mb++)
 268         {
 269         molt = &mtop->moltype[mtop->molblock[mb].type];
 270         atom = molt->atoms.atom;
 271         nmol = mtop->molblock[mb].nmol;
 272         a = 0;
 273         for(a=0; a<molt->atoms.nr; a++)
 274         {
 275             if (atom[a].q != 0 || atom[a].qB != 0)
 276             {
 277                 if (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 278                     iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
 279                 {
 280                     nqlj += nmol;
 281                 }
 282                 else
 283                 {
 284                     nq += nmol;
 285                 }
 286             }
 287             if (atom[a].q != atom[a].qB)
 288             {
 289                 *bChargePerturbed = TRUE;
 290             }
 291         }
 292     }
 293
 294     nlj = mtop->natoms - nqlj - nq;
 295
 296     *nq_tot = nqlj + nq;
 297
 298     /* Effective cut-off for cluster pair list of 4x4 atoms */
 299     r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE,mtop->natoms/det(box));
 300
 301     if (debug)
 302     {
 303         fprintf(debug,"nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
 304                 nqlj,nq,nlj,ir->rlist,r_eff);
 305     }
 306
 307     /* For the PP non-bonded cost it is (unrealistically) assumed
 308      * that all atoms are distributed homogeneously in space.
 309      */
 310     /* Convert mtop->natoms to double to avoid int overflow */
 311     nat = mtop->natoms;
 312     *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
 313                     nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
 314                     nlj*nat*C_VT_LJ)
 315         *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
 316 }
 317
 318 float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box)
 319 {
 320   t_atom *atom;
 321   int  mb,nmol,atnr,cg,a,a0,nq_tot;
 322   gmx_bool bBHAM,bLJcut,bChargePerturbed,bWater,bQ,bLJ;
 323   double cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve,cost_pme;
 324   float ratio;
 325   t_iparams *iparams;
 326   gmx_moltype_t *molt;
 327
 328   /* Computational cost of bonded, non-bonded and PME calculations.
 329    * This will be machine dependent.
 330    * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 331    * in single precision. In double precision PME mesh is slightly cheaper,
 332    * although not so much that the numbers need to be adjusted.
 333    */
 334
 335   iparams = mtop->ffparams.iparams;
 336   atnr = mtop->ffparams.atnr;
 337
 338   cost_bond = C_BOND*n_bonded_dx(mtop,TRUE);
 339
 340   if (ir->cutoff_scheme == ecutsGROUP)
 341   {
 342       pp_group_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
 343   }
 344   else
 345   {
 346       pp_verlet_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
 347   }
 348
 349   cost_redist = C_PME_REDIST*nq_tot;
 350   cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order,3);
 351   cost_fft    = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
 352   cost_solve  = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
 353
 354   if (ir->efep != efepNO && bChargePerturbed) {
 355     /* All PME work, except redist & spline coefficient calculation, doubles */
 356     cost_spread *= 2;
 357     cost_fft    *= 2;
 358     cost_solve  *= 2;
 359   }
 360
 361   cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
 362
 363   ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
 364
 365   if (debug) {
 366     fprintf(debug,
 367             "cost_bond   %f\n"
 368             "cost_pp     %f\n"
 369             "cost_redist %f\n"
 370             "cost_spread %f\n"
 371             "cost_fft    %f\n"
 372             "cost_solve  %f\n",
 373             cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve);
 374
 375     fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio);
 376   }
 377
 378   return ratio;
 379 }