src/gromacs/mdlib/perf_est.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2008, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <math.h>
  40
  41 #include "perf_est.h"
  42 #include "physics.h"
  43 #include "vec.h"
  44 #include "mtop_util.h"
  45 #include "types/commrec.h"
  46 #include "nbnxn_search.h"
  47 #include "nbnxn_consts.h"
  48
  49
  50 /* Computational cost of bonded, non-bonded and PME calculations.
  51  * This will be machine dependent.
  52  * The numbers here are accurate for Intel Core2 and AMD Athlon 64
  53  * in single precision. In double precision PME mesh is slightly cheaper,
  54  * although not so much that the numbers need to be adjusted.
  55  */
  56
  57 /* Cost of a pair interaction in the "group" cut-off scheme" */
  58 #define C_GR_FQ       1.5
  59 #define C_GR_QLJ_CUT  1.5
  60 #define C_GR_QLJ_TAB  2.0
  61 #define C_GR_LJ_CUT   1.0
  62 #define C_GR_LJ_TAB   1.75
  63 /* Cost of 1 water with one Q/LJ atom */
  64 #define C_GR_QLJW_CUT 2.0
  65 #define C_GR_QLJW_TAB 2.25
  66 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
  67 #define C_GR_QW       1.75
  68
  69 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
  70 #define C_VT_LJ       0.30
  71 #define C_VT_QLJ_RF   0.40
  72 #define C_VT_Q_RF     0.30
  73 #define C_VT_QLJ_TAB  0.55
  74 #define C_VT_Q_TAB    0.50
  75
  76 /* Cost of PME, with all components running with SSE instructions */
  77 /* Cost of particle reordering and redistribution */
  78 #define C_PME_REDIST  12.0
  79 /* Cost of q spreading and force interpolation per charge (mainly memory) */
  80 #define C_PME_SPREAD  0.30
  81 /* Cost of fft's, will be multiplied with N log(N) */
  82 #define C_PME_FFT     0.20
  83 /* Cost of pme_solve, will be multiplied with N */
  84 #define C_PME_SOLVE   0.50
  85
  86 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
  87 #define C_BOND        5.0
  88
  89 int n_bonded_dx(gmx_mtop_t *mtop,gmx_bool bExcl)
  90 {
  91   int mb,nmol,ftype,ndxb,ndx_excl;
  92   int ndx;
  93   gmx_moltype_t *molt;
  94
  95   /* Count the number of pbc_rvec_sub calls required for bonded interactions.
  96    * This number is also roughly proportional to the computational cost.
  97    */
  98   ndx = 0;
  99   ndx_excl = 0;
 100   for(mb=0; mb<mtop->nmolblock; mb++) {
 101     molt = &mtop->moltype[mtop->molblock[mb].type];
 102     nmol = mtop->molblock[mb].nmol;
 103     for(ftype=0; ftype<F_NRE; ftype++) {
 104       if (interaction_function[ftype].flags & IF_BOND) {
 105         switch (ftype) {
 106                 case F_POSRES:
 107                 case F_FBPOSRES:  ndxb = 1; break;
 108                 case F_CONNBONDS: ndxb = 0; break;
 109                 default:     ndxb = NRAL(ftype) - 1; break;
 110         }
 111         ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
 112       }
 113     }
 114     if (bExcl) {
 115       ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
 116     } else {
 117       ndx_excl = 0;
 118     }
 119   }
 120
 121   if (debug)
 122     fprintf(debug,"ndx bonded %d exclusions %d\n",ndx,ndx_excl);
 123
 124   ndx += ndx_excl;
 125
 126   return ndx;
 127 }
 128
 129 static void pp_group_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
 130                           int *nq_tot,
 131                           double *cost_pp,
 132                           gmx_bool *bChargePerturbed)
 133 {
 134     t_atom *atom;
 135     int  mb,nmol,atnr,cg,a,a0,ncqlj,ncq,nclj;
 136     gmx_bool bBHAM,bLJcut,bWater,bQ,bLJ;
 137     int nw,nqlj,nq,nlj;
 138     float fq,fqlj,flj,fljtab,fqljw,fqw;
 139     t_iparams *iparams;
 140     gmx_moltype_t *molt;
 141
 142     bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 143
 144     bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
 145
 146     /* Computational cost of bonded, non-bonded and PME calculations.
 147      * This will be machine dependent.
 148      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 149      * in single precision. In double precision PME mesh is slightly cheaper,
 150      * although not so much that the numbers need to be adjusted.
 151      */
 152     fq    = C_GR_FQ;
 153     fqlj  = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
 154     flj   = (bLJcut ? C_GR_LJ_CUT  : C_GR_LJ_TAB);
 155     /* Cost of 1 water with one Q/LJ atom */
 156     fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
 157     /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
 158     fqw   = C_GR_QW;
 159
 160     iparams = mtop->ffparams.iparams;
 161     atnr = mtop->ffparams.atnr;
 162     nw   = 0;
 163     nqlj = 0;
 164     nq   = 0;
 165     nlj  = 0;
 166     *bChargePerturbed = FALSE;
 167     for(mb=0; mb<mtop->nmolblock; mb++)
 168         {
 169         molt = &mtop->moltype[mtop->molblock[mb].type];
 170         atom = molt->atoms.atom;
 171         nmol = mtop->molblock[mb].nmol;
 172         a = 0;
 173         for(cg=0; cg<molt->cgs.nr; cg++)
 174         {
 175             bWater = !bBHAM;
 176             ncqlj = 0;
 177             ncq   = 0;
 178             nclj  = 0;
 179             a0    = a;
 180             while (a < molt->cgs.index[cg+1])
 181             {
 182                 bQ  = (atom[a].q != 0 || atom[a].qB != 0);
 183                 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 184                        iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
 185                 if (atom[a].q != atom[a].qB)
 186                 {
 187                     *bChargePerturbed = TRUE;
 188                 }
 189                 /* This if this atom fits into water optimization */
 190                 if (!((a == a0   &&  bQ &&  bLJ) ||
 191                       (a == a0+1 &&  bQ && !bLJ) ||
 192                       (a == a0+2 &&  bQ && !bLJ && atom[a].q == atom[a-1].q) ||
 193                       (a == a0+3 && !bQ &&  bLJ)))
 194                     bWater = FALSE;
 195                 if (bQ && bLJ)
 196                 {
 197                     ncqlj++;
 198                 }
 199                 else
 200                 {
 201                     if (bQ)
 202                     {
 203                         ncq++;
 204                     }
 205                     if (bLJ)
 206                     {
 207                         nclj++;
 208                     }
 209                 }
 210                 a++;
 211             }
 212             if (bWater)
 213             {
 214                 nw   += nmol;
 215             }
 216             else
 217             {
 218                 nqlj += nmol*ncqlj;
 219                 nq   += nmol*ncq;
 220                 nlj  += nmol*nclj;
 221             }
 222         }
 223     }
 224
 225     *nq_tot = nq + nqlj + nw*3;
 226
 227     if (debug)
 228     {
 229       fprintf(debug,"nw %d nqlj %d nq %d nlj %d\n",nw,nqlj,nq,nlj);
 230     }
 231
 232     /* For the PP non-bonded cost it is (unrealistically) assumed
 233      * that all atoms are distributed homogeneously in space.
 234      * Factor 3 is used because a water molecule has 3 atoms
 235      * (and TIP4P effectively has 3 interactions with (water) atoms)).
 236      */
 237     *cost_pp = 0.5*(fqljw*nw*nqlj +
 238                     fqw  *nw*(3*nw + nq) +
 239                     fqlj *nqlj*nqlj +
 240                     fq   *nq*(3*nw + nqlj + nq) +
 241                     flj  *nlj*(nw + nqlj + nlj))
 242         *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
 243 }
 244
 245 static void pp_verlet_load(gmx_mtop_t *mtop,t_inputrec *ir,matrix box,
 246                            int *nq_tot,
 247                            double *cost_pp,
 248                            gmx_bool *bChargePerturbed)
 249 {
 250     t_atom *atom;
 251     int  mb,nmol,atnr,cg,a,a0,nqlj,nq,nlj;
 252     gmx_bool bQRF;
 253     t_iparams *iparams;
 254     gmx_moltype_t *molt;
 255     float r_eff;
 256     double nat;
 257
 258     bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
 259
 260     iparams = mtop->ffparams.iparams;
 261     atnr = mtop->ffparams.atnr;
 262     nqlj = 0;
 263     nq   = 0;
 264     *bChargePerturbed = FALSE;
 265     for(mb=0; mb<mtop->nmolblock; mb++)
 266         {
 267         molt = &mtop->moltype[mtop->molblock[mb].type];
 268         atom = molt->atoms.atom;
 269         nmol = mtop->molblock[mb].nmol;
 270         a = 0;
 271         for(a=0; a<molt->atoms.nr; a++)
 272         {
 273             if (atom[a].q != 0 || atom[a].qB != 0)
 274             {
 275                 if (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 276                     iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
 277                 {
 278                     nqlj += nmol;
 279                 }
 280                 else
 281                 {
 282                     nq += nmol;
 283                 }
 284             }
 285             if (atom[a].q != atom[a].qB)
 286             {
 287                 *bChargePerturbed = TRUE;
 288             }
 289         }
 290     }
 291
 292     nlj = mtop->natoms - nqlj - nq;
 293
 294     *nq_tot = nqlj + nq;
 295
 296     /* Effective cut-off for cluster pair list of 4x4 atoms */
 297     r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE,mtop->natoms/det(box));
 298
 299     if (debug)
 300     {
 301         fprintf(debug,"nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
 302                 nqlj,nq,nlj,ir->rlist,r_eff);
 303     }
 304
 305     /* For the PP non-bonded cost it is (unrealistically) assumed
 306      * that all atoms are distributed homogeneously in space.
 307      */
 308     /* Convert mtop->natoms to double to avoid int overflow */
 309     nat = mtop->natoms;
 310     *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
 311                     nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
 312                     nlj*nat*C_VT_LJ)
 313         *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
 314 }
 315
 316 float pme_load_estimate(gmx_mtop_t *mtop,t_inputrec *ir,matrix box)
 317 {
 318   t_atom *atom;
 319   int  mb,nmol,atnr,cg,a,a0,nq_tot;
 320   gmx_bool bBHAM,bLJcut,bChargePerturbed,bWater,bQ,bLJ;
 321   double cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve,cost_pme;
 322   float ratio;
 323   t_iparams *iparams;
 324   gmx_moltype_t *molt;
 325
 326   /* Computational cost of bonded, non-bonded and PME calculations.
 327    * This will be machine dependent.
 328    * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 329    * in single precision. In double precision PME mesh is slightly cheaper,
 330    * although not so much that the numbers need to be adjusted.
 331    */
 332
 333   iparams = mtop->ffparams.iparams;
 334   atnr = mtop->ffparams.atnr;
 335
 336   cost_bond = C_BOND*n_bonded_dx(mtop,TRUE);
 337
 338   if (ir->cutoff_scheme == ecutsGROUP)
 339   {
 340       pp_group_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
 341   }
 342   else
 343   {
 344       pp_verlet_load(mtop,ir,box,&nq_tot,&cost_pp,&bChargePerturbed);
 345   }
 346
 347   cost_redist = C_PME_REDIST*nq_tot;
 348   cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order,3);
 349   cost_fft    = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
 350   cost_solve  = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
 351
 352   if (ir->efep != efepNO && bChargePerturbed) {
 353     /* All PME work, except redist & spline coefficient calculation, doubles */
 354     cost_spread *= 2;
 355     cost_fft    *= 2;
 356     cost_solve  *= 2;
 357   }
 358
 359   cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
 360
 361   ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
 362
 363   if (debug) {
 364     fprintf(debug,
 365             "cost_bond   %f\n"
 366             "cost_pp     %f\n"
 367             "cost_redist %f\n"
 368             "cost_spread %f\n"
 369             "cost_fft    %f\n"
 370             "cost_solve  %f\n",
 371             cost_bond,cost_pp,cost_redist,cost_spread,cost_fft,cost_solve);
 372
 373     fprintf(debug,"Estimate for relative PME load: %.3f\n",ratio);
 374   }
 375
 376   return ratio;
 377 }