src/gromacs/mdlib/perf_est.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2008, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <math.h>
  40
  41 #include "perf_est.h"
  42 #include "physics.h"
  43 #include "vec.h"
  44 #include "mtop_util.h"
  45 #include "types/commrec.h"
  46 #include "nbnxn_search.h"
  47 #include "nbnxn_consts.h"
  48
  49
  50 /* Computational cost of bonded, non-bonded and PME calculations.
  51  * This will be machine dependent.
  52  * The numbers here are accurate for Intel Core2 and AMD Athlon 64
  53  * in single precision. In double precision PME mesh is slightly cheaper,
  54  * although not so much that the numbers need to be adjusted.
  55  */
  56
  57 /* Cost of a pair interaction in the "group" cut-off scheme" */
  58 #define C_GR_FQ       1.5
  59 #define C_GR_QLJ_CUT  1.5
  60 #define C_GR_QLJ_TAB  2.0
  61 #define C_GR_LJ_CUT   1.0
  62 #define C_GR_LJ_TAB   1.75
  63 /* Cost of 1 water with one Q/LJ atom */
  64 #define C_GR_QLJW_CUT 2.0
  65 #define C_GR_QLJW_TAB 2.25
  66 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
  67 #define C_GR_QW       1.75
  68
  69 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
  70 #define C_VT_LJ       0.30
  71 #define C_VT_QLJ_RF   0.40
  72 #define C_VT_Q_RF     0.30
  73 #define C_VT_QLJ_TAB  0.55
  74 #define C_VT_Q_TAB    0.50
  75
  76 /* Cost of PME, with all components running with SSE instructions */
  77 /* Cost of particle reordering and redistribution */
  78 #define C_PME_REDIST  12.0
  79 /* Cost of q spreading and force interpolation per charge (mainly memory) */
  80 #define C_PME_SPREAD  0.30
  81 /* Cost of fft's, will be multiplied with N log(N) */
  82 #define C_PME_FFT     0.20
  83 /* Cost of pme_solve, will be multiplied with N */
  84 #define C_PME_SOLVE   0.50
  85
  86 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
  87 #define C_BOND        5.0
  88
  89 int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)
  90 {
  91     int            mb, nmol, ftype, ndxb, ndx_excl;
  92     int            ndx;
  93     gmx_moltype_t *molt;
  94
  95     /* Count the number of pbc_rvec_sub calls required for bonded interactions.
  96      * This number is also roughly proportional to the computational cost.
  97      */
  98     ndx      = 0;
  99     ndx_excl = 0;
 100     for (mb = 0; mb < mtop->nmolblock; mb++)
 101     {
 102         molt = &mtop->moltype[mtop->molblock[mb].type];
 103         nmol = mtop->molblock[mb].nmol;
 104         for (ftype = 0; ftype < F_NRE; ftype++)
 105         {
 106             if (interaction_function[ftype].flags & IF_BOND)
 107             {
 108                 switch (ftype)
 109                 {
 110                     case F_POSRES:
 111                     case F_FBPOSRES:  ndxb = 1; break;
 112                     case F_CONNBONDS: ndxb = 0; break;
 113                     default:     ndxb      = NRAL(ftype) - 1; break;
 114                 }
 115                 ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
 116             }
 117         }
 118         if (bExcl)
 119         {
 120             ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
 121         }
 122         else
 123         {
 124             ndx_excl = 0;
 125         }
 126     }
 127
 128     if (debug)
 129     {
 130         fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);
 131     }
 132
 133     ndx += ndx_excl;
 134
 135     return ndx;
 136 }
 137
 138 static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
 139                           int *nq_tot,
 140                           double *cost_pp,
 141                           gmx_bool *bChargePerturbed)
 142 {
 143     t_atom        *atom;
 144     int            mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;
 145     gmx_bool       bBHAM, bLJcut, bWater, bQ, bLJ;
 146     int            nw, nqlj, nq, nlj;
 147     float          fq, fqlj, flj, fljtab, fqljw, fqw;
 148     t_iparams     *iparams;
 149     gmx_moltype_t *molt;
 150
 151     bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 152
 153     bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
 154
 155     /* Computational cost of bonded, non-bonded and PME calculations.
 156      * This will be machine dependent.
 157      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 158      * in single precision. In double precision PME mesh is slightly cheaper,
 159      * although not so much that the numbers need to be adjusted.
 160      */
 161     fq    = C_GR_FQ;
 162     fqlj  = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
 163     flj   = (bLJcut ? C_GR_LJ_CUT  : C_GR_LJ_TAB);
 164     /* Cost of 1 water with one Q/LJ atom */
 165     fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
 166     /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
 167     fqw   = C_GR_QW;
 168
 169     iparams           = mtop->ffparams.iparams;
 170     atnr              = mtop->ffparams.atnr;
 171     nw                = 0;
 172     nqlj              = 0;
 173     nq                = 0;
 174     nlj               = 0;
 175     *bChargePerturbed = FALSE;
 176     for (mb = 0; mb < mtop->nmolblock; mb++)
 177     {
 178         molt = &mtop->moltype[mtop->molblock[mb].type];
 179         atom = molt->atoms.atom;
 180         nmol = mtop->molblock[mb].nmol;
 181         a    = 0;
 182         for (cg = 0; cg < molt->cgs.nr; cg++)
 183         {
 184             bWater = !bBHAM;
 185             ncqlj  = 0;
 186             ncq    = 0;
 187             nclj   = 0;
 188             a0     = a;
 189             while (a < molt->cgs.index[cg+1])
 190             {
 191                 bQ  = (atom[a].q != 0 || atom[a].qB != 0);
 192                 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 193                        iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
 194                 if (atom[a].q != atom[a].qB)
 195                 {
 196                     *bChargePerturbed = TRUE;
 197                 }
 198                 /* This if this atom fits into water optimization */
 199                 if (!((a == a0   &&  bQ &&  bLJ) ||
 200                       (a == a0+1 &&  bQ && !bLJ) ||
 201                       (a == a0+2 &&  bQ && !bLJ && atom[a].q == atom[a-1].q) ||
 202                       (a == a0+3 && !bQ &&  bLJ)))
 203                 {
 204                     bWater = FALSE;
 205                 }
 206                 if (bQ && bLJ)
 207                 {
 208                     ncqlj++;
 209                 }
 210                 else
 211                 {
 212                     if (bQ)
 213                     {
 214                         ncq++;
 215                     }
 216                     if (bLJ)
 217                     {
 218                         nclj++;
 219                     }
 220                 }
 221                 a++;
 222             }
 223             if (bWater)
 224             {
 225                 nw   += nmol;
 226             }
 227             else
 228             {
 229                 nqlj += nmol*ncqlj;
 230                 nq   += nmol*ncq;
 231                 nlj  += nmol*nclj;
 232             }
 233         }
 234     }
 235
 236     *nq_tot = nq + nqlj + nw*3;
 237
 238     if (debug)
 239     {
 240         fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);
 241     }
 242
 243     /* For the PP non-bonded cost it is (unrealistically) assumed
 244      * that all atoms are distributed homogeneously in space.
 245      * Factor 3 is used because a water molecule has 3 atoms
 246      * (and TIP4P effectively has 3 interactions with (water) atoms)).
 247      */
 248     *cost_pp = 0.5*(fqljw*nw*nqlj +
 249                     fqw  *nw*(3*nw + nq) +
 250                     fqlj *nqlj*nqlj +
 251                     fq   *nq*(3*nw + nqlj + nq) +
 252                     flj  *nlj*(nw + nqlj + nlj))
 253         *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
 254 }
 255
 256 static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
 257                            int *nq_tot,
 258                            double *cost_pp,
 259                            gmx_bool *bChargePerturbed)
 260 {
 261     t_atom        *atom;
 262     int            mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;
 263     gmx_bool       bQRF;
 264     t_iparams     *iparams;
 265     gmx_moltype_t *molt;
 266     float          r_eff;
 267     double         nat;
 268
 269     bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
 270
 271     iparams           = mtop->ffparams.iparams;
 272     atnr              = mtop->ffparams.atnr;
 273     nqlj              = 0;
 274     nq                = 0;
 275     *bChargePerturbed = FALSE;
 276     for (mb = 0; mb < mtop->nmolblock; mb++)
 277     {
 278         molt = &mtop->moltype[mtop->molblock[mb].type];
 279         atom = molt->atoms.atom;
 280         nmol = mtop->molblock[mb].nmol;
 281         a    = 0;
 282         for (a = 0; a < molt->atoms.nr; a++)
 283         {
 284             if (atom[a].q != 0 || atom[a].qB != 0)
 285             {
 286                 if (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 287                     iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
 288                 {
 289                     nqlj += nmol;
 290                 }
 291                 else
 292                 {
 293                     nq += nmol;
 294                 }
 295             }
 296             if (atom[a].q != atom[a].qB)
 297             {
 298                 *bChargePerturbed = TRUE;
 299             }
 300         }
 301     }
 302
 303     nlj = mtop->natoms - nqlj - nq;
 304
 305     *nq_tot = nqlj + nq;
 306
 307     /* Effective cut-off for cluster pair list of 4x4 atoms */
 308     r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE, mtop->natoms/det(box));
 309
 310     if (debug)
 311     {
 312         fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
 313                 nqlj, nq, nlj, ir->rlist, r_eff);
 314     }
 315
 316     /* For the PP non-bonded cost it is (unrealistically) assumed
 317      * that all atoms are distributed homogeneously in space.
 318      */
 319     /* Convert mtop->natoms to double to avoid int overflow */
 320     nat      = mtop->natoms;
 321     *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
 322                     nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
 323                     nlj*nat*C_VT_LJ)
 324         *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
 325 }
 326
 327 float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
 328 {
 329     t_atom        *atom;
 330     int            mb, nmol, atnr, cg, a, a0, nq_tot;
 331     gmx_bool       bBHAM, bLJcut, bChargePerturbed, bWater, bQ, bLJ;
 332     double         cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
 333     float          ratio;
 334     t_iparams     *iparams;
 335     gmx_moltype_t *molt;
 336
 337     /* Computational cost of bonded, non-bonded and PME calculations.
 338      * This will be machine dependent.
 339      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 340      * in single precision. In double precision PME mesh is slightly cheaper,
 341      * although not so much that the numbers need to be adjusted.
 342      */
 343
 344     iparams = mtop->ffparams.iparams;
 345     atnr    = mtop->ffparams.atnr;
 346
 347     cost_bond = C_BOND*n_bonded_dx(mtop, TRUE);
 348
 349     if (ir->cutoff_scheme == ecutsGROUP)
 350     {
 351         pp_group_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
 352     }
 353     else
 354     {
 355         pp_verlet_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
 356     }
 357
 358     cost_redist = C_PME_REDIST*nq_tot;
 359     cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order, 3);
 360     cost_fft    = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
 361     cost_solve  = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
 362
 363     if (ir->efep != efepNO && bChargePerturbed)
 364     {
 365         /* All PME work, except redist & spline coefficient calculation, doubles */
 366         cost_spread *= 2;
 367         cost_fft    *= 2;
 368         cost_solve  *= 2;
 369     }
 370
 371     cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
 372
 373     ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
 374
 375     if (debug)
 376     {
 377         fprintf(debug,
 378                 "cost_bond   %f\n"
 379                 "cost_pp     %f\n"
 380                 "cost_redist %f\n"
 381                 "cost_spread %f\n"
 382                 "cost_fft    %f\n"
 383                 "cost_solve  %f\n",
 384                 cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);
 385
 386         fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
 387     }
 388
 389     return ratio;
 390 }