src/gromacs/mdlib/perf_est.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2008, The GROMACS development team.
   6  * Copyright (c) 2012, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <math.h>
  42
  43 #include "perf_est.h"
  44 #include "physics.h"
  45 #include "vec.h"
  46 #include "mtop_util.h"
  47 #include "types/commrec.h"
  48 #include "nbnxn_search.h"
  49 #include "nbnxn_consts.h"
  50
  51
  52 /* Computational cost of bonded, non-bonded and PME calculations.
  53  * This will be machine dependent.
  54  * The numbers here are accurate for Intel Core2 and AMD Athlon 64
  55  * in single precision. In double precision PME mesh is slightly cheaper,
  56  * although not so much that the numbers need to be adjusted.
  57  */
  58
  59 /* Cost of a pair interaction in the "group" cut-off scheme" */
  60 #define C_GR_FQ       1.5
  61 #define C_GR_QLJ_CUT  1.5
  62 #define C_GR_QLJ_TAB  2.0
  63 #define C_GR_LJ_CUT   1.0
  64 #define C_GR_LJ_TAB   1.75
  65 /* Cost of 1 water with one Q/LJ atom */
  66 #define C_GR_QLJW_CUT 2.0
  67 #define C_GR_QLJW_TAB 2.25
  68 /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
  69 #define C_GR_QW       1.75
  70
  71 /* Cost of a pair interaction in the "Verlet" cut-off scheme" */
  72 #define C_VT_LJ       0.30
  73 #define C_VT_QLJ_RF   0.40
  74 #define C_VT_Q_RF     0.30
  75 #define C_VT_QLJ_TAB  0.55
  76 #define C_VT_Q_TAB    0.50
  77
  78 /* Cost of PME, with all components running with SSE instructions */
  79 /* Cost of particle reordering and redistribution */
  80 #define C_PME_REDIST  12.0
  81 /* Cost of q spreading and force interpolation per charge (mainly memory) */
  82 #define C_PME_SPREAD  0.30
  83 /* Cost of fft's, will be multiplied with N log(N) */
  84 #define C_PME_FFT     0.20
  85 /* Cost of pme_solve, will be multiplied with N */
  86 #define C_PME_SOLVE   0.50
  87
  88 /* Cost of a bonded interaction divided by the number of (pbc_)dx nrequired */
  89 #define C_BOND        5.0
  90
  91 int n_bonded_dx(gmx_mtop_t *mtop, gmx_bool bExcl)
  92 {
  93     int            mb, nmol, ftype, ndxb, ndx_excl;
  94     int            ndx;
  95     gmx_moltype_t *molt;
  96
  97     /* Count the number of pbc_rvec_sub calls required for bonded interactions.
  98      * This number is also roughly proportional to the computational cost.
  99      */
 100     ndx      = 0;
 101     ndx_excl = 0;
 102     for (mb = 0; mb < mtop->nmolblock; mb++)
 103     {
 104         molt = &mtop->moltype[mtop->molblock[mb].type];
 105         nmol = mtop->molblock[mb].nmol;
 106         for (ftype = 0; ftype < F_NRE; ftype++)
 107         {
 108             if (interaction_function[ftype].flags & IF_BOND)
 109             {
 110                 switch (ftype)
 111                 {
 112                     case F_POSRES:
 113                     case F_FBPOSRES:  ndxb = 1; break;
 114                     case F_CONNBONDS: ndxb = 0; break;
 115                     default:     ndxb      = NRAL(ftype) - 1; break;
 116                 }
 117                 ndx += nmol*ndxb*molt->ilist[ftype].nr/(1 + NRAL(ftype));
 118             }
 119         }
 120         if (bExcl)
 121         {
 122             ndx_excl += nmol*(molt->excls.nra - molt->atoms.nr)/2;
 123         }
 124         else
 125         {
 126             ndx_excl = 0;
 127         }
 128     }
 129
 130     if (debug)
 131     {
 132         fprintf(debug, "ndx bonded %d exclusions %d\n", ndx, ndx_excl);
 133     }
 134
 135     ndx += ndx_excl;
 136
 137     return ndx;
 138 }
 139
 140 static void pp_group_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
 141                           int *nq_tot,
 142                           double *cost_pp,
 143                           gmx_bool *bChargePerturbed)
 144 {
 145     t_atom        *atom;
 146     int            mb, nmol, atnr, cg, a, a0, ncqlj, ncq, nclj;
 147     gmx_bool       bBHAM, bLJcut, bWater, bQ, bLJ;
 148     int            nw, nqlj, nq, nlj;
 149     float          fq, fqlj, flj, fljtab, fqljw, fqw;
 150     t_iparams     *iparams;
 151     gmx_moltype_t *molt;
 152
 153     bBHAM = (mtop->ffparams.functype[0] == F_BHAM);
 154
 155     bLJcut = ((ir->vdwtype == evdwCUT) && !bBHAM);
 156
 157     /* Computational cost of bonded, non-bonded and PME calculations.
 158      * This will be machine dependent.
 159      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 160      * in single precision. In double precision PME mesh is slightly cheaper,
 161      * although not so much that the numbers need to be adjusted.
 162      */
 163     fq    = C_GR_FQ;
 164     fqlj  = (bLJcut ? C_GR_QLJ_CUT : C_GR_QLJ_TAB);
 165     flj   = (bLJcut ? C_GR_LJ_CUT  : C_GR_LJ_TAB);
 166     /* Cost of 1 water with one Q/LJ atom */
 167     fqljw = (bLJcut ? C_GR_QLJW_CUT : C_GR_QLJW_TAB);
 168     /* Cost of 1 water with one Q atom or with 1/3 water (LJ negligible) */
 169     fqw   = C_GR_QW;
 170
 171     iparams           = mtop->ffparams.iparams;
 172     atnr              = mtop->ffparams.atnr;
 173     nw                = 0;
 174     nqlj              = 0;
 175     nq                = 0;
 176     nlj               = 0;
 177     *bChargePerturbed = FALSE;
 178     for (mb = 0; mb < mtop->nmolblock; mb++)
 179     {
 180         molt = &mtop->moltype[mtop->molblock[mb].type];
 181         atom = molt->atoms.atom;
 182         nmol = mtop->molblock[mb].nmol;
 183         a    = 0;
 184         for (cg = 0; cg < molt->cgs.nr; cg++)
 185         {
 186             bWater = !bBHAM;
 187             ncqlj  = 0;
 188             ncq    = 0;
 189             nclj   = 0;
 190             a0     = a;
 191             while (a < molt->cgs.index[cg+1])
 192             {
 193                 bQ  = (atom[a].q != 0 || atom[a].qB != 0);
 194                 bLJ = (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 195                        iparams[(atnr+1)*atom[a].type].lj.c12 != 0);
 196                 if (atom[a].q != atom[a].qB)
 197                 {
 198                     *bChargePerturbed = TRUE;
 199                 }
 200                 /* This if this atom fits into water optimization */
 201                 if (!((a == a0   &&  bQ &&  bLJ) ||
 202                       (a == a0+1 &&  bQ && !bLJ) ||
 203                       (a == a0+2 &&  bQ && !bLJ && atom[a].q == atom[a-1].q) ||
 204                       (a == a0+3 && !bQ &&  bLJ)))
 205                 {
 206                     bWater = FALSE;
 207                 }
 208                 if (bQ && bLJ)
 209                 {
 210                     ncqlj++;
 211                 }
 212                 else
 213                 {
 214                     if (bQ)
 215                     {
 216                         ncq++;
 217                     }
 218                     if (bLJ)
 219                     {
 220                         nclj++;
 221                     }
 222                 }
 223                 a++;
 224             }
 225             if (bWater)
 226             {
 227                 nw   += nmol;
 228             }
 229             else
 230             {
 231                 nqlj += nmol*ncqlj;
 232                 nq   += nmol*ncq;
 233                 nlj  += nmol*nclj;
 234             }
 235         }
 236     }
 237
 238     *nq_tot = nq + nqlj + nw*3;
 239
 240     if (debug)
 241     {
 242         fprintf(debug, "nw %d nqlj %d nq %d nlj %d\n", nw, nqlj, nq, nlj);
 243     }
 244
 245     /* For the PP non-bonded cost it is (unrealistically) assumed
 246      * that all atoms are distributed homogeneously in space.
 247      * Factor 3 is used because a water molecule has 3 atoms
 248      * (and TIP4P effectively has 3 interactions with (water) atoms)).
 249      */
 250     *cost_pp = 0.5*(fqljw*nw*nqlj +
 251                     fqw  *nw*(3*nw + nq) +
 252                     fqlj *nqlj*nqlj +
 253                     fq   *nq*(3*nw + nqlj + nq) +
 254                     flj  *nlj*(nw + nqlj + nlj))
 255         *4/3*M_PI*ir->rlist*ir->rlist*ir->rlist/det(box);
 256 }
 257
 258 static void pp_verlet_load(gmx_mtop_t *mtop, t_inputrec *ir, matrix box,
 259                            int *nq_tot,
 260                            double *cost_pp,
 261                            gmx_bool *bChargePerturbed)
 262 {
 263     t_atom        *atom;
 264     int            mb, nmol, atnr, cg, a, a0, nqlj, nq, nlj;
 265     gmx_bool       bQRF;
 266     t_iparams     *iparams;
 267     gmx_moltype_t *molt;
 268     float          r_eff;
 269     double         nat;
 270
 271     bQRF = (EEL_RF(ir->coulombtype) || ir->coulombtype == eelCUT);
 272
 273     iparams           = mtop->ffparams.iparams;
 274     atnr              = mtop->ffparams.atnr;
 275     nqlj              = 0;
 276     nq                = 0;
 277     *bChargePerturbed = FALSE;
 278     for (mb = 0; mb < mtop->nmolblock; mb++)
 279     {
 280         molt = &mtop->moltype[mtop->molblock[mb].type];
 281         atom = molt->atoms.atom;
 282         nmol = mtop->molblock[mb].nmol;
 283         a    = 0;
 284         for (a = 0; a < molt->atoms.nr; a++)
 285         {
 286             if (atom[a].q != 0 || atom[a].qB != 0)
 287             {
 288                 if (iparams[(atnr+1)*atom[a].type].lj.c6  != 0 ||
 289                     iparams[(atnr+1)*atom[a].type].lj.c12 != 0)
 290                 {
 291                     nqlj += nmol;
 292                 }
 293                 else
 294                 {
 295                     nq += nmol;
 296                 }
 297             }
 298             if (atom[a].q != atom[a].qB)
 299             {
 300                 *bChargePerturbed = TRUE;
 301             }
 302         }
 303     }
 304
 305     nlj = mtop->natoms - nqlj - nq;
 306
 307     *nq_tot = nqlj + nq;
 308
 309     /* Effective cut-off for cluster pair list of 4x4 atoms */
 310     r_eff = ir->rlist + nbnxn_get_rlist_effective_inc(NBNXN_CPU_CLUSTER_I_SIZE, mtop->natoms/det(box));
 311
 312     if (debug)
 313     {
 314         fprintf(debug, "nqlj %d nq %d nlj %d rlist %.3f r_eff %.3f\n",
 315                 nqlj, nq, nlj, ir->rlist, r_eff);
 316     }
 317
 318     /* For the PP non-bonded cost it is (unrealistically) assumed
 319      * that all atoms are distributed homogeneously in space.
 320      */
 321     /* Convert mtop->natoms to double to avoid int overflow */
 322     nat      = mtop->natoms;
 323     *cost_pp = 0.5*(nqlj*nat*(bQRF ? C_VT_QLJ_RF : C_VT_QLJ_TAB) +
 324                     nq*nat*(bQRF ? C_VT_Q_RF : C_VT_Q_TAB) +
 325                     nlj*nat*C_VT_LJ)
 326         *4/3*M_PI*r_eff*r_eff*r_eff/det(box);
 327 }
 328
 329 float pme_load_estimate(gmx_mtop_t *mtop, t_inputrec *ir, matrix box)
 330 {
 331     t_atom        *atom;
 332     int            mb, nmol, atnr, cg, a, a0, nq_tot;
 333     gmx_bool       bBHAM, bLJcut, bChargePerturbed, bWater, bQ, bLJ;
 334     double         cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve, cost_pme;
 335     float          ratio;
 336     t_iparams     *iparams;
 337     gmx_moltype_t *molt;
 338
 339     /* Computational cost of bonded, non-bonded and PME calculations.
 340      * This will be machine dependent.
 341      * The numbers here are accurate for Intel Core2 and AMD Athlon 64
 342      * in single precision. In double precision PME mesh is slightly cheaper,
 343      * although not so much that the numbers need to be adjusted.
 344      */
 345
 346     iparams = mtop->ffparams.iparams;
 347     atnr    = mtop->ffparams.atnr;
 348
 349     cost_bond = C_BOND*n_bonded_dx(mtop, TRUE);
 350
 351     if (ir->cutoff_scheme == ecutsGROUP)
 352     {
 353         pp_group_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
 354     }
 355     else
 356     {
 357         pp_verlet_load(mtop, ir, box, &nq_tot, &cost_pp, &bChargePerturbed);
 358     }
 359
 360     cost_redist = C_PME_REDIST*nq_tot;
 361     cost_spread = C_PME_SPREAD*nq_tot*pow(ir->pme_order, 3);
 362     cost_fft    = C_PME_FFT*ir->nkx*ir->nky*ir->nkz*log(ir->nkx*ir->nky*ir->nkz);
 363     cost_solve  = C_PME_SOLVE*ir->nkx*ir->nky*ir->nkz;
 364
 365     if (ir->efep != efepNO && bChargePerturbed)
 366     {
 367         /* All PME work, except redist & spline coefficient calculation, doubles */
 368         cost_spread *= 2;
 369         cost_fft    *= 2;
 370         cost_solve  *= 2;
 371     }
 372
 373     cost_pme = cost_redist + cost_spread + cost_fft + cost_solve;
 374
 375     ratio = cost_pme/(cost_bond + cost_pp + cost_pme);
 376
 377     if (debug)
 378     {
 379         fprintf(debug,
 380                 "cost_bond   %f\n"
 381                 "cost_pp     %f\n"
 382                 "cost_redist %f\n"
 383                 "cost_spread %f\n"
 384                 "cost_fft    %f\n"
 385                 "cost_solve  %f\n",
 386                 cost_bond, cost_pp, cost_redist, cost_spread, cost_fft, cost_solve);
 387
 388         fprintf(debug, "Estimate for relative PME load: %.3f\n", ratio);
 389     }
 390
 391     return ratio;
 392 }