src/gromacs/ewald/pme-redistribute.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "pme-redistribute.h"
  40
  41 #include "config.h"
  42
  43 #include <algorithm>
  44
  45 #include "gromacs/math/vec.h"
  46 #include "gromacs/mdtypes/commrec.h"
  47 #include "gromacs/utility/exceptions.h"
  48 #include "gromacs/utility/fatalerror.h"
  49 #include "gromacs/utility/gmxmpi.h"
  50 #include "gromacs/utility/smalloc.h"
  51
  52 #include "pme-internal.h"
  53 #include "pme-simd.h"
  54
  55 static void pme_calc_pidx(int start, int end,
  56                           matrix recipbox, rvec x[],
  57                           pme_atomcomm_t *atc, int *count)
  58 {
  59     int   nslab, i;
  60     int   si;
  61     real *xptr, s;
  62     real  rxx, ryx, rzx, ryy, rzy;
  63     int  *pd;
  64
  65     /* Calculate PME task index (pidx) for each grid index.
  66      * Here we always assign equally sized slabs to each node
  67      * for load balancing reasons (the PME grid spacing is not used).
  68      */
  69
  70     nslab = atc->nslab;
  71     pd    = atc->pd;
  72
  73     /* Reset the count */
  74     for (i = 0; i < nslab; i++)
  75     {
  76         count[i] = 0;
  77     }
  78
  79     if (atc->dimind == 0)
  80     {
  81         rxx = recipbox[XX][XX];
  82         ryx = recipbox[YY][XX];
  83         rzx = recipbox[ZZ][XX];
  84         /* Calculate the node index in x-dimension */
  85         for (i = start; i < end; i++)
  86         {
  87             xptr   = x[i];
  88             /* Fractional coordinates along box vectors */
  89             s     = nslab*(xptr[XX]*rxx + xptr[YY]*ryx + xptr[ZZ]*rzx);
  90             si    = static_cast<int>(s + 2*nslab) % nslab;
  91             pd[i] = si;
  92             count[si]++;
  93         }
  94     }
  95     else
  96     {
  97         ryy = recipbox[YY][YY];
  98         rzy = recipbox[ZZ][YY];
  99         /* Calculate the node index in y-dimension */
 100         for (i = start; i < end; i++)
 101         {
 102             xptr   = x[i];
 103             /* Fractional coordinates along box vectors */
 104             s     = nslab*(xptr[YY]*ryy + xptr[ZZ]*rzy);
 105             si    = static_cast<int>(s + 2*nslab) % nslab;
 106             pd[i] = si;
 107             count[si]++;
 108         }
 109     }
 110 }
 111
 112 static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[],
 113                                   pme_atomcomm_t *atc)
 114 {
 115     int nthread, thread, slab;
 116
 117     nthread = atc->nthread;
 118
 119 #pragma omp parallel for num_threads(nthread) schedule(static)
 120     for (thread = 0; thread < nthread; thread++)
 121     {
 122         try
 123         {
 124             pme_calc_pidx(natoms* thread   /nthread,
 125                           natoms*(thread+1)/nthread,
 126                           recipbox, x, atc, atc->count_thread[thread]);
 127         }
 128         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
 129     }
 130     /* Non-parallel reduction, since nslab is small */
 131
 132     for (thread = 1; thread < nthread; thread++)
 133     {
 134         for (slab = 0; slab < atc->nslab; slab++)
 135         {
 136             atc->count_thread[0][slab] += atc->count_thread[thread][slab];
 137         }
 138     }
 139 }
 140
 141 static void realloc_splinevec(splinevec th, real **ptr_z, int nalloc)
 142 {
 143     const int padding = 4;
 144     int       i;
 145
 146     srenew(th[XX], nalloc);
 147     srenew(th[YY], nalloc);
 148     /* In z we add padding, this is only required for the aligned SIMD code */
 149     sfree_aligned(*ptr_z);
 150     snew_aligned(*ptr_z, nalloc+2*padding, SIMD4_ALIGNMENT);
 151     th[ZZ] = *ptr_z + padding;
 152
 153     for (i = 0; i < padding; i++)
 154     {
 155         (*ptr_z)[               i] = 0;
 156         (*ptr_z)[padding+nalloc+i] = 0;
 157     }
 158 }
 159
 160 static void pme_realloc_splinedata(splinedata_t *spline, pme_atomcomm_t *atc)
 161 {
 162     int i;
 163
 164     srenew(spline->ind, atc->nalloc);
 165     /* Initialize the index to identity so it works without threads */
 166     for (i = 0; i < atc->nalloc; i++)
 167     {
 168         spline->ind[i] = i;
 169     }
 170
 171     realloc_splinevec(spline->theta, &spline->ptr_theta_z,
 172                       atc->pme_order*atc->nalloc);
 173     realloc_splinevec(spline->dtheta, &spline->ptr_dtheta_z,
 174                       atc->pme_order*atc->nalloc);
 175 }
 176
 177 void pme_realloc_atomcomm_things(pme_atomcomm_t *atc)
 178 {
 179     int nalloc_old, i;
 180
 181     /* We have to avoid a NULL pointer for atc->x to avoid
 182      * possible fatal errors in MPI routines.
 183      */
 184     if (atc->n > atc->nalloc || atc->nalloc == 0)
 185     {
 186         nalloc_old  = atc->nalloc;
 187         atc->nalloc = over_alloc_dd(std::max(atc->n, 1));
 188
 189         if (atc->nslab > 1)
 190         {
 191             srenew(atc->x, atc->nalloc);
 192             srenew(atc->coefficient, atc->nalloc);
 193             srenew(atc->f, atc->nalloc);
 194             for (i = nalloc_old; i < atc->nalloc; i++)
 195             {
 196                 clear_rvec(atc->f[i]);
 197             }
 198         }
 199         if (atc->bSpread)
 200         {
 201             srenew(atc->fractx, atc->nalloc);
 202             srenew(atc->idx, atc->nalloc);
 203
 204             if (atc->nthread > 1)
 205             {
 206                 srenew(atc->thread_idx, atc->nalloc);
 207             }
 208
 209             for (i = 0; i < atc->nthread; i++)
 210             {
 211                 pme_realloc_splinedata(&atc->spline[i], atc);
 212             }
 213         }
 214     }
 215 }
 216
 217 static void pme_dd_sendrecv(pme_atomcomm_t gmx_unused *atc,
 218                             gmx_bool gmx_unused bBackward, int gmx_unused shift,
 219                             void gmx_unused *buf_s, int gmx_unused nbyte_s,
 220                             void gmx_unused *buf_r, int gmx_unused nbyte_r)
 221 {
 222 #if GMX_MPI
 223     int        dest, src;
 224     MPI_Status stat;
 225
 226     if (bBackward == FALSE)
 227     {
 228         dest = atc->node_dest[shift];
 229         src  = atc->node_src[shift];
 230     }
 231     else
 232     {
 233         dest = atc->node_src[shift];
 234         src  = atc->node_dest[shift];
 235     }
 236
 237     if (nbyte_s > 0 && nbyte_r > 0)
 238     {
 239         MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE,
 240                      dest, shift,
 241                      buf_r, nbyte_r, MPI_BYTE,
 242                      src, shift,
 243                      atc->mpi_comm, &stat);
 244     }
 245     else if (nbyte_s > 0)
 246     {
 247         MPI_Send(buf_s, nbyte_s, MPI_BYTE,
 248                  dest, shift,
 249                  atc->mpi_comm);
 250     }
 251     else if (nbyte_r > 0)
 252     {
 253         MPI_Recv(buf_r, nbyte_r, MPI_BYTE,
 254                  src, shift,
 255                  atc->mpi_comm, &stat);
 256     }
 257 #endif
 258 }
 259
 260 static void dd_pmeredist_pos_coeffs(struct gmx_pme_t *pme,
 261                                     int n, gmx_bool bX, rvec *x, const real *data,
 262                                     pme_atomcomm_t *atc)
 263 {
 264     int *commnode, *buf_index;
 265     int  nnodes_comm, i, nsend, local_pos, buf_pos, node, scount, rcount;
 266
 267     commnode  = atc->node_dest;
 268     buf_index = atc->buf_index;
 269
 270     nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
 271
 272     nsend = 0;
 273     for (i = 0; i < nnodes_comm; i++)
 274     {
 275         buf_index[commnode[i]] = nsend;
 276         nsend                 += atc->count[commnode[i]];
 277     }
 278     if (bX)
 279     {
 280         if (atc->count[atc->nodeid] + nsend != n)
 281         {
 282             gmx_fatal(FARGS, "%d particles communicated to PME rank %d are more than 2/3 times the cut-off out of the domain decomposition cell of their charge group in dimension %c.\n"
 283                       "This usually means that your system is not well equilibrated.",
 284                       n - (atc->count[atc->nodeid] + nsend),
 285                       pme->nodeid, 'x'+atc->dimind);
 286         }
 287
 288         if (nsend > pme->buf_nalloc)
 289         {
 290             pme->buf_nalloc = over_alloc_dd(nsend);
 291             srenew(pme->bufv, pme->buf_nalloc);
 292             srenew(pme->bufr, pme->buf_nalloc);
 293         }
 294
 295         atc->n = atc->count[atc->nodeid];
 296         for (i = 0; i < nnodes_comm; i++)
 297         {
 298             scount = atc->count[commnode[i]];
 299             /* Communicate the count */
 300             if (debug)
 301             {
 302                 fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n",
 303                         atc->dimind, atc->nodeid, commnode[i], scount);
 304             }
 305             pme_dd_sendrecv(atc, FALSE, i,
 306                             &scount, sizeof(int),
 307                             &atc->rcount[i], sizeof(int));
 308             atc->n += atc->rcount[i];
 309         }
 310
 311         pme_realloc_atomcomm_things(atc);
 312     }
 313
 314     local_pos = 0;
 315     for (i = 0; i < n; i++)
 316     {
 317         node = atc->pd[i];
 318         if (node == atc->nodeid)
 319         {
 320             /* Copy direct to the receive buffer */
 321             if (bX)
 322             {
 323                 copy_rvec(x[i], atc->x[local_pos]);
 324             }
 325             atc->coefficient[local_pos] = data[i];
 326             local_pos++;
 327         }
 328         else
 329         {
 330             /* Copy to the send buffer */
 331             if (bX)
 332             {
 333                 copy_rvec(x[i], pme->bufv[buf_index[node]]);
 334             }
 335             pme->bufr[buf_index[node]] = data[i];
 336             buf_index[node]++;
 337         }
 338     }
 339
 340     buf_pos = 0;
 341     for (i = 0; i < nnodes_comm; i++)
 342     {
 343         scount = atc->count[commnode[i]];
 344         rcount = atc->rcount[i];
 345         if (scount > 0 || rcount > 0)
 346         {
 347             if (bX)
 348             {
 349                 /* Communicate the coordinates */
 350                 pme_dd_sendrecv(atc, FALSE, i,
 351                                 pme->bufv[buf_pos], scount*sizeof(rvec),
 352                                 atc->x[local_pos], rcount*sizeof(rvec));
 353             }
 354             /* Communicate the coefficients */
 355             pme_dd_sendrecv(atc, FALSE, i,
 356                             pme->bufr+buf_pos, scount*sizeof(real),
 357                             atc->coefficient+local_pos, rcount*sizeof(real));
 358             buf_pos   += scount;
 359             local_pos += atc->rcount[i];
 360         }
 361     }
 362 }
 363
 364 void dd_pmeredist_f(struct gmx_pme_t *pme, pme_atomcomm_t *atc,
 365                     int n, rvec *f,
 366                     gmx_bool bAddF)
 367 {
 368     int *commnode, *buf_index;
 369     int  nnodes_comm, local_pos, buf_pos, i, scount, rcount, node;
 370
 371     commnode  = atc->node_dest;
 372     buf_index = atc->buf_index;
 373
 374     nnodes_comm = std::min(2*atc->maxshift, atc->nslab-1);
 375
 376     local_pos = atc->count[atc->nodeid];
 377     buf_pos   = 0;
 378     for (i = 0; i < nnodes_comm; i++)
 379     {
 380         scount = atc->rcount[i];
 381         rcount = atc->count[commnode[i]];
 382         if (scount > 0 || rcount > 0)
 383         {
 384             /* Communicate the forces */
 385             pme_dd_sendrecv(atc, TRUE, i,
 386                             atc->f[local_pos], scount*sizeof(rvec),
 387                             pme->bufv[buf_pos], rcount*sizeof(rvec));
 388             local_pos += scount;
 389         }
 390         buf_index[commnode[i]] = buf_pos;
 391         buf_pos               += rcount;
 392     }
 393
 394     local_pos = 0;
 395     if (bAddF)
 396     {
 397         for (i = 0; i < n; i++)
 398         {
 399             node = atc->pd[i];
 400             if (node == atc->nodeid)
 401             {
 402                 /* Add from the local force array */
 403                 rvec_inc(f[i], atc->f[local_pos]);
 404                 local_pos++;
 405             }
 406             else
 407             {
 408                 /* Add from the receive buffer */
 409                 rvec_inc(f[i], pme->bufv[buf_index[node]]);
 410                 buf_index[node]++;
 411             }
 412         }
 413     }
 414     else
 415     {
 416         for (i = 0; i < n; i++)
 417         {
 418             node = atc->pd[i];
 419             if (node == atc->nodeid)
 420             {
 421                 /* Copy from the local force array */
 422                 copy_rvec(atc->f[local_pos], f[i]);
 423                 local_pos++;
 424             }
 425             else
 426             {
 427                 /* Copy from the receive buffer */
 428                 copy_rvec(pme->bufv[buf_index[node]], f[i]);
 429                 buf_index[node]++;
 430             }
 431         }
 432     }
 433 }
 434
 435 void
 436 do_redist_pos_coeffs(struct gmx_pme_t *pme, const t_commrec *cr, int start, int homenr,
 437                      gmx_bool bFirst, rvec x[], real *data)
 438 {
 439     int             d;
 440     pme_atomcomm_t *atc;
 441     atc = &pme->atc[0];
 442
 443     for (d = pme->ndecompdim - 1; d >= 0; d--)
 444     {
 445         int             n_d;
 446         rvec           *x_d;
 447         real           *param_d;
 448
 449         if (d == pme->ndecompdim - 1)
 450         {
 451             n_d     = homenr;
 452             x_d     = x + start;
 453             param_d = data;
 454         }
 455         else
 456         {
 457             n_d     = pme->atc[d + 1].n;
 458             x_d     = atc->x;
 459             param_d = atc->coefficient;
 460         }
 461         atc      = &pme->atc[d];
 462         atc->npd = n_d;
 463         if (atc->npd > atc->pd_nalloc)
 464         {
 465             atc->pd_nalloc = over_alloc_dd(atc->npd);
 466             srenew(atc->pd, atc->pd_nalloc);
 467         }
 468         pme_calc_pidx_wrapper(n_d, pme->recipbox, x_d, atc);
 469         /* Redistribute x (only once) and qA/c6A or qB/c6B */
 470         if (DOMAINDECOMP(cr))
 471         {
 472             dd_pmeredist_pos_coeffs(pme, n_d, bFirst, x_d, param_d, atc);
 473         }
 474     }
 475 }