/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Implements atom redistribution functions.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include "redistribute.h"

#include <cstring>

#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"

#include "domdec_internal.h"
#include "utility.h"

/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
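
/* Example of a packed flag word, as stored temporarily in the move
 * buffer and communicated in cggl_flag: a group of 3 atoms that moves
 * forward along the first decomposition dimension (d = 0) is encoded as
 *     3 | DD_FLAG_FW(0)  ==  3 | (1 << 16)  ==  0x10003
 * Bits 0-15 hold the group size (mask DD_FLAG_NRCG); above that there
 * is one forward/backward flag pair per decomposition dimension.
 */

/* Copies vector component \p vec (with 0 <= vec < nvec) of all groups
 * flagged in \p move to the communication buffers, and compacts the
 * home entries of \p src in place when \p bCompact is set.
 *
 * Each moved group occupies a contiguous block in comm->cgcm_state[m]:
 * one rvec for the group center followed by nvec blocks of one rvec
 * per atom; pos_vec[m] skips over the slots filled by the calls for
 * the other vec indices. Returns the compacted home atom count when
 * compacting, otherwise the total atom count.
 */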
static int compact_and_copy_vec_at(int ncg, int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, int vec,
                                   rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for (int i : atomGroup)
                {
                    copy_rvec(src[i], src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            int numAtoms  = atomGroup.size();
            pos_vec[m]   += 1 + vec*numAtoms;
            for (int i : atomGroup)
            {
                copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*numAtoms;
        }
        if (!bCompact)
        {
            home_pos += atomGroup.size();
        }
    }

    return home_pos;
}
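
/* As compact_and_copy_vec_at(), but copies only the single center rvec
 * of each moved group (the first slot of the group's block in
 * comm->cgcm_state[m]). Returns the compacted home group count when
 * compacting, otherwise \p ncg.
 */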
static int compact_and_copy_vec_cg(int ncg, int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[g], src[home_pos++]);
            }
        }
        else
        {
            /* Copy to the communication buffer */
            copy_rvec(src[g], comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + atomGroup.size()*nvec;
        }
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}
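
/* Compacts the home index arrays in place, dropping the groups flagged
 * in \p move, and updates the global-to-local atom lookup accordingly.
 * Returns the new number of home atom groups.
 */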
static int compact_ind(int                     numAtomGroups,
                       const int              *move,
                       gmx::ArrayRef<int>      globalAtomGroupIndices,
                       gmx::RangePartitioning *atomGroups,
                       gmx::ArrayRef<int>      globalAtomIndices,
                       gmx_ga2la_t            *ga2la,
                       char                   *bLocalCG,
                       int                    *cginfo)
{
    atomGroups->clear();
    int home_pos = 0;
    int nat      = 0;
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            atomGroups->appendBlock(nat);
            for (int a : atomGroups->block(g))
            {
                const int a_gl         = globalAtomIndices[a];
                globalAtomIndices[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la, a_gl, nat);
                nat++;
            }
            globalAtomGroupIndices[home_pos] = globalAtomGroupIndices[g];
            cginfo[home_pos]                 = cginfo[g];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for (int a : atomGroups->block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
        }
    }

    GMX_ASSERT(atomGroups->numBlocks() == home_pos, "The atom group data and atomGroups should be consistent");

    return home_pos;
}
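
/* For each group flagged in \p move: removes its atoms from the
 * global-to-local lookup and marks the group as moved in \p cell_index,
 * so the ns grid code can skip it. Used instead of compact_ind() when
 * the home arrays are left with holes (!bCompact).
 */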
static void clear_and_mark_ind(int                           numAtomGroups,
                               const int                    *move,
                               gmx::ArrayRef<const int>      globalAtomGroupIndices,
                               const gmx::RangePartitioning &atomGroups,
                               gmx::ArrayRef<const int>      globalAtomIndices,
                               gmx_ga2la_t                  *ga2la,
                               char                         *bLocalCG,
                               int                          *cell_index)
{
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] >= 0)
        {
            /* Clear the global indices */
            for (int a : atomGroups.block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
            /* Signal that this group has moved using the ns cell index.
             * Here we set it to -1. fill_grid will change it
             * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
             */
            cell_index[g] = -1;
        }
    }
}

static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    gmx_domdec_comm_t *comm;
    char               buf[22];

    comm = dd->comm;

    fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
    if (limitd > 0)
    {
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), limitd, dim2char(dim));
    }
    else
    {
        /* We don't have a limiting distance available: don't print it */
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), dim2char(dim));
    }
    fprintf(fplog, "distance out of cell %f\n",
            dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    if (bHaveCgcmOld)
    {
        fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
                cm_old[XX], cm_old[YY], cm_old[ZZ]);
    }
    fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX], cm_new[YY], cm_new[ZZ]);
    fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
    fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim], comm->cell_x1[dim]);
}

static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd, step, cg, dim, dir,
                      bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    }
    print_cg_move(stderr, dd, step, cg, dim, dir,
                  bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    gmx_fatal(FARGS,
              "%s moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated",
              dd->comm->bCGs ? "A charge group" : "An atom");
}
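
/* Rotates the x, v and cg_p state vectors of atom \p a by 180 degrees
 * around the x-axis, as needed when an atom crosses a screw-PBC
 * boundary; this is only valid for a rectangular box.
 */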
static void rotate_state_atom(t_state *state, int a)
{
    if (state->flags & (1 << estX))
    {
        /* Rotate the complete state; for a rectangular box only */
        state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
        state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
    }
    if (state->flags & (1 << estV))
    {
        state->v[a][YY] = -state->v[a][YY];
        state->v[a][ZZ] = -state->v[a][ZZ];
    }
    if (state->flags & (1 << estCGP))
    {
        state->cg_p[a][YY] = -state->cg_p[a][YY];
        state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
    }
}

/* Returns a pointer to a buffer of size \p numAtomsNew with contents 0 from \p numAtomsOld upwards
 *
 * Note: \p numAtomsOld should either be 0 or match the current buffer size.
 */
static int *getMovedBuffer(gmx_domdec_comm_t *comm,
                           size_t             numAtomsOld,
                           size_t             numAtomsNew)
{
    std::vector<int> &movedBuffer = comm->movedBuffer;

    GMX_RELEASE_ASSERT(numAtomsOld == 0 || movedBuffer.size() == numAtomsOld, "numAtomsOld should either be 0 or match the current size");

    /* Contents up to numAtomsOld should be preserved, clear from numAtomsOld */
    if (numAtomsOld == 0)
    {
        movedBuffer.clear();
    }
    movedBuffer.resize(numAtomsNew);

    return movedBuffer.data();
}

/* Determine to which domains the atom groups in the range \p cg_start, \p cg_end should go.
 *
 * Returns in the move array where the groups should go.
 * Also updates \p cg_cm for jumps over periodic boundaries.
 *
 * \todo Rename cg to atomGroup.
 */
static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                         gmx_domdec_t *dd,
                         t_state *state,
                         ivec tric_dir, matrix tcm,
                         rvec cell_x0, rvec cell_x1,
                         rvec limitd, rvec limit0, rvec limit1,
                         const gmx::RangePartitioning &atomGroups,
                         int cg_start, int cg_end,
                         rvec *cg_cm,
                         int *move)
{
    const int npbcdim = dd->npbcdim;

    for (int g = cg_start; g < cg_end; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        const int  numAtoms  = atomGroup.size();
        // TODO: Rename this center of geometry variable to cogNew
        rvec       cm_new;
        if (numAtoms == 1)
        {
            copy_rvec(state->x[atomGroup.begin()], cm_new);
        }
        else
        {
            real invNumAtoms = 1/static_cast<real>(numAtoms);
            clear_rvec(cm_new);
            for (int k : atomGroup)
            {
                rvec_inc(cm_new, state->x[k]);
            }
            for (int d = 0; d < DIM; d++)
            {
                cm_new[d] = invNumAtoms*cm_new[d];
            }
        }

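        /* dev[d] records how this group crosses the cell boundaries in
         * dimension d: 1 over the upper boundary, -1 over the lower
         * boundary, 0 when it stays within the home cell.
         */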
        ivec dev = { 0 };
        /* Do pbc and check DD cell boundary crossings */
        for (int d = DIM - 1; d >= 0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bool bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                real pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for (int d2 = d + 1; d2 < DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, 1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_dec(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, -1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_inc(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_dec(state->x[k], state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_inc(state->x[k], state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new, cg_cm[g]);

        /* Determine where this cg should go */
        int flag = 0;
        int mc   = -1;
        for (int d = 0; d < dd->ndim; d++)
        {
            int dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        /* Temporarily store the flag in move */
        move[g] = mc + flag;
    }
}

void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                        gmx_domdec_t *dd, ivec tric_dir,
                        t_state *state, PaddedRVecVector *f,
                        t_forcerec *fr,
                        gmx_bool bCompact,
                        t_nrnb *nrnb,
                        int *ncg_stay_home,
                        int *ncg_moved)
{
    int               *move;
    int                npbcdim;
    int                ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
    int                i, cg, d, dim, dim2, dir, d2, d3;
    int                mc, cdd, nrcg, ncg_recv, nvs, nvec, vec;
    int                sbuf[2], rbuf[2];
    int                home_pos_cg, home_pos_at, buf_pos;
    real               pos_d;
    matrix             tcm;
    rvec              *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
    cginfo_mb_t       *cginfo_mb;
    gmx_domdec_comm_t *comm;
    int               *moved;
    int                nthread, thread;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    comm  = dd->comm;
    if (fr->cutoff_scheme == ecutsGROUP)
    {
        cg_cm = fr->cg_cm;
    }

    // Positions are always present, so there's nothing to flag
    bool                bV   = state->flags & (1<<estV);
    bool                bCGP = state->flags & (1<<estCGP);

    DDBufferAccess<int> moveBuffer(comm->intBuffer, dd->globalAtomGroupIndices.size());
    move = moveBuffer.buffer.data();

    npbcdim = dd->npbcdim;

    for (d = 0; (d < DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        if (d < npbcdim)
        {
            limit0[d] = comm->old_cell_x0[d] - limitd[d];
            limit1[d] = comm->old_cell_x1[d] + limitd[d];
        }
        else
        {
            /* We check after communication if a charge group moved
             * more than one cell. Set the pre-comm check limit to float_max.
             */
            limit0[d] = -GMX_FLOAT_MAX;
            limit1[d] =  GMX_FLOAT_MAX;
        }
    }

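    /* Set up the matrix tcm that transforms Cartesian coordinates into
     * lattice coordinates for a triclinic box, so that positions can be
     * compared directly with the cell boundaries below.
     */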
    make_tric_corr_matrix(npbcdim, state->box, tcm);

    const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();

    nthread = gmx_omp_nthreads_get(emntDomdec);

    /* Compute the center of geometry for all home charge groups
     * and put them in the box and determine where they should go.
     */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (thread = 0; thread < nthread; thread++)
    {
        try
        {
            calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
                         cell_x0, cell_x1, limitd, limit0, limit1,
                         atomGrouping,
                         ( thread   *dd->ncg_home)/nthread,
                         ((thread+1)*dd->ncg_home)/nthread,
                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
                         move);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

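    /* Pack each moving group into the send buffer for its target
     * direction: cggl_flag gets DD_CGIBS (= 2) ints per group, the
     * global group index followed by the size+flags word described at
     * the DD_FLAG_* definitions above.
     */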
    for (cg = 0; cg < dd->ncg_home; cg++)
    {
        if (move[cg] >= 0)
        {
            mc       = move[cg];
            int flag = mc & ~DD_FLAG_NRCG;
            mc       = mc & DD_FLAG_NRCG;
            move[cg] = mc;

            std::vector<int> &cggl_flag = comm->cggl_flag[mc];

            /* TODO: See if we can use push_back instead */
            if (static_cast<size_t>((ncg[mc] + 1)*DD_CGIBS) > cggl_flag.size())
            {
                cggl_flag.resize((ncg[mc] + 1)*DD_CGIBS);
            }
            cggl_flag[ncg[mc]*DD_CGIBS  ] = dd->globalAtomGroupIndices[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            nrcg = atomGrouping.block(cg).size();
            cggl_flag[ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
    inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);

    *ncg_moved = 0;
    for (i = 0; i < dd->ndim*2; i++)
    {
        *ncg_moved += ncg[i];
    }

    nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for (mc = 0; mc < dd->ndim*2; mc++)
    {
        size_t nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state[mc].size())
        {
            comm->cgcm_state[mc].resize(nvr);
        }
    }

    switch (fr->cutoff_scheme)
    {
        case ecutsGROUP:
            /* Recalculating cg_cm might be cheaper than communicating,
             * but that could give rise to rounding issues.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, cg_cm, comm, bCompact);
            break;
        case ecutsVERLET:
            /* Without charge groups we send the moved atom coordinates
             * over twice. This is so the code below can be used without
             * many conditionals both with and without charge groups.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, as_rvec_array(state->x.data()), comm, FALSE);
            if (bCompact)
            {
                home_pos_cg -= *ncg_moved;
            }
            break;
        default:
            gmx_incons("unimplemented");
            home_pos_cg = 0;
    }

    vec         = 0;
    home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->x.data()),
                                comm, bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->v.data()),
                                comm, bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->cg_p.data()),
                                comm, bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home, move,
                    dd->globalAtomGroupIndices, &dd->atomGrouping_, dd->globalAtomIndices,
                    dd->ga2la, comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        if (fr->cutoff_scheme == ecutsVERLET)
        {
            moved = getMovedBuffer(comm, 0, dd->ncg_home);
        }
        else
        {
            moved = fr->ns->grid->cell_index;
        }

        clear_and_mark_ind(dd->ncg_home, move,
                           dd->globalAtomGroupIndices, dd->atomGrouping(), dd->globalAtomIndices,
                           dd->ga2la, comm->bLocalCG,
                           moved);
    }

    /* Now we can remove the excess global atom-group indices from the list */
    dd->globalAtomGroupIndices.resize(home_pos_cg);
    dd->atomGrouping_.reduceNumBlocks(home_pos_cg);

    /* We reuse the intBuffer without reacquiring since we are in the same scope */
    DDBufferAccess<int> &flagBuffer = moveBuffer;

    cginfo_mb = fr->cginfo_mb;

    *ncg_stay_home = home_pos_cg;
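
    /* Loop over the decomposition dimensions and forward the moved
     * groups. Groups move at most one cell per dimension, so one pulse
     * per direction suffices; with only two cells along a dimension the
     * forward and backward neighbor coincide, so a single pulse covers
     * both directions.
     */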
    for (d = 0; d < dd->ndim; d++)
    {
        DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, 0);

        dim      = dd->dim[d];
        ncg_recv = 0;
        int nvr  = 0;
        for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
        {
            cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            sbuf[0] = ncg[cdd];
            sbuf[1] = nat[cdd];
            if (debug)
            {
                fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
                        d, dir, sbuf[0], sbuf[1]);
            }
            ddSendrecv(dd, d, dir, sbuf, 2, rbuf, 2);

            flagBuffer.resize((ncg_recv + rbuf[0])*DD_CGIBS);

            /* Communicate the charge group indices, sizes and flags */
            ddSendrecv(dd, d, dir,
                       comm->cggl_flag[cdd].data(), sbuf[0]*DD_CGIBS,
                       flagBuffer.buffer.data() + ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            nvs = ncg[cdd] + nat[cdd]*nvec;
            i   = rbuf[0]  + rbuf[1] *nvec;
            rvecBuffer.resize(nvr + i);

            /* Communicate cgcm and state */
            ddSendrecv(dd, d, dir,
                       as_rvec_array(comm->cgcm_state[cdd].data()), nvs,
                       as_rvec_array(rvecBuffer.buffer.data()) + nvr, i);
            ncg_recv += rbuf[0];
            nvr      += i;
        }

        dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
        if (fr->cutoff_scheme == ecutsGROUP)
        {
            /* Here we resize to more than necessary and shrink later */
            dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
        }

        /* Process the received charge groups */
        buf_pos = 0;
        for (cg = 0; cg < ncg_recv; cg++)
        {
            int   flag = flagBuffer.buffer[cg*DD_CGIBS + 1];
            rvec &pos  = as_rvec_array(rvecBuffer.buffer.data())[buf_pos];

            if (dim >= npbcdim && dd->nc[dim] > 2)
            {
                /* No pbc in this dim and more than one domain boundary.
                 * We do a separate check if a charge group didn't move too far.
                 */
                if (((flag & DD_FLAG_FW(d)) &&
                     pos[dim] > cell_x1[dim]) ||
                    ((flag & DD_FLAG_BW(d)) &&
                     pos[dim] < cell_x0[dim]))
                {
                    cg_move_error(fplog, dd, step, cg, dim,
                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
                                  fr->cutoff_scheme == ecutsGROUP, 0,
                                  pos, pos, pos[dim]);
                }
            }

            mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
                {
                    if (isDlbOn(dd->comm))
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            pos_d = rvecBuffer.buffer[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for (d3 = dim2+1; d3 < DIM; d3++)
                                {
                                    pos_d += pos[d3]*tcm[d3][dim2];
                                }
                            }

                            GMX_ASSERT(dim2 >= 0 && dim2 < DIM, "Keep the static analyzer happy");

                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            flagBuffer.buffer[cg*DD_CGIBS + 1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

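            /* The group size sits in the low bits of the flag word;
             * mc == -1 means the group stays on this rank, otherwise it
             * is appended to the send buffer for direction mc of a
             * later dimension.
             */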
            nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                /* Set the global charge group index and size */
                const int globalAtomGroupIndex = flagBuffer.buffer[cg*DD_CGIBS];
                dd->globalAtomGroupIndices.push_back(globalAtomGroupIndex);
                dd->atomGrouping_.appendBlock(nrcg);
                /* Copy the state from the buffer */
                if (fr->cutoff_scheme == ecutsGROUP)
                {
                    cg_cm = fr->cg_cm;
                    copy_rvec(pos, cg_cm[home_pos_cg]);
                }
                buf_pos++;

                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   globalAtomGroupIndex);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[globalAtomGroupIndex] = TRUE;
                }

                rvec *rvecPtr = as_rvec_array(rvecBuffer.buffer.data());
                for (i = 0; i < nrcg; i++)
                {
                    copy_rvec(rvecPtr[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for (i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for (i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if (static_cast<size_t>((ncg[mc] + 1)*DD_CGIBS) > comm->cggl_flag[mc].size())
                {
                    comm->cggl_flag[mc].resize((ncg[mc] + 1)*DD_CGIBS);
                }
                size_t nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state[mc].size())
                {
                    comm->cgcm_state[mc].resize(nvr + 1 + nrcg*nvec);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc].data() + ncg[mc]*DD_CGIBS,
                       flagBuffer.buffer.data() + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       rvecBuffer.buffer.data() + buf_pos,
                       (1 + nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real count, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    if (fr->cutoff_scheme == ecutsVERLET)
    {
        /* We need to clear the moved flags for the received atoms,
         * because the moved buffer will be passed to the nbnxn gridding call.
         */
        moved = getMovedBuffer(comm, dd->ncg_home, home_pos_cg);

        for (int i = dd->ncg_home; i < home_pos_cg; i++)
        {
            moved[i] = 0;
        }
    }

    dd->ncg_home = home_pos_cg;
    comm->atomRanges.setEnd(DDAtomRanges::Type::Home, home_pos_at);

    if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
    {
        /* We overallocated before, we need to set the right size here */
        dd_resize_state(state, f, comm->atomRanges.numHomeAtoms());
    }

    if (debug)
    {
        fprintf(debug,
                "Finished repartitioning: cgs moved out %d, new home %d\n",
                *ncg_moved, dd->ncg_home-*ncg_moved);
    }
}