src/gromacs/domdec/redistribute.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Implements atom redistribution functions.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include "redistribute.h"

#include <cstring>

#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"

#include "domdec_internal.h"
#include "utility.h"

/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))

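/* For each atom group that moves, copy vector number \p vec (of \p nvec
 * vectors per group) from \p src into the communication buffer of the
 * destination cell; with \p bCompact, also compact the remaining home
 * entries of \p src in place. Returns the number of home atom entries,
 * which still includes holes when \p bCompact is false.
 */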
static int compact_and_copy_vec_at(int ncg, const int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, int vec,
                                   rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for (int i : atomGroup)
                {
                    copy_rvec(src[i], src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            int numAtoms  = atomGroup.size();
            pos_vec[m]   += 1 + vec*numAtoms;
            for (int i : atomGroup)
            {
                copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*numAtoms;
        }
        if (!bCompact)
        {
            home_pos += atomGroup.size();
        }
    }

    return home_pos;
}

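/* Like compact_and_copy_vec_at(), but for per-group data in \p src
 * (one rvec per group, e.g. the charge group centers): copies entries
 * for moving groups to the communication buffers and, with \p bCompact,
 * compacts the remaining home entries in place. Returns the number of
 * home group entries (equal to \p ncg when not compacting).
 */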
static int compact_and_copy_vec_cg(int ncg, const int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[g], src[home_pos++]);
            }
        }
        else
        {
            /* Copy to the communication buffer */
            copy_rvec(src[g], comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + atomGroup.size()*nvec;
        }
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}

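/* Compact the local index arrays in place for the atom groups that stay
 * home, update the global-to-local atom lookup accordingly, and remove
 * the atoms of moved groups from \p ga2la and \p bLocalCG. Returns the
 * new number of home groups.
 */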
static int compact_ind(int                     numAtomGroups,
                       const int              *move,
                       gmx::ArrayRef<int>      globalAtomGroupIndices,
                       gmx::RangePartitioning *atomGroups,
                       gmx::ArrayRef<int>      globalAtomIndices,
                       gmx_ga2la_t            *ga2la,
                       char                   *bLocalCG,
                       int                    *cginfo)
{
    atomGroups->clear();
    int home_pos = 0;
    int nat      = 0;
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            atomGroups->appendBlock(nat);
            for (int a : atomGroups->block(g))
            {
                const int a_gl         = globalAtomIndices[a];
                globalAtomIndices[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la, a_gl, nat);
                nat++;
            }
            globalAtomGroupIndices[home_pos] = globalAtomGroupIndices[g];
            cginfo[home_pos]                 = cginfo[g];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for (int a : atomGroups->block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
        }
    }

    GMX_ASSERT(atomGroups->numBlocks() == home_pos, "The atom group data and atomGroups should be consistent");

    return home_pos;
}

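/* For all atom groups that move, remove their atoms from \p ga2la and
 * \p bLocalCG and signal the move by setting the group's entry in
 * \p cell_index to -1.
 */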
static void clear_and_mark_ind(int                           numAtomGroups,
                               const int                    *move,
                               gmx::ArrayRef<const int>      globalAtomGroupIndices,
                               const gmx::RangePartitioning &atomGroups,
                               gmx::ArrayRef<const int>      globalAtomIndices,
                               gmx_ga2la_t                  *ga2la,
                               char                         *bLocalCG,
                               int                          *cell_index)
{
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] >= 0)
        {
            /* Clear the global indices */
            for (int a : atomGroups.block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
            /* Signal that this group has moved using the ns cell index.
             * Here we set it to -1. fill_grid will change it
             * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
             */
            cell_index[g] = -1;
        }
    }
}

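/* Print the details of a charge group or atom that moved too far in one
 * domain decomposition step: the old and new coordinates and the old and
 * new cell boundaries in the offending dimension.
 */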
static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    gmx_domdec_comm_t *comm;
    char               buf[22];

    comm = dd->comm;

    fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
    if (limitd > 0)
    {
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), limitd, dim2char(dim));
    }
    else
    {
        /* We don't have a limiting distance available: don't print it */
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), dim2char(dim));
    }
    fprintf(fplog, "distance out of cell %f\n",
            dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    if (bHaveCgcmOld)
    {
        fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
                cm_old[XX], cm_old[YY], cm_old[ZZ]);
    }
    fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX], cm_new[YY], cm_new[ZZ]);
    fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
    fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim], comm->cell_x1[dim]);
}

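/* Print the details of a too-large charge group or atom displacement to
 * the log file and to stderr, then exit with a fatal error.
 */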
static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd, step, cg, dim, dir,
                      bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    }
    print_cg_move(stderr, dd, step, cg, dim, dir,
                  bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    gmx_fatal(FARGS,
              "%s moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated",
              dd->comm->bCGs ? "A charge group" : "An atom");
}

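/* Apply the screw-PBC transformation to atom \p a: mirror the y and z
 * coordinates in the box and negate the y and z components of the
 * velocity and the cg_p vector, when present in \p state.
 */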
static void rotate_state_atom(t_state *state, int a)
{
    if (state->flags & (1 << estX))
    {
        /* Rotate the complete state; for a rectangular box only */
        state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
        state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
    }
    if (state->flags & (1 << estV))
    {
        state->v[a][YY] = -state->v[a][YY];
        state->v[a][ZZ] = -state->v[a][ZZ];
    }
    if (state->flags & (1 << estCGP))
    {
        state->cg_p[a][YY] = -state->cg_p[a][YY];
        state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
    }
}

/* Returns a pointer to a buffer of size \p numAtomsNew with contents 0 from numAtomsOld upwards
 *
 * Note: numAtomsOld should either be 0 or match the current buffer size.
 */
static int *getMovedBuffer(gmx_domdec_comm_t *comm,
                           size_t             numAtomsOld,
                           size_t             numAtomsNew)
{
    std::vector<int> &movedBuffer = comm->movedBuffer;

    GMX_RELEASE_ASSERT(numAtomsOld == 0 || movedBuffer.size() == numAtomsOld, "numAtomsOld should either be 0 or match the current size");

    /* Contents up to numAtomsOld should be preserved, clear from numAtomsOld */
    if (numAtomsOld == 0)
    {
        movedBuffer.clear();
    }
    movedBuffer.resize(numAtomsNew);

    return movedBuffer.data();
}

/* Determine to which domains the atom groups in the range \p cg_start, \p cg_end should go.
 *
 * Returns in the \p move array where the groups should go.
 * Also updates \p cg_cm for jumps over periodic boundaries.
 *
 * \todo Rename cg to atomGroup.
 */
static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                         gmx_domdec_t *dd,
                         t_state *state,
                         const ivec tric_dir, matrix tcm,
                         const rvec cell_x0, const rvec cell_x1,
                         rvec limitd, const rvec limit0, const rvec limit1,
                         const gmx::RangePartitioning &atomGroups,
                         int cg_start, int cg_end,
                         rvec *cg_cm,
                         int *move)
{
    const int npbcdim = dd->npbcdim;

    for (int g = cg_start; g < cg_end; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        const int  numAtoms  = atomGroup.size();
        // TODO: Rename this center of geometry variable to cogNew
        rvec       cm_new;
        if (numAtoms == 1)
        {
            copy_rvec(state->x[atomGroup.begin()], cm_new);
        }
        else
        {
            real invNumAtoms = 1/static_cast<real>(numAtoms);
            clear_rvec(cm_new);
            for (int k : atomGroup)
            {
                rvec_inc(cm_new, state->x[k]);
            }
            for (int d = 0; d < DIM; d++)
            {
                cm_new[d] = invNumAtoms*cm_new[d];
            }
        }

        ivec dev = { 0 };
        /* Do pbc and check DD cell boundary crossings */
        for (int d = DIM - 1; d >= 0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bool bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                real pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for (int d2 = d + 1; d2 < DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, 1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_dec(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, -1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_inc(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_dec(state->x[k], state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_inc(state->x[k], state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new, cg_cm[g]);

        /* Determine where this cg should go */
        int flag = 0;
        int mc   = -1;
        for (int d = 0; d < dd->ndim; d++)
        {
            int dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        /* Temporarily store the flag in move */
        move[g] = mc + flag;
    }
}

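/* Redistribute the home atom groups over the domain decomposition grid:
 * determine for each home group whether it left the home cell, send the
 * moved groups and their state to the neighboring cells along each
 * decomposition dimension, append the received groups that belong here,
 * and update the local indices and counts. With \p bCompact the home
 * arrays are compacted in place; otherwise moved entries are only marked
 * and cleaned up by the subsequent sorting.
 */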
void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                        gmx_domdec_t *dd, ivec tric_dir,
                        t_state *state, PaddedRVecVector *f,
                        t_forcerec *fr,
                        gmx_bool bCompact,
                        t_nrnb *nrnb,
                        int *ncg_stay_home,
                        int *ncg_moved)
{
    gmx_domdec_comm_t *comm = dd->comm;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    rvec *cg_cm = nullptr;
    if (fr->cutoff_scheme == ecutsGROUP)
    {
        cg_cm = fr->cg_cm;
    }

    // Positions are always present, so there's nothing to flag
    bool                bV   = state->flags & (1<<estV);
    bool                bCGP = state->flags & (1<<estCGP);

    DDBufferAccess<int> moveBuffer(comm->intBuffer, dd->globalAtomGroupIndices.size());
    int                *move = moveBuffer.buffer.data();

    const int           npbcdim = dd->npbcdim;

    rvec                cell_x0, cell_x1, limitd, limit0, limit1;
    for (int d = 0; (d < DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        if (d < npbcdim)
        {
            limit0[d] = comm->old_cell_x0[d] - limitd[d];
            limit1[d] = comm->old_cell_x1[d] + limitd[d];
        }
        else
        {
            /* We check after communication if a charge group moved
             * more than one cell. Set the pre-comm check limit to float_max.
             */
            limit0[d] = -GMX_FLOAT_MAX;
            limit1[d] =  GMX_FLOAT_MAX;
        }
    }

    matrix tcm;
    make_tric_corr_matrix(npbcdim, state->box, tcm);

    const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();

    const int                     nthread = gmx_omp_nthreads_get(emntDomdec);

    /* Compute the center of geometry for all home charge groups,
     * put them in the box and determine where they should go.
     */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
                         cell_x0, cell_x1, limitd, limit0, limit1,
                         atomGrouping,
                         ( thread   *dd->ncg_home)/nthread,
                         ((thread+1)*dd->ncg_home)/nthread,
                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
                         move);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    int ncg[DIM*2] = { 0 };
    int nat[DIM*2] = { 0 };
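    /* Collect the groups that leave the home cell: count them per target
     * cell and store each group's global index, size and direction flags
     * in the cggl_flag send buffers.
     */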
    for (int cg = 0; cg < dd->ncg_home; cg++)
    {
        if (move[cg] >= 0)
        {
            const int flag = move[cg] & ~DD_FLAG_NRCG;
            const int mc   = move[cg] & DD_FLAG_NRCG;
            move[cg]       = mc;

            std::vector<int> &cggl_flag = comm->cggl_flag[mc];

            /* TODO: See if we can use push_back instead */
            if ((ncg[mc] + 1)*DD_CGIBS > gmx::index(cggl_flag.size()))
            {
                cggl_flag.resize((ncg[mc] + 1)*DD_CGIBS);
            }
            cggl_flag[ncg[mc]*DD_CGIBS  ] = dd->globalAtomGroupIndices[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            const int nrcg = atomGrouping.block(cg).size();
            cggl_flag[ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
    inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);

    *ncg_moved = 0;
    for (int i = 0; i < dd->ndim*2; i++)
    {
        *ncg_moved += ncg[i];
    }

    int nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for (int mc = 0; mc < dd->ndim*2; mc++)
    {
        size_t nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state[mc].size())
        {
            comm->cgcm_state[mc].resize(nvr);
        }
    }

    int home_pos_cg;
    switch (fr->cutoff_scheme)
    {
        case ecutsGROUP:
            /* Recalculating cg_cm might be cheaper than communicating,
             * but that could give rise to rounding issues.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, cg_cm, comm, bCompact);
            break;
        case ecutsVERLET:
            /* Without charge groups we send the moved atom coordinates
             * over twice. This is so the code below can be used without
             * many conditionals both with and without charge groups.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, as_rvec_array(state->x.data()), comm, FALSE);
            if (bCompact)
            {
                home_pos_cg -= *ncg_moved;
            }
            break;
        default:
            gmx_incons("unimplemented");
            home_pos_cg = 0;
    }

    int vec         = 0;
    int home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->x.data()),
                                comm, bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->v.data()),
                                comm, bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->cg_p.data()),
                                comm, bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home, move,
                    dd->globalAtomGroupIndices, &dd->atomGrouping_, dd->globalAtomIndices,
                    dd->ga2la, comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        int *moved;
        if (fr->cutoff_scheme == ecutsVERLET)
        {
            moved = getMovedBuffer(comm, 0, dd->ncg_home);
        }
        else
        {
            moved = fr->ns->grid->cell_index;
        }

        clear_and_mark_ind(dd->ncg_home, move,
                           dd->globalAtomGroupIndices, dd->atomGrouping(), dd->globalAtomIndices,
                           dd->ga2la, comm->bLocalCG,
                           moved);
    }

    /* Now we can remove the excess global atom-group indices from the list */
    dd->globalAtomGroupIndices.resize(home_pos_cg);
    dd->atomGrouping_.reduceNumBlocks(home_pos_cg);

    /* We reuse the intBuffer without reacquiring since we are in the same scope */
    DDBufferAccess<int> &flagBuffer = moveBuffer;

    const cginfo_mb_t   *cginfo_mb  = fr->cginfo_mb;

    *ncg_stay_home = home_pos_cg;
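    /* Loop over the decomposition dimensions; along each dimension the
     * moved groups are sent to the neighboring cells (one direction
     * suffices with only two cells), received groups that belong in this
     * cell are appended to the home arrays, and groups that have to move
     * further are placed in the send buffers for the next dimension.
     */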
    for (int d = 0; d < dd->ndim; d++)
    {
        DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, 0);

        const int                 dim = dd->dim[d];
        int ncg_recv                  = 0;
        int                       nvr = 0;
        for (int dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
        {
            const int cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            int       sbuf[2] = { ncg[cdd], nat[cdd] };
            if (debug)
            {
                fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
                        d, dir, sbuf[0], sbuf[1]);
            }
            int rbuf[2];
            ddSendrecv(dd, d, dir, sbuf, 2, rbuf, 2);

            flagBuffer.resize((ncg_recv + rbuf[0])*DD_CGIBS);

            /* Communicate the charge group indices, sizes and flags */
            ddSendrecv(dd, d, dir,
                       comm->cggl_flag[cdd].data(), sbuf[0]*DD_CGIBS,
                       flagBuffer.buffer.data() + ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            const int nvs = ncg[cdd] + nat[cdd]*nvec;
            const int i   = rbuf[0]  + rbuf[1] *nvec;
            rvecBuffer.resize(nvr + i);

            /* Communicate cgcm and state */
            ddSendrecv(dd, d, dir,
                       as_rvec_array(comm->cgcm_state[cdd].data()), nvs,
                       as_rvec_array(rvecBuffer.buffer.data()) + nvr, i);
            ncg_recv += rbuf[0];
            nvr      += i;
        }

        dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
        if (fr->cutoff_scheme == ecutsGROUP)
        {
            /* Here we resize to more than necessary and shrink later */
            dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
        }

        /* Process the received charge groups */
        int buf_pos = 0;
        for (int cg = 0; cg < ncg_recv; cg++)
        {
            int   flag = flagBuffer.buffer[cg*DD_CGIBS + 1];
            rvec &pos  = as_rvec_array(rvecBuffer.buffer.data())[buf_pos];

            if (dim >= npbcdim && dd->nc[dim] > 2)
            {
                /* No pbc in this dimension and more than one domain boundary,
                 * so we perform a separate check that the group did not move
                 * too far.
                 */
                if (((flag & DD_FLAG_FW(d)) &&
                     pos[dim] > cell_x1[dim]) ||
                    ((flag & DD_FLAG_BW(d)) &&
                     pos[dim] < cell_x0[dim]))
                {
                    cg_move_error(fplog, dd, step, cg, dim,
                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
                                  fr->cutoff_scheme == ecutsGROUP, 0,
                                  pos, pos, pos[dim]);
                }
            }

            int mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for (int d2 = d + 1; (d2 < dd->ndim && mc == -1); d2++)
                {
                    if (isDlbOn(dd->comm))
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        const int dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            real pos_d = rvecBuffer.buffer[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for (int d3 = dim2+1; d3 < DIM; d3++)
                                {
                                    pos_d += pos[d3]*tcm[d3][dim2];
                                }
                            }

                            GMX_ASSERT(dim2 >= 0 && dim2 < DIM, "Keep the static analyzer happy");

                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            flagBuffer.buffer[cg*DD_CGIBS + 1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

            const int nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                /* Set the global charge group index and size */
                const int globalAtomGroupIndex = flagBuffer.buffer[cg*DD_CGIBS];
                dd->globalAtomGroupIndices.push_back(globalAtomGroupIndex);
                dd->atomGrouping_.appendBlock(nrcg);
                /* Copy the state from the buffer */
                if (fr->cutoff_scheme == ecutsGROUP)
                {
                    cg_cm = fr->cg_cm;
                    copy_rvec(pos, cg_cm[home_pos_cg]);
                }
                buf_pos++;

                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   globalAtomGroupIndex);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[globalAtomGroupIndex] = TRUE;
                }

                rvec *rvecPtr = as_rvec_array(rvecBuffer.buffer.data());
                for (int i = 0; i < nrcg; i++)
                {
                    copy_rvec(rvecPtr[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for (int i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for (int i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if ((ncg[mc] + 1)*DD_CGIBS > gmx::index(comm->cggl_flag[mc].size()))
                {
                    comm->cggl_flag[mc].resize((ncg[mc] + 1)*DD_CGIBS);
                }
                size_t nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state[mc].size())
                {
                    comm->cgcm_state[mc].resize(nvr + 1 + nrcg*nvec);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc].data() + ncg[mc]*DD_CGIBS,
                       flagBuffer.buffer.data() + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       rvecBuffer.buffer.data() + buf_pos,
                       (1 + nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real counts, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    if (fr->cutoff_scheme == ecutsVERLET)
    {
        /* We need to clear the moved flags for the received atoms,
         * because the moved buffer will be passed to the nbnxn gridding call.
         */
        int *moved = getMovedBuffer(comm, dd->ncg_home, home_pos_cg);

        for (int i = dd->ncg_home; i < home_pos_cg; i++)
        {
            moved[i] = 0;
        }
    }

    dd->ncg_home = home_pos_cg;
    comm->atomRanges.setEnd(DDAtomRanges::Type::Home, home_pos_at);

    if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
    {
        /* We overallocated before, we need to set the right size here */
        dd_resize_state(state, f, comm->atomRanges.numHomeAtoms());
    }

    if (debug)
    {
        fprintf(debug,
                "Finished repartitioning: cgs moved out %d, new home %d\n",
                *ncg_moved, dd->ncg_home-*ncg_moved);

    }
}