src/gromacs/domdec/redistribute.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Implements atom redistribution functions.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include "redistribute.h"

#include <cstring>

#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"

#include "domdec_internal.h"
#include "utility.h"

/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))

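/* For each atom group that moves, copy vector number \p vec (of \p nvec
 * vectors per group) from \p src into the communication buffer of the
 * destination cell; with \p bCompact, also compact the remaining home
 * entries of \p src in place. Returns the number of home atom entries,
 * which still includes holes when \p bCompact is false.
 */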
static int compact_and_copy_vec_at(int ncg, const int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, int vec,
                                   rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for (int i : atomGroup)
                {
                    copy_rvec(src[i], src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            int numAtoms  = atomGroup.size();
            pos_vec[m]   += 1 + vec*numAtoms;
            for (int i : atomGroup)
            {
                copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*numAtoms;
        }
        if (!bCompact)
        {
            home_pos += atomGroup.size();
        }
    }

    return home_pos;
}

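/* Like compact_and_copy_vec_at(), but for per-group data in \p src
 * (one rvec per group, e.g. the charge group centers): copies entries
 * for moving groups to the communication buffers and, with \p bCompact,
 * compacts the remaining home entries in place. Returns the number of
 * home group entries (equal to \p ncg when not compacting).
 */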
static int compact_and_copy_vec_cg(int ncg, const int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[g], src[home_pos++]);
            }
        }
        else
        {
            /* Copy to the communication buffer */
            copy_rvec(src[g], comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + atomGroup.size()*nvec;
        }
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}

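/* Compact the local index arrays in place for the atom groups that stay
 * home, update the global-to-local atom lookup accordingly, and remove
 * the atoms of moved groups from \p ga2la and \p bLocalCG. Returns the
 * new number of home groups.
 */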
static int compact_ind(int                     numAtomGroups,
                       const int              *move,
                       gmx::ArrayRef<int>      globalAtomGroupIndices,
                       gmx::RangePartitioning *atomGroups,
                       gmx::ArrayRef<int>      globalAtomIndices,
                       gmx_ga2la_t            *ga2la,
                       char                   *bLocalCG,
                       int                    *cginfo)
{
    atomGroups->clear();
    int home_pos = 0;
    int nat      = 0;
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            atomGroups->appendBlock(nat);
            for (int a : atomGroups->block(g))
            {
                const int a_gl         = globalAtomIndices[a];
                globalAtomIndices[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la, a_gl, nat);
                nat++;
            }
            globalAtomGroupIndices[home_pos] = globalAtomGroupIndices[g];
            cginfo[home_pos]                 = cginfo[g];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for (int a : atomGroups->block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
        }
    }

    GMX_ASSERT(atomGroups->numBlocks() == home_pos, "The atom group data and atomGroups should be consistent");

    return home_pos;
}

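/* For all atom groups that move, remove their atoms from \p ga2la and
 * \p bLocalCG and signal the move by setting the group's entry in
 * \p cell_index to -1.
 */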
static void clear_and_mark_ind(int                           numAtomGroups,
                               const int                    *move,
                               gmx::ArrayRef<const int>      globalAtomGroupIndices,
                               const gmx::RangePartitioning &atomGroups,
                               gmx::ArrayRef<const int>      globalAtomIndices,
                               gmx_ga2la_t                  *ga2la,
                               char                         *bLocalCG,
                               int                          *cell_index)
{
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] >= 0)
        {
            /* Clear the global indices */
            for (int a : atomGroups.block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
            /* Signal that this group has moved using the ns cell index.
             * Here we set it to -1. fill_grid will change it
             * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
             */
            cell_index[g] = -1;
        }
    }
}

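/* Print the details of a charge group or atom that moved too far in one
 * domain decomposition step: the old and new coordinates and the old and
 * new cell boundaries in the offending dimension.
 */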
static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    gmx_domdec_comm_t *comm;
    char               buf[22];

    comm = dd->comm;

    fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
    if (limitd > 0)
    {
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), limitd, dim2char(dim));
    }
    else
    {
        /* We don't have a limiting distance available: don't print it */
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), dim2char(dim));
    }
    fprintf(fplog, "distance out of cell %f\n",
            dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    if (bHaveCgcmOld)
    {
        fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
                cm_old[XX], cm_old[YY], cm_old[ZZ]);
    }
    fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX], cm_new[YY], cm_new[ZZ]);
    fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
    fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim], comm->cell_x1[dim]);
}

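/* Print the details of a too-large charge group or atom displacement to
 * the log file and to stderr, then exit with a fatal error.
 */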
static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd, step, cg, dim, dir,
                      bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    }
    print_cg_move(stderr, dd, step, cg, dim, dir,
                  bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    gmx_fatal(FARGS,
              "%s moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated",
              dd->comm->bCGs ? "A charge group" : "An atom");
}

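/* Apply the screw-PBC transformation to atom \p a: mirror the y and z
 * coordinates in the box and negate the y and z components of the
 * velocity and the cg_p vector, when present in \p state.
 */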
static void rotate_state_atom(t_state *state, int a)
{
    if (state->flags & (1 << estX))
    {
        /* Rotate the complete state; for a rectangular box only */
        state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
        state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
    }
    if (state->flags & (1 << estV))
    {
        state->v[a][YY] = -state->v[a][YY];
        state->v[a][ZZ] = -state->v[a][ZZ];
    }
    if (state->flags & (1 << estCGP))
    {
        state->cg_p[a][YY] = -state->cg_p[a][YY];
        state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
    }
}

/* Returns a pointer to a buffer of size \p numAtomsNew with contents 0 from numAtomsOld upwards
 *
 * Note: numAtomsOld should either be 0 or match the current buffer size.
 */
static int *getMovedBuffer(gmx_domdec_comm_t *comm,
                           size_t             numAtomsOld,
                           size_t             numAtomsNew)
{
    std::vector<int> &movedBuffer = comm->movedBuffer;

    GMX_RELEASE_ASSERT(numAtomsOld == 0 || movedBuffer.size() == numAtomsOld, "numAtomsOld should either be 0 or match the current size");

    /* Contents up to numAtomsOld should be preserved, clear from numAtomsOld */
    if (numAtomsOld == 0)
    {
        movedBuffer.clear();
    }
    movedBuffer.resize(numAtomsNew);

    return movedBuffer.data();
}

/* Determine to which domains the atom groups in the range \p cg_start, \p cg_end should go.
 *
 * Returns in the \p move array where the groups should go.
 * Also updates \p cg_cm for jumps over periodic boundaries.
 *
 * \todo Rename cg to atomGroup.
 */
static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                         gmx_domdec_t *dd,
                         t_state *state,
                         const ivec tric_dir, matrix tcm,
                         const rvec cell_x0, const rvec cell_x1,
                         rvec limitd, const rvec limit0, const rvec limit1,
                         const gmx::RangePartitioning &atomGroups,
                         int cg_start, int cg_end,
                         rvec *cg_cm,
                         int *move)
{
    const int npbcdim = dd->npbcdim;

    for (int g = cg_start; g < cg_end; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        const int  numAtoms  = atomGroup.size();
        // TODO: Rename this center of geometry variable to cogNew
        rvec       cm_new;
        if (numAtoms == 1)
        {
            copy_rvec(state->x[atomGroup.begin()], cm_new);
        }
        else
        {
            real invNumAtoms = 1/static_cast<real>(numAtoms);
            clear_rvec(cm_new);
            for (int k : atomGroup)
            {
                rvec_inc(cm_new, state->x[k]);
            }
            for (int d = 0; d < DIM; d++)
            {
                cm_new[d] = invNumAtoms*cm_new[d];
            }
        }

        ivec dev = { 0 };
        /* Do pbc and check DD cell boundary crossings */
        for (int d = DIM - 1; d >= 0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bool bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                real pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for (int d2 = d + 1; d2 < DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, 1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_dec(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, -1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_inc(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_dec(state->x[k], state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_inc(state->x[k], state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new, cg_cm[g]);

        /* Determine where this cg should go */
        int flag = 0;
        int mc   = -1;
        for (int d = 0; d < dd->ndim; d++)
        {
            int dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        /* Temporarily store the flag in move */
        move[g] = mc + flag;
    }
}

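/* Redistribute the home atom groups over the domain decomposition grid:
 * determine for each home group whether it left the home cell, send the
 * moved groups and their state to the neighboring cells along each
 * decomposition dimension, append the received groups that belong here,
 * and update the local indices and counts. With \p bCompact the home
 * arrays are compacted in place; otherwise moved entries are only marked
 * and cleaned up by the subsequent sorting.
 */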
void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                        gmx_domdec_t *dd, ivec tric_dir,
                        t_state *state, PaddedRVecVector *f,
                        t_forcerec *fr,
                        gmx_bool bCompact,
                        t_nrnb *nrnb,
                        int *ncg_stay_home,
                        int *ncg_moved)
{
    gmx_domdec_comm_t *comm = dd->comm;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    rvec *cg_cm = nullptr;
    if (fr->cutoff_scheme == ecutsGROUP)
    {
        cg_cm = fr->cg_cm;
    }

    // Positions are always present, so there's nothing to flag
    bool                bV   = state->flags & (1<<estV);
    bool                bCGP = state->flags & (1<<estCGP);

    DDBufferAccess<int> moveBuffer(comm->intBuffer, dd->globalAtomGroupIndices.size());
    int                *move = moveBuffer.buffer.data();

    const int           npbcdim = dd->npbcdim;

    rvec                cell_x0, cell_x1, limitd, limit0, limit1;
    for (int d = 0; (d < DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        if (d < npbcdim)
        {
            limit0[d] = comm->old_cell_x0[d] - limitd[d];
            limit1[d] = comm->old_cell_x1[d] + limitd[d];
        }
        else
        {
            /* We check after communication if a charge group moved
             * more than one cell. Set the pre-comm check limit to float_max.
             */
            limit0[d] = -GMX_FLOAT_MAX;
            limit1[d] =  GMX_FLOAT_MAX;
        }
    }

    matrix tcm;
    make_tric_corr_matrix(npbcdim, state->box, tcm);

    const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();

    const int                     nthread = gmx_omp_nthreads_get(emntDomdec);

    /* Compute the center of geometry for all home charge groups,
     * put them in the box and determine where they should go.
     */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
                         cell_x0, cell_x1, limitd, limit0, limit1,
                         atomGrouping,
                         ( thread   *dd->ncg_home)/nthread,
                         ((thread+1)*dd->ncg_home)/nthread,
                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
                         move);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

    int ncg[DIM*2] = { 0 };
    int nat[DIM*2] = { 0 };
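    /* Collect the groups that leave the home cell: count them per target
     * cell and store each group's global index, size and direction flags
     * in the cggl_flag send buffers.
     */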
    for (int cg = 0; cg < dd->ncg_home; cg++)
    {
        if (move[cg] >= 0)
        {
            const int flag = move[cg] & ~DD_FLAG_NRCG;
            const int mc   = move[cg] & DD_FLAG_NRCG;
            move[cg]       = mc;

            std::vector<int> &cggl_flag = comm->cggl_flag[mc];

            /* TODO: See if we can use push_back instead */
            if ((ncg[mc] + 1)*DD_CGIBS > gmx::index(cggl_flag.size()))
            {
                cggl_flag.resize((ncg[mc] + 1)*DD_CGIBS);
            }
            cggl_flag[ncg[mc]*DD_CGIBS  ] = dd->globalAtomGroupIndices[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            const int nrcg = atomGrouping.block(cg).size();
            cggl_flag[ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
    inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);

    *ncg_moved = 0;
    for (int i = 0; i < dd->ndim*2; i++)
    {
        *ncg_moved += ncg[i];
    }

    int nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for (int mc = 0; mc < dd->ndim*2; mc++)
    {
        size_t nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state[mc].size())
        {
            comm->cgcm_state[mc].resize(nvr);
        }
    }

    int home_pos_cg;
    switch (fr->cutoff_scheme)
    {
        case ecutsGROUP:
            /* Recalculating cg_cm might be cheaper than communicating,
             * but that could give rise to rounding issues.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, cg_cm, comm, bCompact);
            break;
        case ecutsVERLET:
            /* Without charge groups we send the moved atom coordinates
             * over twice. This is so the code below can be used without
             * many conditionals both with and without charge groups.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, as_rvec_array(state->x.data()), comm, FALSE);
            if (bCompact)
            {
                home_pos_cg -= *ncg_moved;
            }
            break;
        default:
            gmx_incons("unimplemented");
            home_pos_cg = 0;
    }

    int vec         = 0;
    int home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->x.data()),
                                comm, bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->v.data()),
                                comm, bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->cg_p.data()),
                                comm, bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home, move,
                    dd->globalAtomGroupIndices, &dd->atomGrouping_, dd->globalAtomIndices,
                    dd->ga2la, comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        int *moved;
        if (fr->cutoff_scheme == ecutsVERLET)
        {
            moved = getMovedBuffer(comm, 0, dd->ncg_home);
        }
        else
        {
            moved = fr->ns->grid->cell_index;
        }

        clear_and_mark_ind(dd->ncg_home, move,
                           dd->globalAtomGroupIndices, dd->atomGrouping(), dd->globalAtomIndices,
                           dd->ga2la, comm->bLocalCG,
                           moved);
    }

    /* Now we can remove the excess global atom-group indices from the list */
    dd->globalAtomGroupIndices.resize(home_pos_cg);
    dd->atomGrouping_.reduceNumBlocks(home_pos_cg);

    /* We reuse the intBuffer without reacquiring since we are in the same scope */
    DDBufferAccess<int> &flagBuffer = moveBuffer;

    const cginfo_mb_t   *cginfo_mb  = fr->cginfo_mb;

    *ncg_stay_home = home_pos_cg;
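    /* Loop over the decomposition dimensions; along each dimension the
     * moved groups are sent to the neighboring cells (one direction
     * suffices with only two cells), received groups that belong in this
     * cell are appended to the home arrays, and groups that have to move
     * further are placed in the send buffers for the next dimension.
     */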
    for (int d = 0; d < dd->ndim; d++)
    {
        DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, 0);

        const int                 dim = dd->dim[d];
        int ncg_recv                  = 0;
        int                       nvr = 0;
        for (int dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
        {
            const int cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            int       sbuf[2] = { ncg[cdd], nat[cdd] };
            if (debug)
            {
                fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
                        d, dir, sbuf[0], sbuf[1]);
            }
            int rbuf[2];
            ddSendrecv(dd, d, dir, sbuf, 2, rbuf, 2);

            flagBuffer.resize((ncg_recv + rbuf[0])*DD_CGIBS);

            /* Communicate the charge group indices, sizes and flags */
            ddSendrecv(dd, d, dir,
                       comm->cggl_flag[cdd].data(), sbuf[0]*DD_CGIBS,
                       flagBuffer.buffer.data() + ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            const int nvs = ncg[cdd] + nat[cdd]*nvec;
            const int i   = rbuf[0]  + rbuf[1] *nvec;
            rvecBuffer.resize(nvr + i);

            /* Communicate cgcm and state */
            ddSendrecv(dd, d, dir,
                       as_rvec_array(comm->cgcm_state[cdd].data()), nvs,
                       as_rvec_array(rvecBuffer.buffer.data()) + nvr, i);
            ncg_recv += rbuf[0];
            nvr      += i;
        }

        dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
        if (fr->cutoff_scheme == ecutsGROUP)
        {
            /* Here we resize to more than necessary and shrink later */
            dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
        }

        /* Process the received charge groups */
        int buf_pos = 0;
        for (int cg = 0; cg < ncg_recv; cg++)
        {
            int   flag = flagBuffer.buffer[cg*DD_CGIBS + 1];
            rvec &pos  = as_rvec_array(rvecBuffer.buffer.data())[buf_pos];

            if (dim >= npbcdim && dd->nc[dim] > 2)
            {
                /* No pbc in this dimension and more than one domain boundary,
                 * so we perform a separate check that the group did not move
                 * too far.
                 */
                if (((flag & DD_FLAG_FW(d)) &&
                     pos[dim] > cell_x1[dim]) ||
                    ((flag & DD_FLAG_BW(d)) &&
                     pos[dim] < cell_x0[dim]))
                {
                    cg_move_error(fplog, dd, step, cg, dim,
                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
                                  fr->cutoff_scheme == ecutsGROUP, 0,
                                  pos, pos, pos[dim]);
                }
            }

            int mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for (int d2 = d + 1; (d2 < dd->ndim && mc == -1); d2++)
                {
                    if (isDlbOn(dd->comm))
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        const int dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            real pos_d = rvecBuffer.buffer[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for (int d3 = dim2+1; d3 < DIM; d3++)
                                {
                                    pos_d += pos[d3]*tcm[d3][dim2];
                                }
                            }

                            GMX_ASSERT(dim2 >= 0 && dim2 < DIM, "Keep the static analyzer happy");

                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            flagBuffer.buffer[cg*DD_CGIBS + 1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

            const int nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                /* Set the global charge group index and size */
                const int globalAtomGroupIndex = flagBuffer.buffer[cg*DD_CGIBS];
                dd->globalAtomGroupIndices.push_back(globalAtomGroupIndex);
                dd->atomGrouping_.appendBlock(nrcg);
                /* Copy the state from the buffer */
                if (fr->cutoff_scheme == ecutsGROUP)
                {
                    cg_cm = fr->cg_cm;
                    copy_rvec(pos, cg_cm[home_pos_cg]);
                }
                buf_pos++;

                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   globalAtomGroupIndex);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[globalAtomGroupIndex] = TRUE;
                }

                rvec *rvecPtr = as_rvec_array(rvecBuffer.buffer.data());
                for (int i = 0; i < nrcg; i++)
                {
                    copy_rvec(rvecPtr[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for (int i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for (int i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if ((ncg[mc] + 1)*DD_CGIBS > gmx::index(comm->cggl_flag[mc].size()))
                {
                    comm->cggl_flag[mc].resize((ncg[mc] + 1)*DD_CGIBS);
                }
                size_t nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state[mc].size())
                {
                    comm->cgcm_state[mc].resize(nvr + 1 + nrcg*nvec);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc].data() + ncg[mc]*DD_CGIBS,
                       flagBuffer.buffer.data() + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       rvecBuffer.buffer.data() + buf_pos,
                       (1 + nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real counts, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    if (fr->cutoff_scheme == ecutsVERLET)
    {
        /* We need to clear the moved flags for the received atoms,
         * because the moved buffer will be passed to the nbnxn gridding call.
         */
        int *moved = getMovedBuffer(comm, dd->ncg_home, home_pos_cg);

        for (int i = dd->ncg_home; i < home_pos_cg; i++)
        {
            moved[i] = 0;
        }
    }

    dd->ncg_home = home_pos_cg;
    comm->atomRanges.setEnd(DDAtomRanges::Type::Home, home_pos_at);

    if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
    {
        /* We overallocated before, we need to set the right size here */
        dd_resize_state(state, f, comm->atomRanges.numHomeAtoms());
    }

    if (debug)
    {
        fprintf(debug,
                "Finished repartitioning: cgs moved out %d, new home %d\n",
                *ncg_moved, dd->ncg_home-*ncg_moved);

    }
}