/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief Implements atom redistribution functions.
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_domdec
 */

#include "gmxpre.h"

#include "redistribute.h"

#include <cstring>

#include "gromacs/domdec/domdec_network.h"
#include "gromacs/domdec/ga2la.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/mdlib/gmx_omp_nthreads.h"
#include "gromacs/mdlib/nsgrid.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/nblist.h"
#include "gromacs/mdtypes/state.h"
#include "gromacs/utility/cstringutil.h"
#include "gromacs/utility/fatalerror.h"

#include "domdec_internal.h"
#include "utility.h"

/* The size per charge group of the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_CGIBS 2

/* The flags for the cggl_flag buffer in gmx_domdec_comm_t */
#define DD_FLAG_NRCG  65535
#define DD_FLAG_FW(d) (1<<(16+(d)*2))
#define DD_FLAG_BW(d) (1<<(16+(d)*2+1))
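
/* Example of a packed flag word, as stored temporarily in the move
 * buffer and communicated in cggl_flag: a group of 3 atoms that moves
 * forward along the first decomposition dimension (d = 0) is encoded as
 *     3 | DD_FLAG_FW(0)  ==  3 | (1 << 16)  ==  0x10003
 * Bits 0-15 hold the group size (mask DD_FLAG_NRCG); above that there
 * is one forward/backward flag pair per decomposition dimension.
 */

/* Copies vector component \p vec (with 0 <= vec < nvec) of all groups
 * flagged in \p move to the communication buffers, and compacts the
 * home entries of \p src in place when \p bCompact is set.
 *
 * Each moved group occupies a contiguous block in comm->cgcm_state[m]:
 * one rvec for the group center followed by nvec blocks of one rvec
 * per atom; pos_vec[m] skips over the slots filled by the calls for
 * the other vec indices. Returns the compacted home atom count when
 * compacting, otherwise the total atom count.
 */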
static int compact_and_copy_vec_at(int ncg, int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, int vec,
                                   rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                for (int i : atomGroup)
                {
                    copy_rvec(src[i], src[home_pos++]);
                }
            }
        }
        else
        {
            /* Copy to the communication buffer */
            int numAtoms  = atomGroup.size();
            pos_vec[m]   += 1 + vec*numAtoms;
            for (int i : atomGroup)
            {
                copy_rvec(src[i], comm->cgcm_state[m][pos_vec[m]++]);
            }
            pos_vec[m] += (nvec - vec - 1)*numAtoms;
        }
        if (!bCompact)
        {
            home_pos += atomGroup.size();
        }
    }

    return home_pos;
}
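
/* As compact_and_copy_vec_at(), but copies only the single center rvec
 * of each moved group (the first slot of the group's block in
 * comm->cgcm_state[m]). Returns the compacted home group count when
 * compacting, otherwise \p ncg.
 */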
static int compact_and_copy_vec_cg(int ncg, int *move,
                                   const gmx::RangePartitioning &atomGroups,
                                   int nvec, rvec *src, gmx_domdec_comm_t *comm,
                                   gmx_bool bCompact)
{
    int home_pos       = 0;
    int pos_vec[DIM*2] = { 0 };

    for (int g = 0; g < ncg; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        int        m         = move[g];
        if (m == -1)
        {
            if (bCompact)
            {
                /* Compact the home array in place */
                copy_rvec(src[g], src[home_pos++]);
            }
        }
        else
        {
            /* Copy to the communication buffer */
            copy_rvec(src[g], comm->cgcm_state[m][pos_vec[m]]);
            pos_vec[m] += 1 + atomGroup.size()*nvec;
        }
    }
    if (!bCompact)
    {
        home_pos = ncg;
    }

    return home_pos;
}
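
/* Compacts the home index arrays in place, dropping the groups flagged
 * in \p move, and updates the global-to-local atom lookup accordingly.
 * Returns the new number of home atom groups.
 */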
static int compact_ind(int                     numAtomGroups,
                       const int              *move,
                       gmx::ArrayRef<int>      globalAtomGroupIndices,
                       gmx::RangePartitioning *atomGroups,
                       gmx::ArrayRef<int>      globalAtomIndices,
                       gmx_ga2la_t            *ga2la,
                       char                   *bLocalCG,
                       int                    *cginfo)
{
    atomGroups->clear();
    int home_pos = 0;
    int nat      = 0;
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] == -1)
        {
            /* Compact the home arrays in place.
             * Anything that can be done here avoids access to global arrays.
             */
            atomGroups->appendBlock(nat);
            for (int a : atomGroups->block(g))
            {
                const int a_gl         = globalAtomIndices[a];
                globalAtomIndices[nat] = a_gl;
                /* The cell number stays 0, so we don't need to set it */
                ga2la_change_la(ga2la, a_gl, nat);
                nat++;
            }
            globalAtomGroupIndices[home_pos] = globalAtomGroupIndices[g];
            cginfo[home_pos]                 = cginfo[g];
            /* The charge group remains local, so bLocalCG does not change */
            home_pos++;
        }
        else
        {
            /* Clear the global indices */
            for (int a : atomGroups->block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
        }
    }

    GMX_ASSERT(atomGroups->numBlocks() == home_pos, "The atom group data and atomGroups should be consistent");

    return home_pos;
}
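
/* For each group flagged in \p move: removes its atoms from the
 * global-to-local lookup and marks the group as moved in \p cell_index,
 * so the ns grid code can skip it. Used instead of compact_ind() when
 * the home arrays are left with holes (!bCompact).
 */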
static void clear_and_mark_ind(int                           numAtomGroups,
                               const int                    *move,
                               gmx::ArrayRef<const int>      globalAtomGroupIndices,
                               const gmx::RangePartitioning &atomGroups,
                               gmx::ArrayRef<const int>      globalAtomIndices,
                               gmx_ga2la_t                  *ga2la,
                               char                         *bLocalCG,
                               int                          *cell_index)
{
    for (int g = 0; g < numAtomGroups; g++)
    {
        if (move[g] >= 0)
        {
            /* Clear the global indices */
            for (int a : atomGroups.block(g))
            {
                ga2la_del(ga2la, globalAtomIndices[a]);
            }
            if (bLocalCG)
            {
                bLocalCG[globalAtomGroupIndices[g]] = FALSE;
            }
            /* Signal that this group has moved using the ns cell index.
             * Here we set it to -1. fill_grid will change it
             * from -1 to NSGRID_SIGNAL_MOVED_FAC*grid->ncells.
             */
            cell_index[g] = -1;
        }
    }
}

static void print_cg_move(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    gmx_domdec_comm_t *comm;
    char               buf[22];

    comm = dd->comm;

    fprintf(fplog, "\nStep %s:\n", gmx_step_str(step, buf));
    if (limitd > 0)
    {
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition (%f) in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), limitd, dim2char(dim));
    }
    else
    {
        /* We don't have a limiting distance available: don't print it */
        fprintf(fplog, "%s %d moved more than the distance allowed by the domain decomposition in direction %c\n",
                dd->comm->bCGs ? "The charge group starting at atom" : "Atom",
                ddglatnr(dd, dd->atomGrouping().block(cg).begin()), dim2char(dim));
    }
    fprintf(fplog, "distance out of cell %f\n",
            dir == 1 ? pos_d - comm->cell_x1[dim] : pos_d - comm->cell_x0[dim]);
    if (bHaveCgcmOld)
    {
        fprintf(fplog, "Old coordinates: %8.3f %8.3f %8.3f\n",
                cm_old[XX], cm_old[YY], cm_old[ZZ]);
    }
    fprintf(fplog, "New coordinates: %8.3f %8.3f %8.3f\n",
            cm_new[XX], cm_new[YY], cm_new[ZZ]);
    fprintf(fplog, "Old cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->old_cell_x0[dim], comm->old_cell_x1[dim]);
    fprintf(fplog, "New cell boundaries in direction %c: %8.3f %8.3f\n",
            dim2char(dim),
            comm->cell_x0[dim], comm->cell_x1[dim]);
}

static void cg_move_error(FILE *fplog,
                          gmx_domdec_t *dd,
                          gmx_int64_t step, int cg, int dim, int dir,
                          gmx_bool bHaveCgcmOld, real limitd,
                          rvec cm_old, rvec cm_new, real pos_d)
{
    if (fplog)
    {
        print_cg_move(fplog, dd, step, cg, dim, dir,
                      bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    }
    print_cg_move(stderr, dd, step, cg, dim, dir,
                  bHaveCgcmOld, limitd, cm_old, cm_new, pos_d);
    gmx_fatal(FARGS,
              "%s moved too far between two domain decomposition steps\n"
              "This usually means that your system is not well equilibrated",
              dd->comm->bCGs ? "A charge group" : "An atom");
}
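
/* Rotates the x, v and cg_p state vectors of atom \p a by 180 degrees
 * around the x-axis, as needed when an atom crosses a screw-PBC
 * boundary; this is only valid for a rectangular box.
 */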
static void rotate_state_atom(t_state *state, int a)
{
    if (state->flags & (1 << estX))
    {
        /* Rotate the complete state; for a rectangular box only */
        state->x[a][YY] = state->box[YY][YY] - state->x[a][YY];
        state->x[a][ZZ] = state->box[ZZ][ZZ] - state->x[a][ZZ];
    }
    if (state->flags & (1 << estV))
    {
        state->v[a][YY] = -state->v[a][YY];
        state->v[a][ZZ] = -state->v[a][ZZ];
    }
    if (state->flags & (1 << estCGP))
    {
        state->cg_p[a][YY] = -state->cg_p[a][YY];
        state->cg_p[a][ZZ] = -state->cg_p[a][ZZ];
    }
}

/* Returns a pointer to a buffer of size \p numAtomsNew with contents 0 from \p numAtomsOld upwards
 *
 * Note: \p numAtomsOld should either be 0 or match the current buffer size.
 */
static int *getMovedBuffer(gmx_domdec_comm_t *comm,
                           size_t             numAtomsOld,
                           size_t             numAtomsNew)
{
    std::vector<int> &movedBuffer = comm->movedBuffer;

    GMX_RELEASE_ASSERT(numAtomsOld == 0 || movedBuffer.size() == numAtomsOld, "numAtomsOld should either be 0 or match the current size");

    /* Contents up to numAtomsOld should be preserved, clear from numAtomsOld */
    if (numAtomsOld == 0)
    {
        movedBuffer.clear();
    }
    movedBuffer.resize(numAtomsNew);

    return movedBuffer.data();
}

/* Determine to which domains the atom groups in the range \p cg_start, \p cg_end should go.
 *
 * Returns in the move array where the groups should go.
 * Also updates \p cg_cm for jumps over periodic boundaries.
 *
 * \todo Rename cg to atomGroup.
 */
static void calc_cg_move(FILE *fplog, gmx_int64_t step,
                         gmx_domdec_t *dd,
                         t_state *state,
                         ivec tric_dir, matrix tcm,
                         rvec cell_x0, rvec cell_x1,
                         rvec limitd, rvec limit0, rvec limit1,
                         const gmx::RangePartitioning &atomGroups,
                         int cg_start, int cg_end,
                         rvec *cg_cm,
                         int *move)
{
    const int npbcdim = dd->npbcdim;

    for (int g = cg_start; g < cg_end; g++)
    {
        const auto atomGroup = atomGroups.block(g);
        const int  numAtoms  = atomGroup.size();
        // TODO: Rename this center of geometry variable to cogNew
        rvec       cm_new;
        if (numAtoms == 1)
        {
            copy_rvec(state->x[atomGroup.begin()], cm_new);
        }
        else
        {
            real invNumAtoms = 1/static_cast<real>(numAtoms);
            clear_rvec(cm_new);
            for (int k : atomGroup)
            {
                rvec_inc(cm_new, state->x[k]);
            }
            for (int d = 0; d < DIM; d++)
            {
                cm_new[d] = invNumAtoms*cm_new[d];
            }
        }

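        /* dev[d] records how this group crosses the cell boundaries in
         * dimension d: 1 over the upper boundary, -1 over the lower
         * boundary, 0 when it stays within the home cell.
         */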
        ivec dev = { 0 };
        /* Do pbc and check DD cell boundary crossings */
        for (int d = DIM - 1; d >= 0; d--)
        {
            if (dd->nc[d] > 1)
            {
                bool bScrew = (dd->bScrewPBC && d == XX);
                /* Determine the location of this cg in lattice coordinates */
                real pos_d = cm_new[d];
                if (tric_dir[d])
                {
                    for (int d2 = d + 1; d2 < DIM; d2++)
                    {
                        pos_d += cm_new[d2]*tcm[d2][d];
                    }
                }
                /* Put the charge group in the triclinic unit-cell */
                if (pos_d >= cell_x1[d])
                {
                    if (pos_d >= limit1[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, 1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = 1;
                    if (dd->ci[d] == dd->nc[d] - 1)
                    {
                        rvec_dec(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_dec(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
                else if (pos_d < cell_x0[d])
                {
                    if (pos_d < limit0[d])
                    {
                        cg_move_error(fplog, dd, step, g, d, -1,
                                      cg_cm != as_rvec_array(state->x.data()), limitd[d],
                                      cg_cm[g], cm_new, pos_d);
                    }
                    dev[d] = -1;
                    if (dd->ci[d] == 0)
                    {
                        rvec_inc(cm_new, state->box[d]);
                        if (bScrew)
                        {
                            cm_new[YY] = state->box[YY][YY] - cm_new[YY];
                            cm_new[ZZ] = state->box[ZZ][ZZ] - cm_new[ZZ];
                        }
                        for (int k : atomGroup)
                        {
                            rvec_inc(state->x[k], state->box[d]);
                            if (bScrew)
                            {
                                rotate_state_atom(state, k);
                            }
                        }
                    }
                }
            }
            else if (d < npbcdim)
            {
                /* Put the charge group in the rectangular unit-cell */
                while (cm_new[d] >= state->box[d][d])
                {
                    rvec_dec(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_dec(state->x[k], state->box[d]);
                    }
                }
                while (cm_new[d] < 0)
                {
                    rvec_inc(cm_new, state->box[d]);
                    for (int k : atomGroup)
                    {
                        rvec_inc(state->x[k], state->box[d]);
                    }
                }
            }
        }

        copy_rvec(cm_new, cg_cm[g]);

        /* Determine where this cg should go */
        int flag = 0;
        int mc   = -1;
        for (int d = 0; d < dd->ndim; d++)
        {
            int dim = dd->dim[d];
            if (dev[dim] == 1)
            {
                flag |= DD_FLAG_FW(d);
                if (mc == -1)
                {
                    mc = d*2;
                }
            }
            else if (dev[dim] == -1)
            {
                flag |= DD_FLAG_BW(d);
                if (mc == -1)
                {
                    if (dd->nc[dim] > 2)
                    {
                        mc = d*2 + 1;
                    }
                    else
                    {
                        mc = d*2;
                    }
                }
            }
        }
        /* Temporarily store the flag in move */
        move[g] = mc + flag;
    }
}

void dd_redistribute_cg(FILE *fplog, gmx_int64_t step,
                        gmx_domdec_t *dd, ivec tric_dir,
                        t_state *state, PaddedRVecVector *f,
                        t_forcerec *fr,
                        gmx_bool bCompact,
                        t_nrnb *nrnb,
                        int *ncg_stay_home,
                        int *ncg_moved)
{
    int               *move;
    int                npbcdim;
    int                ncg[DIM*2] = { 0 }, nat[DIM*2] = { 0 };
    int                i, cg, d, dim, dim2, dir, d2, d3;
    int                mc, cdd, nrcg, ncg_recv, nvs, nvec, vec;
    int                sbuf[2], rbuf[2];
    int                home_pos_cg, home_pos_at, buf_pos;
    real               pos_d;
    matrix             tcm;
    rvec              *cg_cm = nullptr, cell_x0, cell_x1, limitd, limit0, limit1;
    cginfo_mb_t       *cginfo_mb;
    gmx_domdec_comm_t *comm;
    int               *moved;
    int                nthread, thread;

    if (dd->bScrewPBC)
    {
        check_screw_box(state->box);
    }

    comm  = dd->comm;
    if (fr->cutoff_scheme == ecutsGROUP)
    {
        cg_cm = fr->cg_cm;
    }

    // Positions are always present, so there's nothing to flag
    bool                bV   = state->flags & (1<<estV);
    bool                bCGP = state->flags & (1<<estCGP);

    DDBufferAccess<int> moveBuffer(comm->intBuffer, dd->globalAtomGroupIndices.size());
    move = moveBuffer.buffer.data();

    npbcdim = dd->npbcdim;

    for (d = 0; (d < DIM); d++)
    {
        limitd[d] = dd->comm->cellsize_min[d];
        if (d >= npbcdim && dd->ci[d] == 0)
        {
            cell_x0[d] = -GMX_FLOAT_MAX;
        }
        else
        {
            cell_x0[d] = comm->cell_x0[d];
        }
        if (d >= npbcdim && dd->ci[d] == dd->nc[d] - 1)
        {
            cell_x1[d] = GMX_FLOAT_MAX;
        }
        else
        {
            cell_x1[d] = comm->cell_x1[d];
        }
        if (d < npbcdim)
        {
            limit0[d] = comm->old_cell_x0[d] - limitd[d];
            limit1[d] = comm->old_cell_x1[d] + limitd[d];
        }
        else
        {
            /* We check after communication if a charge group moved
             * more than one cell. Set the pre-comm check limit to float_max.
             */
            limit0[d] = -GMX_FLOAT_MAX;
            limit1[d] =  GMX_FLOAT_MAX;
        }
    }

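    /* Set up the matrix tcm that transforms Cartesian coordinates into
     * lattice coordinates for a triclinic box, so that positions can be
     * compared directly with the cell boundaries below.
     */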
    make_tric_corr_matrix(npbcdim, state->box, tcm);

    const gmx::RangePartitioning &atomGrouping = dd->atomGrouping();

    nthread = gmx_omp_nthreads_get(emntDomdec);

    /* Compute the center of geometry for all home charge groups
     * and put them in the box and determine where they should go.
     */
#pragma omp parallel for num_threads(nthread) schedule(static)
    for (thread = 0; thread < nthread; thread++)
    {
        try
        {
            calc_cg_move(fplog, step, dd, state, tric_dir, tcm,
                         cell_x0, cell_x1, limitd, limit0, limit1,
                         atomGrouping,
                         ( thread   *dd->ncg_home)/nthread,
                         ((thread+1)*dd->ncg_home)/nthread,
                         fr->cutoff_scheme == ecutsGROUP ? cg_cm : as_rvec_array(state->x.data()),
                         move);
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR;
    }

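    /* Pack each moving group into the send buffer for its target
     * direction: cggl_flag gets DD_CGIBS (= 2) ints per group, the
     * global group index followed by the size+flags word described at
     * the DD_FLAG_* definitions above.
     */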
    for (cg = 0; cg < dd->ncg_home; cg++)
    {
        if (move[cg] >= 0)
        {
            mc       = move[cg];
            int flag = mc & ~DD_FLAG_NRCG;
            mc       = mc & DD_FLAG_NRCG;
            move[cg] = mc;

            std::vector<int> &cggl_flag = comm->cggl_flag[mc];

            /* TODO: See if we can use push_back instead */
            if (static_cast<size_t>((ncg[mc] + 1)*DD_CGIBS) > cggl_flag.size())
            {
                cggl_flag.resize((ncg[mc] + 1)*DD_CGIBS);
            }
            cggl_flag[ncg[mc]*DD_CGIBS  ] = dd->globalAtomGroupIndices[cg];
            /* We store the cg size in the lower 16 bits
             * and the place where the charge group should go
             * in the next 6 bits. This saves some communication volume.
             */
            nrcg = atomGrouping.block(cg).size();
            cggl_flag[ncg[mc]*DD_CGIBS+1] = nrcg | flag;
            ncg[mc] += 1;
            nat[mc] += nrcg;
        }
    }

    inc_nrnb(nrnb, eNR_CGCM, comm->atomRanges.numHomeAtoms());
    inc_nrnb(nrnb, eNR_RESETX, dd->ncg_home);

    *ncg_moved = 0;
    for (i = 0; i < dd->ndim*2; i++)
    {
        *ncg_moved += ncg[i];
    }

    nvec = 1;
    if (bV)
    {
        nvec++;
    }
    if (bCGP)
    {
        nvec++;
    }

    /* Make sure the communication buffers are large enough */
    for (mc = 0; mc < dd->ndim*2; mc++)
    {
        size_t nvr = ncg[mc] + nat[mc]*nvec;
        if (nvr > comm->cgcm_state[mc].size())
        {
            comm->cgcm_state[mc].resize(nvr);
        }
    }

    switch (fr->cutoff_scheme)
    {
        case ecutsGROUP:
            /* Recalculating cg_cm might be cheaper than communicating,
             * but that could give rise to rounding issues.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, cg_cm, comm, bCompact);
            break;
        case ecutsVERLET:
            /* Without charge groups we send the moved atom coordinates
             * over twice. This is so the code below can be used without
             * many conditionals both with and without charge groups.
             */
            home_pos_cg =
                compact_and_copy_vec_cg(dd->ncg_home, move, dd->atomGrouping(),
                                        nvec, as_rvec_array(state->x.data()), comm, FALSE);
            if (bCompact)
            {
                home_pos_cg -= *ncg_moved;
            }
            break;
        default:
            gmx_incons("unimplemented");
            home_pos_cg = 0;
    }

    vec         = 0;
    home_pos_at =
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->x.data()),
                                comm, bCompact);
    if (bV)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->v.data()),
                                comm, bCompact);
    }
    if (bCGP)
    {
        compact_and_copy_vec_at(dd->ncg_home, move, dd->atomGrouping(),
                                nvec, vec++, as_rvec_array(state->cg_p.data()),
                                comm, bCompact);
    }

    if (bCompact)
    {
        compact_ind(dd->ncg_home, move,
                    dd->globalAtomGroupIndices, &dd->atomGrouping_, dd->globalAtomIndices,
                    dd->ga2la, comm->bLocalCG,
                    fr->cginfo);
    }
    else
    {
        if (fr->cutoff_scheme == ecutsVERLET)
        {
            moved = getMovedBuffer(comm, 0, dd->ncg_home);
        }
        else
        {
            moved = fr->ns->grid->cell_index;
        }

        clear_and_mark_ind(dd->ncg_home, move,
                           dd->globalAtomGroupIndices, dd->atomGrouping(), dd->globalAtomIndices,
                           dd->ga2la, comm->bLocalCG,
                           moved);
    }

    /* Now we can remove the excess global atom-group indices from the list */
    dd->globalAtomGroupIndices.resize(home_pos_cg);
    dd->atomGrouping_.reduceNumBlocks(home_pos_cg);

    /* We reuse the intBuffer without reacquiring since we are in the same scope */
    DDBufferAccess<int> &flagBuffer = moveBuffer;

    cginfo_mb = fr->cginfo_mb;

    *ncg_stay_home = home_pos_cg;
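
    /* Loop over the decomposition dimensions and forward the moved
     * groups. Groups move at most one cell per dimension, so one pulse
     * per direction suffices; with only two cells along a dimension the
     * forward and backward neighbor coincide, so a single pulse covers
     * both directions.
     */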
    for (d = 0; d < dd->ndim; d++)
    {
        DDBufferAccess<gmx::RVec> rvecBuffer(comm->rvecBuffer, 0);

        dim      = dd->dim[d];
        ncg_recv = 0;
        int nvr  = 0;
        for (dir = 0; dir < (dd->nc[dim] == 2 ? 1 : 2); dir++)
        {
            cdd = d*2 + dir;
            /* Communicate the cg and atom counts */
            sbuf[0] = ncg[cdd];
            sbuf[1] = nat[cdd];
            if (debug)
            {
                fprintf(debug, "Sending ddim %d dir %d: ncg %d nat %d\n",
                        d, dir, sbuf[0], sbuf[1]);
            }
            ddSendrecv(dd, d, dir, sbuf, 2, rbuf, 2);

            flagBuffer.resize((ncg_recv + rbuf[0])*DD_CGIBS);

            /* Communicate the charge group indices, sizes and flags */
            ddSendrecv(dd, d, dir,
                       comm->cggl_flag[cdd].data(), sbuf[0]*DD_CGIBS,
                       flagBuffer.buffer.data() + ncg_recv*DD_CGIBS, rbuf[0]*DD_CGIBS);

            nvs = ncg[cdd] + nat[cdd]*nvec;
            i   = rbuf[0]  + rbuf[1] *nvec;
            rvecBuffer.resize(nvr + i);

            /* Communicate cgcm and state */
            ddSendrecv(dd, d, dir,
                       as_rvec_array(comm->cgcm_state[cdd].data()), nvs,
                       as_rvec_array(rvecBuffer.buffer.data()) + nvr, i);
            ncg_recv += rbuf[0];
            nvr      += i;
        }

        dd_check_alloc_ncg(fr, state, f, home_pos_cg + ncg_recv);
        if (fr->cutoff_scheme == ecutsGROUP)
        {
            /* Here we resize to more than necessary and shrink later */
            dd_resize_state(state, f, home_pos_at + ncg_recv*MAX_CGCGSIZE);
        }

        /* Process the received charge groups */
        buf_pos = 0;
        for (cg = 0; cg < ncg_recv; cg++)
        {
            int   flag = flagBuffer.buffer[cg*DD_CGIBS + 1];
            rvec &pos  = as_rvec_array(rvecBuffer.buffer.data())[buf_pos];

            if (dim >= npbcdim && dd->nc[dim] > 2)
            {
                /* No pbc in this dim and more than one domain boundary.
                 * We do a separate check if a charge group didn't move too far.
                 */
                if (((flag & DD_FLAG_FW(d)) &&
                     pos[dim] > cell_x1[dim]) ||
                    ((flag & DD_FLAG_BW(d)) &&
                     pos[dim] < cell_x0[dim]))
                {
                    cg_move_error(fplog, dd, step, cg, dim,
                                  (flag & DD_FLAG_FW(d)) ? 1 : 0,
                                  fr->cutoff_scheme == ecutsGROUP, 0,
                                  pos, pos, pos[dim]);
                }
            }

            mc = -1;
            if (d < dd->ndim-1)
            {
                /* Check which direction this cg should go */
                for (d2 = d+1; (d2 < dd->ndim && mc == -1); d2++)
                {
                    if (isDlbOn(dd->comm))
                    {
                        /* The cell boundaries for dimension d2 are not equal
                         * for each cell row of the lower dimension(s),
                         * therefore we might need to redetermine where
                         * this cg should go.
                         */
                        dim2 = dd->dim[d2];
                        /* If this cg crosses the box boundary in dimension d2
                         * we can use the communicated flag, so we do not
                         * have to worry about pbc.
                         */
                        if (!((dd->ci[dim2] == dd->nc[dim2]-1 &&
                               (flag & DD_FLAG_FW(d2))) ||
                              (dd->ci[dim2] == 0 &&
                               (flag & DD_FLAG_BW(d2)))))
                        {
                            /* Clear the two flags for this dimension */
                            flag &= ~(DD_FLAG_FW(d2) | DD_FLAG_BW(d2));
                            /* Determine the location of this cg
                             * in lattice coordinates
                             */
                            pos_d = rvecBuffer.buffer[buf_pos][dim2];
                            if (tric_dir[dim2])
                            {
                                for (d3 = dim2+1; d3 < DIM; d3++)
                                {
                                    pos_d += pos[d3]*tcm[d3][dim2];
                                }
                            }

                            GMX_ASSERT(dim2 >= 0 && dim2 < DIM, "Keep the static analyzer happy");

                            /* Check if we are not at the box edge.
                             * pbc is only handled in the first step above,
                             * but this check could move over pbc while
                             * the first step did not due to different rounding.
                             */
                            if (pos_d >= cell_x1[dim2] &&
                                dd->ci[dim2] != dd->nc[dim2]-1)
                            {
                                flag |= DD_FLAG_FW(d2);
                            }
                            else if (pos_d < cell_x0[dim2] &&
                                     dd->ci[dim2] != 0)
                            {
                                flag |= DD_FLAG_BW(d2);
                            }
                            flagBuffer.buffer[cg*DD_CGIBS + 1] = flag;
                        }
                    }
                    /* Set to which neighboring cell this cg should go */
                    if (flag & DD_FLAG_FW(d2))
                    {
                        mc = d2*2;
                    }
                    else if (flag & DD_FLAG_BW(d2))
                    {
                        if (dd->nc[dd->dim[d2]] > 2)
                        {
                            mc = d2*2+1;
                        }
                        else
                        {
                            mc = d2*2;
                        }
                    }
                }
            }

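            /* The group size sits in the low bits of the flag word;
             * mc == -1 means the group stays on this rank, otherwise it
             * is appended to the send buffer for direction mc of a
             * later dimension.
             */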
            nrcg = flag & DD_FLAG_NRCG;
            if (mc == -1)
            {
                /* Set the global charge group index and size */
                const int globalAtomGroupIndex = flagBuffer.buffer[cg*DD_CGIBS];
                dd->globalAtomGroupIndices.push_back(globalAtomGroupIndex);
                dd->atomGrouping_.appendBlock(nrcg);
                /* Copy the state from the buffer */
                if (fr->cutoff_scheme == ecutsGROUP)
                {
                    cg_cm = fr->cg_cm;
                    copy_rvec(pos, cg_cm[home_pos_cg]);
                }
                buf_pos++;

                /* Set the cginfo */
                fr->cginfo[home_pos_cg] = ddcginfo(cginfo_mb,
                                                   globalAtomGroupIndex);
                if (comm->bLocalCG)
                {
                    comm->bLocalCG[globalAtomGroupIndex] = TRUE;
                }

                rvec *rvecPtr = as_rvec_array(rvecBuffer.buffer.data());
                for (i = 0; i < nrcg; i++)
                {
                    copy_rvec(rvecPtr[buf_pos++],
                              state->x[home_pos_at+i]);
                }
                if (bV)
                {
                    for (i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->v[home_pos_at+i]);
                    }
                }
                if (bCGP)
                {
                    for (i = 0; i < nrcg; i++)
                    {
                        copy_rvec(rvecPtr[buf_pos++],
                                  state->cg_p[home_pos_at+i]);
                    }
                }
                home_pos_cg += 1;
                home_pos_at += nrcg;
            }
            else
            {
                /* Reallocate the buffers if necessary */
                if (static_cast<size_t>((ncg[mc] + 1)*DD_CGIBS) > comm->cggl_flag[mc].size())
                {
                    comm->cggl_flag[mc].resize((ncg[mc] + 1)*DD_CGIBS);
                }
                size_t nvr = ncg[mc] + nat[mc]*nvec;
                if (nvr + 1 + nrcg*nvec > comm->cgcm_state[mc].size())
                {
                    comm->cgcm_state[mc].resize(nvr + 1 + nrcg*nvec);
                }
                /* Copy from the receive to the send buffers */
                memcpy(comm->cggl_flag[mc].data() + ncg[mc]*DD_CGIBS,
                       flagBuffer.buffer.data() + cg*DD_CGIBS,
                       DD_CGIBS*sizeof(int));
                memcpy(comm->cgcm_state[mc][nvr],
                       rvecBuffer.buffer.data() + buf_pos,
                       (1 + nrcg*nvec)*sizeof(rvec));
                buf_pos += 1 + nrcg*nvec;
                ncg[mc] += 1;
                nat[mc] += nrcg;
            }
        }
    }

    /* With sorting (!bCompact) the indices are now only partially up to date
     * and ncg_home and nat_home are not the real count, since there are
     * "holes" in the arrays for the charge groups that moved to neighbors.
     */
    if (fr->cutoff_scheme == ecutsVERLET)
    {
        /* We need to clear the moved flags for the received atoms,
         * because the moved buffer will be passed to the nbnxn gridding call.
         */
        moved = getMovedBuffer(comm, dd->ncg_home, home_pos_cg);

        for (int i = dd->ncg_home; i < home_pos_cg; i++)
        {
            moved[i] = 0;
        }
    }

    dd->ncg_home = home_pos_cg;
    comm->atomRanges.setEnd(DDAtomRanges::Type::Home, home_pos_at);

    if (fr->cutoff_scheme == ecutsGROUP && !bCompact)
    {
        /* We overallocated before, we need to set the right size here */
        dd_resize_state(state, f, comm->atomRanges.numHomeAtoms());
    }

    if (debug)
    {
        fprintf(debug,
                "Finished repartitioning: cgs moved out %d, new home %d\n",
                *ncg_moved, dd->ncg_home-*ncg_moved);
    }
}