2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 /* TODO find out what this file should be called */
47 #include "gromacs/ewald/pme.h"
48 #include "gromacs/fft/parallel_3dfft.h"
49 #include "gromacs/math/vec.h"
50 #include "gromacs/timing/cyclecounter.h"
51 #include "gromacs/utility/fatalerror.h"
52 #include "gromacs/utility/smalloc.h"
54 #include "pme_internal.h"
57 # include "gromacs/fileio/pdbio.h"
58 # include "gromacs/utility/cstringutil.h"
59 # include "gromacs/utility/futil.h"
64 /* GMX_CACHE_SEP should be a multiple of the SIMD and SIMD4 register size
65 * to preserve alignment.
67 #define GMX_CACHE_SEP 64
/* Communicates the PME grid overlap regions between domain-decomposition
 * PME ranks.
 *
 * direction == GMX_SUM_GRID_FORWARD: each rank sends its own overlap data
 * and ADDS the received data into its local grid (charge accumulation).
 * Otherwise the send/receive roles are swapped and the received data
 * OVERWRITES the local overlap region (spreading potential data back out).
 *
 * The minor (y) dimension is handled first and needs pack/unpack via
 * sendbuf/recvbuf because the data is not contiguous; the major (x)
 * dimension is contiguous and can be sent in place.
 * NOTE(review): the tail of this function release-asserts when built
 * without MPI, so all communication paths presume an MPI build.
 */
69 void gmx_sum_qgrid_dd(gmx_pme_t* pme, real* grid, const int direction)
72 pme_overlap_t* overlap;
73 int send_index0, send_nindex;
74 int recv_index0, recv_nindex;
76 int i, j, k, ix, iy, iz, icnt;
77 int send_id, recv_id, datasize;
79 real * sendptr, *recvptr;
81 /* Start with minor-rank communication. This is a bit of a pain since it is not contiguous */
82 overlap = &pme->overlap[1];
84 for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
86 /* Since we have already (un)wrapped the overlap in the z-dimension,
87 * we only have to communicate 0 to nkz (not pmegrid_nz).
89 if (direction == GMX_SUM_GRID_FORWARD)
/* Forward: send our overlap region, receive the neighbor's. */
91 send_id = overlap->comm_data[ipulse].send_id;
92 recv_id = overlap->comm_data[ipulse].recv_id;
93 send_index0 = overlap->comm_data[ipulse].send_index0;
94 send_nindex = overlap->comm_data[ipulse].send_nindex;
95 recv_index0 = overlap->comm_data[ipulse].recv_index0;
96 recv_nindex = overlap->comm_data[ipulse].recv_nindex;
/* Backward: swap the roles of sender and receiver. */
100 send_id = overlap->comm_data[ipulse].recv_id;
101 recv_id = overlap->comm_data[ipulse].send_id;
102 send_index0 = overlap->comm_data[ipulse].recv_index0;
103 send_nindex = overlap->comm_data[ipulse].recv_nindex;
104 recv_index0 = overlap->comm_data[ipulse].send_index0;
105 recv_nindex = overlap->comm_data[ipulse].send_nindex;
108 /* Copy data to contiguous send buffer */
112 "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
116 pme->pmegrid_start_iy,
117 send_index0 - pme->pmegrid_start_iy,
118 send_index0 - pme->pmegrid_start_iy + send_nindex);
/* Pack: full local x range, the communicated y lines, z only up to nkz. */
121 for (i = 0; i < pme->pmegrid_nx; i++)
124 for (j = 0; j < send_nindex; j++)
126 iy = j + send_index0 - pme->pmegrid_start_iy;
127 for (k = 0; k < pme->nkz; k++)
130 overlap->sendbuf[icnt++] =
131 grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz];
/* Elements exchanged per communicated y line: full x extent times nkz. */
136 datasize = pme->pmegrid_nx * pme->nkz;
138 MPI_Sendrecv(overlap->sendbuf.data(),
139 send_nindex * datasize,
143 overlap->recvbuf.data(),
144 recv_nindex * datasize,
151 /* Get data from contiguous recv buffer */
155 "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
159 pme->pmegrid_start_iy,
160 recv_index0 - pme->pmegrid_start_iy,
161 recv_index0 - pme->pmegrid_start_iy + recv_nindex);
/* Unpack: add (forward) or overwrite (backward) into the local grid. */
164 for (i = 0; i < pme->pmegrid_nx; i++)
167 for (j = 0; j < recv_nindex; j++)
169 iy = j + recv_index0 - pme->pmegrid_start_iy;
170 for (k = 0; k < pme->nkz; k++)
173 if (direction == GMX_SUM_GRID_FORWARD)
175 grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] +=
176 overlap->recvbuf[icnt++];
180 grid[ix * (pme->pmegrid_ny * pme->pmegrid_nz) + iy * (pme->pmegrid_nz) + iz] =
181 overlap->recvbuf[icnt++];
188 /* Major dimension is easier, no copying required,
189 * but we might have to sum to separate array.
190 * Since we don't copy, we have to communicate up to pmegrid_nz,
191 * not nkz as for the minor direction.
193 overlap = &pme->overlap[0];
195 for (size_t ipulse = 0; ipulse < overlap->comm_data.size(); ipulse++)
197 if (direction == GMX_SUM_GRID_FORWARD)
/* Forward: receive into the separate buffer, summed into the grid below. */
199 send_id = overlap->comm_data[ipulse].send_id;
200 recv_id = overlap->comm_data[ipulse].recv_id;
201 send_index0 = overlap->comm_data[ipulse].send_index0;
202 send_nindex = overlap->comm_data[ipulse].send_nindex;
203 recv_index0 = overlap->comm_data[ipulse].recv_index0;
204 recv_nindex = overlap->comm_data[ipulse].recv_nindex;
205 recvptr = overlap->recvbuf.data();
/* Backward: receive directly into the grid's overlap region in place. */
209 send_id = overlap->comm_data[ipulse].recv_id;
210 recv_id = overlap->comm_data[ipulse].send_id;
211 send_index0 = overlap->comm_data[ipulse].recv_index0;
212 send_nindex = overlap->comm_data[ipulse].recv_nindex;
213 recv_index0 = overlap->comm_data[ipulse].send_index0;
214 recv_nindex = overlap->comm_data[ipulse].send_nindex;
215 recvptr = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
218 sendptr = grid + (send_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
219 datasize = pme->pmegrid_ny * pme->pmegrid_nz;
224 "PME send rank %d %d -> %d grid start %d Communicating %d to %d\n",
228 pme->pmegrid_start_ix,
229 send_index0 - pme->pmegrid_start_ix,
230 send_index0 - pme->pmegrid_start_ix + send_nindex);
232 "PME recv rank %d %d <- %d grid start %d Communicating %d to %d\n",
236 pme->pmegrid_start_ix,
237 recv_index0 - pme->pmegrid_start_ix,
238 recv_index0 - pme->pmegrid_start_ix + recv_nindex);
241 MPI_Sendrecv(sendptr,
242 send_nindex * datasize,
247 recv_nindex * datasize,
254 /* ADD data from contiguous recv buffer */
255 if (direction == GMX_SUM_GRID_FORWARD)
257 p = grid + (recv_index0 - pme->pmegrid_start_ix) * (pme->pmegrid_ny * pme->pmegrid_nz);
258 for (i = 0; i < recv_nindex * datasize; i++)
260 p[i] += overlap->recvbuf[i];
/* Non-MPI build: silence unused-parameter warnings and fail loudly. */
265 GMX_UNUSED_VALUE(pme);
266 GMX_UNUSED_VALUE(grid);
267 GMX_UNUSED_VALUE(direction);
269 GMX_RELEASE_ASSERT(false, "gmx_sum_qgrid_dd() should not be called without MPI");
/* Copies the local PME grid (grid_index selects A/B/LJ grid) into the FFT
 * grid for that index.  The two grids share the lower-left origin; only the
 * strides differ, since the PME grid carries extra overlap planes.
 * The debug branch (guard lines not visible in this view) dumps the grid to
 * pmegrid<rank>.pdb / pmegrid<rank>.txt for inspection.
 * NOTE(review): the return statement is not visible here - presumably
 * returns 0 on success; confirm against the full source.
 */
274 int copy_pmegrid_to_fftgrid(const gmx_pme_t* pme, const real* pmegrid, real* fftgrid, int grid_index)
276 ivec local_fft_ndata, local_fft_offset, local_fft_size;
281 /* Dimensions should be identical for A/B grid, so we just use A here */
282 gmx_parallel_3dfft_real_limits(
283 pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset, local_fft_size);
285 local_pme_size[0] = pme->pmegrid_nx;
286 local_pme_size[1] = pme->pmegrid_ny;
287 local_pme_size[2] = pme->pmegrid_nz;
289 /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
290 the offset is identical, and the PME grid always has more data (due to overlap)
/* Debug-only grid dump; file names are per-MPI-rank. */
297 sprintf(fn, "pmegrid%d.pdb", pme->nodeid);
298 fp = gmx_ffopen(fn, "w");
299 sprintf(fn, "pmegrid%d.txt", pme->nodeid);
300 fp2 = gmx_ffopen(fn, "w");
/* Element-wise copy over the FFT-grid extent; index with each grid's own strides. */
303 for (ix = 0; ix < local_fft_ndata[XX]; ix++)
305 for (iy = 0; iy < local_fft_ndata[YY]; iy++)
307 for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
309 pmeidx = ix * (local_pme_size[YY] * local_pme_size[ZZ])
310 + iy * (local_pme_size[ZZ]) + iz;
311 fftidx = ix * (local_fft_size[YY] * local_fft_size[ZZ])
312 + iy * (local_fft_size[ZZ]) + iz;
313 fftgrid[fftidx] = pmegrid[pmeidx];
315 val = 100 * pmegrid[pmeidx];
316 if (pmegrid[pmeidx] != 0)
318 gmx_fprintf_pdb_atomline(fp,
334 if (pmegrid[pmeidx] != 0)
337 "%-12s %5d %5d %5d %12.5e\n",
339 pme->pmegrid_start_ix + ix,
340 pme->pmegrid_start_iy + iy,
341 pme->pmegrid_start_iz + iz,
/* Cycle-counter helpers, compiled only when per-thread PME timing is on. */
357 #ifdef PME_TIME_THREADS
/* Reads the CPU cycle counter at the start of a timed region. */
358 static gmx_cycles_t omp_cyc_start()
360 return gmx_cycles_read();
/* Returns the cycles elapsed since the matching omp_cyc_start() value c. */
363 static gmx_cycles_t omp_cyc_end(gmx_cycles_t c)
365 return gmx_cycles_read() - c;
/* Copies the FFT grid back into the PME grid, thread-parallel: thread
 * 'thread' of 'nthread' handles its contiguous share of the x-y lines
 * (range [ixy0, ixy1)), copying each full z row.  Optional per-thread
 * timing is compiled in under PME_TIME_THREADS.
 * NOTE(review): the return statement is not visible in this view.
 */
370 int copy_fftgrid_to_pmegrid(struct gmx_pme_t* pme, const real* fftgrid, real* pmegrid, int grid_index, int nthread, int thread)
372 ivec local_fft_ndata, local_fft_offset, local_fft_size;
374 int ixy0, ixy1, ixy, ix, iy, iz;
376 #ifdef PME_TIME_THREADS
378 static double cs1 = 0;
382 #ifdef PME_TIME_THREADS
383 c1 = omp_cyc_start();
385 /* Dimensions should be identical for A/B grid, so we just use A here */
386 gmx_parallel_3dfft_real_limits(
387 pme->pfft_setup[grid_index], local_fft_ndata, local_fft_offset, local_fft_size);
389 local_pme_size[0] = pme->pmegrid_nx;
390 local_pme_size[1] = pme->pmegrid_ny;
391 local_pme_size[2] = pme->pmegrid_nz;
393 /* The fftgrid is always 'justified' to the lower-left corner of the PME grid,
394 the offset is identical, and the PME grid always has more data (due to overlap)
/* Static partition of the flattened x-y index space over the threads. */
396 ixy0 = ((thread)*local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
397 ixy1 = ((thread + 1) * local_fft_ndata[XX] * local_fft_ndata[YY]) / nthread;
399 for (ixy = ixy0; ixy < ixy1; ixy++)
/* Recover (ix, iy) from the flattened index, then copy the whole z row. */
401 ix = ixy / local_fft_ndata[YY];
402 iy = ixy - ix * local_fft_ndata[YY];
404 pmeidx = (ix * local_pme_size[YY] + iy) * local_pme_size[ZZ];
405 fftidx = (ix * local_fft_size[YY] + iy) * local_fft_size[ZZ];
406 for (iz = 0; iz < local_fft_ndata[ZZ]; iz++)
408 pmegrid[pmeidx + iz] = fftgrid[fftidx + iz];
412 #ifdef PME_TIME_THREADS
413 c1 = omp_cyc_end(c1);
/* cs1 accumulates cycles; printed here in units of 1e9 cycles. */
418 printf("copy %.2f\n", cs1 * 1e-9);
/* Folds the periodic overlap regions of the PME grid back onto the
 * principal cell by summing.  The z overlap (pme_order - 1 planes) is
 * always local and always wrapped; y and x are wrapped here only when
 * that dimension is not domain-decomposed (nnodes_minor / nnodes_major
 * == 1), otherwise gmx_sum_qgrid_dd() handles them via MPI.
 */
426 void wrap_periodic_pmegrid(const gmx_pme_t* pme, real* pmegrid)
428 int nx, ny, nz, pny, pnz, ny_x, overlap, ix, iy, iz;
434 pny = pme->pmegrid_ny;
435 pnz = pme->pmegrid_nz;
437 overlap = pme->pme_order - 1;
439 /* Add periodic overlap in z */
440 for (ix = 0; ix < pme->pmegrid_nx; ix++)
442 for (iy = 0; iy < pme->pmegrid_ny; iy++)
444 for (iz = 0; iz < overlap; iz++)
446 pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + iy) * pnz + nz + iz];
/* y overlap is local only without decomposition in the minor dimension. */
451 if (pme->nnodes_minor == 1)
453 for (ix = 0; ix < pme->pmegrid_nx; ix++)
455 for (iy = 0; iy < overlap; iy++)
457 for (iz = 0; iz < nz; iz++)
459 pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[(ix * pny + ny + iy) * pnz + iz];
/* x overlap is local only without decomposition in the major dimension. */
465 if (pme->nnodes_major == 1)
/* If y was already wrapped above, only ny lines carry data; else all. */
467 ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
469 for (ix = 0; ix < overlap; ix++)
471 for (iy = 0; iy < ny_x; iy++)
473 for (iz = 0; iz < nz; iz++)
475 pmegrid[(ix * pny + iy) * pnz + iz] += pmegrid[((nx + ix) * pny + iy) * pnz + iz];
/* Inverse of wrap_periodic_pmegrid(): copies data from the principal cell
 * out into the periodic overlap regions (x first, then y, then z), so the
 * spline gather can read without wrapping indices.  As in the wrap case,
 * x and y are handled locally only when not domain-decomposed.  The y and
 * z copies are OpenMP-parallel over the x planes.
 */
483 void unwrap_periodic_pmegrid(struct gmx_pme_t* pme, real* pmegrid)
485 int nx, ny, nz, pny, pnz, ny_x, overlap, ix;
491 pny = pme->pmegrid_ny;
492 pnz = pme->pmegrid_nz;
494 overlap = pme->pme_order - 1;
496 if (pme->nnodes_major == 1)
498 ny_x = (pme->nnodes_minor == 1 ? ny : pme->pmegrid_ny);
500 for (ix = 0; ix < overlap; ix++)
504 for (iy = 0; iy < ny_x; iy++)
506 for (iz = 0; iz < nz; iz++)
/* Copy the low-x planes out to the high-x overlap region. */
508 pmegrid[((nx + ix) * pny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
514 if (pme->nnodes_minor == 1)
516 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
517 for (ix = 0; ix < pme->pmegrid_nx; ix++)
519 // Trivial OpenMP region that does not throw, no need for try/catch
522 for (iy = 0; iy < overlap; iy++)
524 for (iz = 0; iz < nz; iz++)
/* Copy the low-y lines out to the high-y overlap region. */
526 pmegrid[(ix * pny + ny + iy) * pnz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
532 /* Copy periodic overlap in z */
533 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
534 for (ix = 0; ix < pme->pmegrid_nx; ix++)
536 // Trivial OpenMP region that does not throw, no need for try/catch
539 for (iy = 0; iy < pme->pmegrid_ny; iy++)
541 for (iz = 0; iz < overlap; iz++)
543 pmegrid[(ix * pny + iy) * pnz + nz + iz] = pmegrid[(ix * pny + iy) * pnz + iz];
/* Rounds the z extent of a PME grid up so SIMD4 aligned loads/stores stay
 * within the grid.  Only active for SIMD4 spread/gather builds without
 * 4N-SIMD gather; otherwise both parameters are unused (hence gmx_unused).
 * NOTE(review): a pme_order-dependent condition appears to guard the
 * rounding in the full source but is not visible in this view - confirm.
 */
549 void set_grid_alignment(int gmx_unused* pmegrid_nz, int gmx_unused pme_order)
551 #ifdef PME_SIMD4_SPREAD_GATHER
553 # if !PME_4NSIMD_GATHER
558 /* Round nz up to a multiple of 4 to ensure alignment */
559 *pmegrid_nz = ((*pmegrid_nz + 3) & ~3);
/* Pads the total allocation size of a PME grid so that aligned SIMD4
 * operations cannot read or write past the end of the allocation.  Only
 * active for SIMD4 spread/gather without 4N-SIMD gather; otherwise both
 * parameters are unused.
 * NOTE(review): the statement adjusting *gridsize is not visible in this
 * view of the source.
 */
564 static void set_gridsize_alignment(int gmx_unused* gridsize, int gmx_unused pme_order)
566 #ifdef PME_SIMD4_SPREAD_GATHER
567 # if !PME_4NSIMD_GATHER
570 /* Add extra elements to ensured aligned operations do not go
571 * beyond the allocated grid size.
572 * Note that for pme_order=5, the pme grid z-size alignment
573 * ensures that we will not go beyond the grid size.
/* Initializes one pmegrid_t covering the region [x0,x1) x [y0,y1) x [z0,z1),
 * extended by pme_order - 1 overlap points in each dimension.  When
 * set_alignment is requested the z size is rounded up for SIMD4; when a
 * pre-aligned grid is passed in, an unaligned z size is an internal error.
 * Storage is allocated here (snew_aligned) unless the caller supplied it.
 * NOTE(review): several parameters and the ptr-vs-allocate branch are not
 * visible in this view - see the full parameter list in the source.
 */
581 void pmegrid_init(pmegrid_t* grid,
591 gmx_bool set_alignment,
600 grid->offset[XX] = x0;
601 grid->offset[YY] = y0;
602 grid->offset[ZZ] = z0;
/* Extent includes the pme_order - 1 spline overlap in each dimension. */
603 grid->n[XX] = x1 - x0 + pme_order - 1;
604 grid->n[YY] = y1 - y0 + pme_order - 1;
605 grid->n[ZZ] = z1 - z0 + pme_order - 1;
606 copy_ivec(grid->n, grid->s);
609 set_grid_alignment(&nz, pme_order);
614 else if (nz != grid->s[ZZ])
616 gmx_incons("pmegrid_init call with an unaligned z size");
619 grid->order = pme_order;
622 gridsize = grid->s[XX] * grid->s[YY] * grid->s[ZZ];
623 set_gridsize_alignment(&gridsize, pme_order);
624 snew_aligned(grid->grid, gridsize, SIMD4_ALIGNMENT);
/*! \brief Returns the integer ceiling of enumerator / denominator.
 *
 * Used for dividing grid lines over threads: any remainder counts as one
 * extra element.  Assumes a positive denominator.
 */
static int div_round_up(int enumerator, int denominator)
{
    /* Adding denominator - 1 before truncating division rounds up. */
    const int biased = enumerator + denominator - 1;

    return biased / denominator;
}
/* Chooses a 3D thread decomposition nsub[XX] x nsub[YY] x nsub[ZZ] of the
 * grid n over nthread threads.  All factorizations of nthread are tried;
 * the winner minimizes the per-thread grid volume including the overlap
 * 'ovl' in each dimension, with fewer cuts in the minor dimensions as a
 * tie-breaker.  The GMX_PME_THREAD_DIVISION environment variable can
 * override the result; an override whose product is not nthread is fatal.
 */
637 static void make_subgrid_division(const ivec n, int ovl, int nthread, ivec nsub)
639 int gsize_opt, gsize;
644 for (nsx = 1; nsx <= nthread; nsx++)
646 if (nthread % nsx == 0)
648 for (nsy = 1; nsy <= nthread; nsy++)
650 if (nsx * nsy <= nthread && nthread % (nsx * nsy) == 0)
652 nsz = nthread / (nsx * nsy);
654 /* Determine the number of grid points per thread */
655 gsize = (div_round_up(n[XX], nsx) + ovl) * (div_round_up(n[YY], nsy) + ovl)
656 * (div_round_up(n[ZZ], nsz) + ovl);
658 /* Minimize the number of grids points per thread
659 * and, secondarily, the number of cuts in minor dimensions.
661 if (gsize_opt == -1 || gsize < gsize_opt
662 || (gsize == gsize_opt && (nsz < nsub[ZZ] || (nsz == nsub[ZZ] && nsy < nsub[YY]))))
/* Optional manual override of the thread grid division. */
674 env = getenv("GMX_PME_THREAD_DIVISION");
677 sscanf(env, "%20d %20d %20d", &nsub[XX], &nsub[YY], &nsub[ZZ]);
680 if (nsub[XX] * nsub[YY] * nsub[ZZ] != nthread)
683 "PME grid thread division (%d x %d x %d) does not match the total number of "
/* Initializes the full pmegrids_t: the single full-size grid plus, when
 * threads are used, one aligned sub-grid per thread (separated by
 * GMX_CACHE_SEP elements to avoid false sharing), the grid-to-thread
 * lookup tables g2t[d], and the per-dimension thread-communication ranges
 * nthread_comm[d].
 * NOTE(review): several parameters (nx_base, ny_base, overlap_x/y, ...)
 * and branch guards are not visible in this view of the source.
 */
692 void pmegrids_init(pmegrids_t* grids,
698 gmx_bool bUseThreads,
704 int t, x, y, z, d, i, tfac;
705 int max_comm_lines = -1;
/* n is the real grid extent: total size minus the pme_order - 1 overlap. */
707 n[XX] = nx - (pme_order - 1);
708 n[YY] = ny - (pme_order - 1);
709 n[ZZ] = nz - (pme_order - 1);
711 copy_ivec(n, n_base);
712 n_base[ZZ] = nz_base;
714 pmegrid_init(&grids->grid, 0, 0, 0, 0, 0, 0, n[XX], n[YY], n[ZZ], FALSE, pme_order, nullptr);
716 grids->nthread = nthread;
718 make_subgrid_division(n_base, pme_order - 1, grids->nthread, grids->nc);
/* nst[d]: per-thread sub-grid extent including the spline overlap. */
725 for (d = 0; d < DIM; d++)
727 nst[d] = div_round_up(n[d], grids->nc[d]) + pme_order - 1;
729 set_grid_alignment(&nst[ZZ], pme_order);
734 "pmegrid thread local division: %d x %d x %d\n",
738 fprintf(debug, "pmegrid %d %d %d max thread pmegrid %d %d %d\n", nx, ny, nz, nst[XX], nst[YY], nst[ZZ]);
741 snew(grids->grid_th, grids->nthread);
/* One aligned allocation holds all thread grids, padded with
 * GMX_CACHE_SEP elements between (and around) them. */
743 gridsize = nst[XX] * nst[YY] * nst[ZZ];
744 set_gridsize_alignment(&gridsize, pme_order);
745 snew_aligned(grids->grid_all,
746 grids->nthread * gridsize + (grids->nthread + 1) * GMX_CACHE_SEP,
749 for (x = 0; x < grids->nc[XX]; x++)
751 for (y = 0; y < grids->nc[YY]; y++)
753 for (z = 0; z < grids->nc[ZZ]; z++)
755 pmegrid_init(&grids->grid_th[t],
759 (n[XX] * (x)) / grids->nc[XX],
760 (n[YY] * (y)) / grids->nc[YY],
761 (n[ZZ] * (z)) / grids->nc[ZZ],
762 (n[XX] * (x + 1)) / grids->nc[XX],
763 (n[YY] * (y + 1)) / grids->nc[YY],
764 (n[ZZ] * (z + 1)) / grids->nc[ZZ],
767 grids->grid_all + GMX_CACHE_SEP + t * (gridsize + GMX_CACHE_SEP));
775 grids->grid_th = nullptr;
/* Build g2t[d]: for each grid line, the owning thread index scaled by
 * tfac, so summing over dimensions gives the flat thread id. */
779 for (d = DIM - 1; d >= 0; d--)
781 snew(grids->g2t[d], n[d]);
783 for (i = 0; i < n[d]; i++)
785 /* The second check should match the parameters
786 * of the pmegrid_init call above.
788 while (t + 1 < grids->nc[d] && i >= (n[d] * (t + 1)) / grids->nc[d])
792 grids->g2t[d][i] = t * tfac;
795 tfac *= grids->nc[d];
/* Maximum grid lines a thread needs from its neighbors per dimension. */
799 case XX: max_comm_lines = overlap_x; break;
800 case YY: max_comm_lines = overlap_y; break;
801 case ZZ: max_comm_lines = pme_order - 1; break;
803 grids->nthread_comm[d] = 0;
804 while ((n[d] * grids->nthread_comm[d]) / grids->nc[d] < max_comm_lines
805 && grids->nthread_comm[d] < grids->nc[d])
807 grids->nthread_comm[d]++;
809 if (debug != nullptr)
812 "pmegrid thread grid communication range in %c: %d\n",
814 grids->nthread_comm[d]);
816 /* It should be possible to make grids->nthread_comm[d]==grids->nc[d]
817 * work, but this is not a problematic restriction.
819 if (grids->nc[d] > 1 && grids->nthread_comm[d] > grids->nc[d])
822 "Too many threads for PME (%d) compared to the number of grid lines, reduce "
823 "the number of threads doing PME",
829 void pmegrids_destroy(pmegrids_t* grids)
831 if (grids->grid.grid != nullptr)
833 sfree_aligned(grids->grid.grid);
835 if (grids->nthread > 0)
837 sfree_aligned(grids->grid_all);
838 sfree(grids->grid_th);
840 for (int d = 0; d < DIM; d++)
842 sfree(grids->g2t[d]);
/* Builds the lookup tables used during spline spreading: for each of the
 * c_pmeNeighborUnitcellCount * n possible global grid indices,
 * *global_to_local gives the local grid line index and *fraction_shift the
 * correction to apply to the particle's grid fraction when the index had to
 * be clamped to the local range.  Both arrays are allocated here with snew;
 * the caller owns them.
 * NOTE(review): the decomposition check and the fsh assignments are not
 * visible in this view of the source.
 */
847 void make_gridindex_to_localindex(int n, int local_start, int local_range, int** global_to_local, real** fraction_shift)
849 /* Here we construct array for looking up the grid line index and
850 * fraction for particles. This is done because it is slightly
851 * faster than the modulo operation and because we need to take
852 * care of rounding issues, see below.
853 * We use an array size of c_pmeNeighborUnitcellCount times the grid size
854 * to allow for particles to be out of the triclinic unit-cell.
856 const int arraySize = c_pmeNeighborUnitcellCount * n;
860 snew(gtl, arraySize);
861 snew(fsh, arraySize);
863 for (int i = 0; i < arraySize; i++)
865 /* Transform global grid index to the local grid index.
866 * Our local grid always runs from 0 to local_range-1.
868 gtl[i] = (i - local_start + n) % n;
869 /* For coordinates that fall within the local grid the fraction
870 * is correct, we don't need to shift it.
873 /* Check if we are using domain decomposition for PME */
876 /* Due to rounding issues i could be 1 beyond the lower or
877 * upper boundary of the local grid. Correct the index for this.
878 * If we shift the index, we need to shift the fraction by
879 * the same amount in the other direction to not affect
881 * Note that due to this shifting the weights at the end of
882 * the spline might change, but that will only involve values
883 * between zero and values close to the precision of a real,
884 * which is anyhow the accuracy of the whole mesh calculation.
888 /* When this i is used, we should round the local index up */
892 else if (gtl[i] == local_range && local_range > 0)
894 /* When this i is used, we should round the local index down */
895 gtl[i] = local_range - 1;
/* Hand ownership of both freshly allocated tables to the caller. */
901 *global_to_local = gtl;
902 *fraction_shift = fsh;
905 void reuse_pmegrids(const pmegrids_t* oldgrid, pmegrids_t* newgrid)
909 for (d = 0; d < DIM; d++)
911 if (newgrid->grid.n[d] > oldgrid->grid.n[d])
917 sfree_aligned(newgrid->grid.grid);
918 newgrid->grid.grid = oldgrid->grid.grid;
920 if (newgrid->grid_th != nullptr && newgrid->nthread == oldgrid->nthread)
922 sfree_aligned(newgrid->grid_all);
923 newgrid->grid_all = oldgrid->grid_all;
924 for (t = 0; t < newgrid->nthread; t++)
926 newgrid->grid_th[t].grid = oldgrid->grid_th[t].grid;