2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "pme-gather.h"
42 #include "gromacs/ewald/pme-internal.h"
43 #include "gromacs/ewald/pme-simd.h"
44 #include "gromacs/ewald/pme-spline-work.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/utility/smalloc.h"
48 #define DO_FSPLINE(order) \
49 for (ithx = 0; (ithx < order); ithx++) \
51 index_x = (i0+ithx)*pny*pnz; \
55 for (ithy = 0; (ithy < order); ithy++) \
57 index_xy = index_x+(j0+ithy)*pnz; \
62 for (ithz = 0; (ithz < order); ithz++) \
64 gval = grid[index_xy+(k0+ithz)]; \
65 fxy1 += thz[ithz]*gval; \
66 fz1 += dthz[ithz]*gval; \
75 void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
76 gmx_bool bClearF, pme_atomcomm_t *atc,
80 /* sum forces for local particles */
81 int nn, n, ithx, ithy, ithz, i0, j0, k0;
82 int index_x, index_xy;
83 int nx, ny, nz, pnx, pny, pnz;
85 real tx, ty, dx, dy, coefficient;
86 real fx, fy, fz, gval;
88 real *thx, *thy, *thz, *dthx, *dthy, *dthz;
90 real rxx, ryx, ryy, rzx, rzy, rzz;
93 struct pme_spline_work *work;
95 #if defined PME_SIMD4_SPREAD_GATHER && !defined PME_SIMD4_UNALIGNED
96 real thz_buffer[GMX_SIMD4_WIDTH*3], *thz_aligned;
97 real dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
99 thz_aligned = gmx_simd4_align_r(thz_buffer);
100 dthz_aligned = gmx_simd4_align_r(dthz_buffer);
103 work = pme->spline_work;
105 order = pme->pme_order;
106 thx = spline->theta[XX];
107 thy = spline->theta[YY];
108 thz = spline->theta[ZZ];
109 dthx = spline->dtheta[XX];
110 dthy = spline->dtheta[YY];
111 dthz = spline->dtheta[ZZ];
115 pnx = pme->pmegrid_nx;
116 pny = pme->pmegrid_ny;
117 pnz = pme->pmegrid_nz;
119 rxx = pme->recipbox[XX][XX];
120 ryx = pme->recipbox[YY][XX];
121 ryy = pme->recipbox[YY][YY];
122 rzx = pme->recipbox[ZZ][XX];
123 rzy = pme->recipbox[ZZ][YY];
124 rzz = pme->recipbox[ZZ][ZZ];
126 for (nn = 0; nn < spline->n; nn++)
129 coefficient = scale*atc->coefficient[n];
137 if (coefficient != 0)
142 idxptr = atc->idx[n];
149 /* Pointer arithmetic alert, next six statements */
150 thx = spline->theta[XX] + norder;
151 thy = spline->theta[YY] + norder;
152 thz = spline->theta[ZZ] + norder;
153 dthx = spline->dtheta[XX] + norder;
154 dthy = spline->dtheta[YY] + norder;
155 dthz = spline->dtheta[ZZ] + norder;
160 #ifdef PME_SIMD4_SPREAD_GATHER
161 #ifdef PME_SIMD4_UNALIGNED
162 #define PME_GATHER_F_SIMD4_ORDER4
164 #define PME_GATHER_F_SIMD4_ALIGNED
167 #include "gromacs/ewald/pme-simd4.h"
173 #ifdef PME_SIMD4_SPREAD_GATHER
174 #define PME_GATHER_F_SIMD4_ALIGNED
176 #include "gromacs/ewald/pme-simd4.h"
186 atc->f[n][XX] += -coefficient*( fx*nx*rxx );
187 atc->f[n][YY] += -coefficient*( fx*nx*ryx + fy*ny*ryy );
188 atc->f[n][ZZ] += -coefficient*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
191 /* Since the energy and not forces are interpolated
192 * the net force might not be exactly zero.
193 * This can be solved by also interpolating F, but
194 * that comes at a cost.
195 * A better hack is to remove the net force every
196 * step, but that must be done at a higher level
197 * since this routine doesn't see all atoms if running
198 * in parallel. Don't know how important it is? EL 990726
203 real gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
206 splinedata_t *spline;
207 int n, ithx, ithy, ithz, i0, j0, k0;
208 int index_x, index_xy;
210 real energy, pot, tx, ty, coefficient, gval;
211 real *thx, *thy, *thz;
215 spline = &atc->spline[0];
217 order = pme->pme_order;
220 for (n = 0; (n < atc->n); n++)
222 coefficient = atc->coefficient[n];
224 if (coefficient != 0)
226 idxptr = atc->idx[n];
233 /* Pointer arithmetic alert, next three statements */
234 thx = spline->theta[XX] + norder;
235 thy = spline->theta[YY] + norder;
236 thz = spline->theta[ZZ] + norder;
239 for (ithx = 0; (ithx < order); ithx++)
241 index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
244 for (ithy = 0; (ithy < order); ithy++)
246 index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
249 for (ithz = 0; (ithz < order); ithz++)
251 gval = grid[index_xy+(k0+ithz)];
252 pot += tx*ty*thz[ithz]*gval;
258 energy += pot*coefficient;