New and reorganized documentation
[alexxy/gromacs.git] / src / gromacs / ewald / pme-gather.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5  * Copyright (c) 2001-2004, The GROMACS development team.
6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8  * and including many others, as listed in the AUTHORS file in the
9  * top-level source directory and at http://www.gromacs.org.
10  *
11  * GROMACS is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public License
13  * as published by the Free Software Foundation; either version 2.1
14  * of the License, or (at your option) any later version.
15  *
16  * GROMACS is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with GROMACS; if not, see
23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
25  *
26  * If you want to redistribute modifications to GROMACS, please
27  * consider that scientific software is very special. Version
28  * control is crucial - bugs must be traceable. We will be happy to
29  * consider code for inclusion in the official distribution, but
30  * derived work must not be called official GROMACS. Details are found
31  * in the README & COPYING files - if they are missing, get the
32  * official version at http://www.gromacs.org.
33  *
34  * To help us fund GROMACS development, we humbly ask that you cite
35  * the research papers on the package. Check out http://www.gromacs.org.
36  */
37
38 #include "gmxpre.h"
39
40 #include "pme-gather.h"
41
42 #include "gromacs/ewald/pme-internal.h"
43 #include "gromacs/ewald/pme-simd.h"
44 #include "gromacs/ewald/pme-spline-work.h"
45 #include "gromacs/math/vec.h"
46 #include "gromacs/utility/smalloc.h"
47
48 #define DO_FSPLINE(order)                      \
49     for (ithx = 0; (ithx < order); ithx++)              \
50     {                                              \
51         index_x = (i0+ithx)*pny*pnz;               \
52         tx      = thx[ithx];                       \
53         dx      = dthx[ithx];                      \
54                                                \
55         for (ithy = 0; (ithy < order); ithy++)          \
56         {                                          \
57             index_xy = index_x+(j0+ithy)*pnz;      \
58             ty       = thy[ithy];                  \
59             dy       = dthy[ithy];                 \
60             fxy1     = fz1 = 0;                    \
61                                                \
62             for (ithz = 0; (ithz < order); ithz++)      \
63             {                                      \
64                 gval  = grid[index_xy+(k0+ithz)];  \
65                 fxy1 += thz[ithz]*gval;            \
66                 fz1  += dthz[ithz]*gval;           \
67             }                                      \
68             fx += dx*ty*fxy1;                      \
69             fy += tx*dy*fxy1;                      \
70             fz += tx*ty*fz1;                       \
71         }                                          \
72     }
73
74
75 void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
76                        gmx_bool bClearF, pme_atomcomm_t *atc,
77                        splinedata_t *spline,
78                        real scale)
79 {
80     /* sum forces for local particles */
81     int                     nn, n, ithx, ithy, ithz, i0, j0, k0;
82     int                     index_x, index_xy;
83     int                     nx, ny, nz, pny, pnz;
84     int                 *   idxptr;
85     real                    tx, ty, dx, dy, coefficient;
86     real                    fx, fy, fz, gval;
87     real                    fxy1, fz1;
88     real                   *thx, *thy, *thz, *dthx, *dthy, *dthz;
89     int                     norder;
90     real                    rxx, ryx, ryy, rzx, rzy, rzz;
91     int                     order;
92
93 #ifdef PME_SIMD4_SPREAD_GATHER
94     // cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
95     struct pme_spline_work *work = pme->spline_work;
96 #ifndef PME_SIMD4_UNALIGNED
97     real                    thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
98     real                    dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
99
100     thz_aligned  = gmx_simd4_align_r(thz_buffer);
101     dthz_aligned = gmx_simd4_align_r(dthz_buffer);
102 #endif
103 #endif
104
105     order = pme->pme_order;
106     nx    = pme->nkx;
107     ny    = pme->nky;
108     nz    = pme->nkz;
109     pny   = pme->pmegrid_ny;
110     pnz   = pme->pmegrid_nz;
111
112     rxx   = pme->recipbox[XX][XX];
113     ryx   = pme->recipbox[YY][XX];
114     ryy   = pme->recipbox[YY][YY];
115     rzx   = pme->recipbox[ZZ][XX];
116     rzy   = pme->recipbox[ZZ][YY];
117     rzz   = pme->recipbox[ZZ][ZZ];
118
119     for (nn = 0; nn < spline->n; nn++)
120     {
121         n           = spline->ind[nn];
122         coefficient = scale*atc->coefficient[n];
123
124         if (bClearF)
125         {
126             atc->f[n][XX] = 0;
127             atc->f[n][YY] = 0;
128             atc->f[n][ZZ] = 0;
129         }
130         if (coefficient != 0)
131         {
132             fx     = 0;
133             fy     = 0;
134             fz     = 0;
135             idxptr = atc->idx[n];
136             norder = nn*order;
137
138             i0   = idxptr[XX];
139             j0   = idxptr[YY];
140             k0   = idxptr[ZZ];
141
142             /* Pointer arithmetic alert, next six statements */
143             thx  = spline->theta[XX] + norder;
144             thy  = spline->theta[YY] + norder;
145             thz  = spline->theta[ZZ] + norder;
146             dthx = spline->dtheta[XX] + norder;
147             dthy = spline->dtheta[YY] + norder;
148             dthz = spline->dtheta[ZZ] + norder;
149
150             switch (order)
151             {
152                 case 4:
153 #ifdef PME_SIMD4_SPREAD_GATHER
154 #ifdef PME_SIMD4_UNALIGNED
155 #define PME_GATHER_F_SIMD4_ORDER4
156 #else
157 #define PME_GATHER_F_SIMD4_ALIGNED
158 #define PME_ORDER 4
159 #endif
160 #include "gromacs/ewald/pme-simd4.h"
161 #else
162                     DO_FSPLINE(4);
163 #endif
164                     break;
165                 case 5:
166 #ifdef PME_SIMD4_SPREAD_GATHER
167 #define PME_GATHER_F_SIMD4_ALIGNED
168 #define PME_ORDER 5
169 #include "gromacs/ewald/pme-simd4.h"
170 #else
171                     DO_FSPLINE(5);
172 #endif
173                     break;
174                 default:
175                     DO_FSPLINE(order);
176                     break;
177             }
178
179             atc->f[n][XX] += -coefficient*( fx*nx*rxx );
180             atc->f[n][YY] += -coefficient*( fx*nx*ryx + fy*ny*ryy );
181             atc->f[n][ZZ] += -coefficient*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
182         }
183     }
184     /* Since the energy and not forces are interpolated
185      * the net force might not be exactly zero.
186      * This can be solved by also interpolating F, but
187      * that comes at a cost.
188      * A better hack is to remove the net force every
189      * step, but that must be done at a higher level
190      * since this routine doesn't see all atoms if running
191      * in parallel. Don't know how important it is?  EL 990726
192      */
193 }
194
195
196 real gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
197                             pme_atomcomm_t *atc)
198 {
199     splinedata_t *spline;
200     int     n, ithx, ithy, ithz, i0, j0, k0;
201     int     index_x, index_xy;
202     int *   idxptr;
203     real    energy, pot, tx, ty, coefficient, gval;
204     real    *thx, *thy, *thz;
205     int     norder;
206     int     order;
207
208     spline = &atc->spline[0];
209
210     order = pme->pme_order;
211
212     energy = 0;
213     for (n = 0; (n < atc->n); n++)
214     {
215         coefficient      = atc->coefficient[n];
216
217         if (coefficient != 0)
218         {
219             idxptr = atc->idx[n];
220             norder = n*order;
221
222             i0   = idxptr[XX];
223             j0   = idxptr[YY];
224             k0   = idxptr[ZZ];
225
226             /* Pointer arithmetic alert, next three statements */
227             thx  = spline->theta[XX] + norder;
228             thy  = spline->theta[YY] + norder;
229             thz  = spline->theta[ZZ] + norder;
230
231             pot = 0;
232             for (ithx = 0; (ithx < order); ithx++)
233             {
234                 index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
235                 tx      = thx[ithx];
236
237                 for (ithy = 0; (ithy < order); ithy++)
238                 {
239                     index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
240                     ty       = thy[ithy];
241
242                     for (ithz = 0; (ithz < order); ithz++)
243                     {
244                         gval  = grid[index_xy+(k0+ithz)];
245                         pot  += tx*ty*thz[ithz]*gval;
246                     }
247
248                 }
249             }
250
251             energy += pot*coefficient;
252         }
253     }
254
255     return energy;
256 }