src/gromacs/ewald/pme-gather.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #include "gmxpre.h"
  39
  40 #include "pme-gather.h"
  41
  42 #include "gromacs/ewald/pme-internal.h"
  43 #include "gromacs/ewald/pme-simd.h"
  44 #include "gromacs/ewald/pme-spline-work.h"
  45 #include "gromacs/math/vec.h"
  46 #include "gromacs/utility/smalloc.h"
  47
  48 #define DO_FSPLINE(order)                      \
  49     for (ithx = 0; (ithx < order); ithx++)              \
  50     {                                              \
  51         index_x = (i0+ithx)*pny*pnz;               \
  52         tx      = thx[ithx];                       \
  53         dx      = dthx[ithx];                      \
  54                                                \
  55         for (ithy = 0; (ithy < order); ithy++)          \
  56         {                                          \
  57             index_xy = index_x+(j0+ithy)*pnz;      \
  58             ty       = thy[ithy];                  \
  59             dy       = dthy[ithy];                 \
  60             fxy1     = fz1 = 0;                    \
  61                                                \
  62             for (ithz = 0; (ithz < order); ithz++)      \
  63             {                                      \
  64                 gval  = grid[index_xy+(k0+ithz)];  \
  65                 fxy1 += thz[ithz]*gval;            \
  66                 fz1  += dthz[ithz]*gval;           \
  67             }                                      \
  68             fx += dx*ty*fxy1;                      \
  69             fy += tx*dy*fxy1;                      \
  70             fz += tx*ty*fz1;                       \
  71         }                                          \
  72     }
  73
  74
  75 void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
  76                        gmx_bool bClearF, pme_atomcomm_t *atc,
  77                        splinedata_t *spline,
  78                        real scale)
  79 {
  80     /* sum forces for local particles */
  81     int                     nn, n, ithx, ithy, ithz, i0, j0, k0;
  82     int                     index_x, index_xy;
  83     int                     nx, ny, nz, pnx, pny, pnz;
  84     int                 *   idxptr;
  85     real                    tx, ty, dx, dy, coefficient;
  86     real                    fx, fy, fz, gval;
  87     real                    fxy1, fz1;
  88     real                   *thx, *thy, *thz, *dthx, *dthy, *dthz;
  89     int                     norder;
  90     real                    rxx, ryx, ryy, rzx, rzy, rzz;
  91     int                     order;
  92
  93     struct pme_spline_work *work;
  94
  95 #if defined PME_SIMD4_SPREAD_GATHER && !defined PME_SIMD4_UNALIGNED
  96     real           thz_buffer[GMX_SIMD4_WIDTH*3],  *thz_aligned;
  97     real           dthz_buffer[GMX_SIMD4_WIDTH*3], *dthz_aligned;
  98
  99     thz_aligned  = gmx_simd4_align_r(thz_buffer);
 100     dthz_aligned = gmx_simd4_align_r(dthz_buffer);
 101 #endif
 102
 103     work = pme->spline_work;
 104
 105     order = pme->pme_order;
 106     thx   = spline->theta[XX];
 107     thy   = spline->theta[YY];
 108     thz   = spline->theta[ZZ];
 109     dthx  = spline->dtheta[XX];
 110     dthy  = spline->dtheta[YY];
 111     dthz  = spline->dtheta[ZZ];
 112     nx    = pme->nkx;
 113     ny    = pme->nky;
 114     nz    = pme->nkz;
 115     pnx   = pme->pmegrid_nx;
 116     pny   = pme->pmegrid_ny;
 117     pnz   = pme->pmegrid_nz;
 118
 119     rxx   = pme->recipbox[XX][XX];
 120     ryx   = pme->recipbox[YY][XX];
 121     ryy   = pme->recipbox[YY][YY];
 122     rzx   = pme->recipbox[ZZ][XX];
 123     rzy   = pme->recipbox[ZZ][YY];
 124     rzz   = pme->recipbox[ZZ][ZZ];
 125
 126     for (nn = 0; nn < spline->n; nn++)
 127     {
 128         n           = spline->ind[nn];
 129         coefficient = scale*atc->coefficient[n];
 130
 131         if (bClearF)
 132         {
 133             atc->f[n][XX] = 0;
 134             atc->f[n][YY] = 0;
 135             atc->f[n][ZZ] = 0;
 136         }
 137         if (coefficient != 0)
 138         {
 139             fx     = 0;
 140             fy     = 0;
 141             fz     = 0;
 142             idxptr = atc->idx[n];
 143             norder = nn*order;
 144
 145             i0   = idxptr[XX];
 146             j0   = idxptr[YY];
 147             k0   = idxptr[ZZ];
 148
 149             /* Pointer arithmetic alert, next six statements */
 150             thx  = spline->theta[XX] + norder;
 151             thy  = spline->theta[YY] + norder;
 152             thz  = spline->theta[ZZ] + norder;
 153             dthx = spline->dtheta[XX] + norder;
 154             dthy = spline->dtheta[YY] + norder;
 155             dthz = spline->dtheta[ZZ] + norder;
 156
 157             switch (order)
 158             {
 159                 case 4:
 160 #ifdef PME_SIMD4_SPREAD_GATHER
 161 #ifdef PME_SIMD4_UNALIGNED
 162 #define PME_GATHER_F_SIMD4_ORDER4
 163 #else
 164 #define PME_GATHER_F_SIMD4_ALIGNED
 165 #define PME_ORDER 4
 166 #endif
 167 #include "gromacs/ewald/pme-simd4.h"
 168 #else
 169                     DO_FSPLINE(4);
 170 #endif
 171                     break;
 172                 case 5:
 173 #ifdef PME_SIMD4_SPREAD_GATHER
 174 #define PME_GATHER_F_SIMD4_ALIGNED
 175 #define PME_ORDER 5
 176 #include "gromacs/ewald/pme-simd4.h"
 177 #else
 178                     DO_FSPLINE(5);
 179 #endif
 180                     break;
 181                 default:
 182                     DO_FSPLINE(order);
 183                     break;
 184             }
 185
 186             atc->f[n][XX] += -coefficient*( fx*nx*rxx );
 187             atc->f[n][YY] += -coefficient*( fx*nx*ryx + fy*ny*ryy );
 188             atc->f[n][ZZ] += -coefficient*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 189         }
 190     }
 191     /* Since the energy and not forces are interpolated
 192      * the net force might not be exactly zero.
 193      * This can be solved by also interpolating F, but
 194      * that comes at a cost.
 195      * A better hack is to remove the net force every
 196      * step, but that must be done at a higher level
 197      * since this routine doesn't see all atoms if running
 198      * in parallel. Don't know how important it is?  EL 990726
 199      */
 200 }
 201
 202
 203 real gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
 204                             pme_atomcomm_t *atc)
 205 {
 206     splinedata_t *spline;
 207     int     n, ithx, ithy, ithz, i0, j0, k0;
 208     int     index_x, index_xy;
 209     int *   idxptr;
 210     real    energy, pot, tx, ty, coefficient, gval;
 211     real    *thx, *thy, *thz;
 212     int     norder;
 213     int     order;
 214
 215     spline = &atc->spline[0];
 216
 217     order = pme->pme_order;
 218
 219     energy = 0;
 220     for (n = 0; (n < atc->n); n++)
 221     {
 222         coefficient      = atc->coefficient[n];
 223
 224         if (coefficient != 0)
 225         {
 226             idxptr = atc->idx[n];
 227             norder = n*order;
 228
 229             i0   = idxptr[XX];
 230             j0   = idxptr[YY];
 231             k0   = idxptr[ZZ];
 232
 233             /* Pointer arithmetic alert, next three statements */
 234             thx  = spline->theta[XX] + norder;
 235             thy  = spline->theta[YY] + norder;
 236             thz  = spline->theta[ZZ] + norder;
 237
 238             pot = 0;
 239             for (ithx = 0; (ithx < order); ithx++)
 240             {
 241                 index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 242                 tx      = thx[ithx];
 243
 244                 for (ithy = 0; (ithy < order); ithy++)
 245                 {
 246                     index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 247                     ty       = thy[ithy];
 248
 249                     for (ithz = 0; (ithz < order); ithz++)
 250                     {
 251                         gval  = grid[index_xy+(k0+ithz)];
 252                         pot  += tx*ty*thz[ithz]*gval;
 253                     }
 254
 255                 }
 256             }
 257
 258             energy += pot*coefficient;
 259         }
 260     }
 261
 262     return energy;
 263 }