src/gromacs/ewald/pme-gather.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2017, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #include "gmxpre.h"
  39
  40 #include "gromacs/math/vec.h"
  41 #include "gromacs/simd/simd.h"
  42 #include "gromacs/utility/basedefinitions.h"
  43 #include "gromacs/utility/smalloc.h"
  44
  45 #include "pme-internal.h"
  46 #include "pme-simd.h"
  47 #include "pme-spline-work.h"
  48
  49 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
  50
  51 #define DO_FSPLINE(order)                      \
  52     for (int ithx = 0; (ithx < order); ithx++)              \
  53     {                                              \
  54         const int  index_x = (i0+ithx)*pny*pnz;               \
  55         const real tx      = thx[ithx];                             \
  56         const real dx      = dthx[ithx];                                      \
  57                                                \
  58         for (int ithy = 0; (ithy < order); ithy++)          \
  59         {                                          \
  60             const int  index_xy = index_x+(j0+ithy)*pnz;      \
  61             const real ty       = thy[ithy];                            \
  62             const real dy       = dthy[ithy];                                     \
  63             real       fxy1     = 0, fz1 = 0;                                 \
  64                                                \
  65             for (int ithz = 0; (ithz < order); ithz++)      \
  66             {                                      \
  67                 const real gval  = grid[index_xy+(k0+ithz)];  \
  68                 fxy1 += thz[ithz]*gval;            \
  69                 fz1  += dthz[ithz]*gval;           \
  70             }                                      \
  71             fx += dx*ty*fxy1;                      \
  72             fy += tx*dy*fxy1;                      \
  73             fz += tx*ty*fz1;                       \
  74         }                                          \
  75     }
  76
  77
  78 void gather_f_bsplines(struct gmx_pme_t *pme, real *grid,
  79                        gmx_bool bClearF, pme_atomcomm_t *atc,
  80                        splinedata_t *spline,
  81                        real scale)
  82 {
  83     /* sum forces for local particles */
  84
  85 #ifdef PME_SIMD4_SPREAD_GATHER
  86     // cppcheck-suppress unreadVariable cppcheck seems not to analyze code from pme-simd4.h
  87     struct pme_spline_work *work = pme->spline_work;
  88 #ifndef PME_SIMD4_UNALIGNED
  89     GMX_ALIGNED(real, GMX_SIMD4_WIDTH)  thz_aligned[GMX_SIMD4_WIDTH*2];
  90     GMX_ALIGNED(real, GMX_SIMD4_WIDTH)  dthz_aligned[GMX_SIMD4_WIDTH*2];
  91 #endif
  92 #endif
  93
  94     const int  order = pme->pme_order;
  95     const int  nx    = pme->nkx;
  96     const int  ny    = pme->nky;
  97     const int  nz    = pme->nkz;
  98     const int  pny   = pme->pmegrid_ny;
  99     const int  pnz   = pme->pmegrid_nz;
 100
 101     const real rxx   = pme->recipbox[XX][XX];
 102     const real ryx   = pme->recipbox[YY][XX];
 103     const real ryy   = pme->recipbox[YY][YY];
 104     const real rzx   = pme->recipbox[ZZ][XX];
 105     const real rzy   = pme->recipbox[ZZ][YY];
 106     const real rzz   = pme->recipbox[ZZ][ZZ];
 107
 108     for (int nn = 0; nn < spline->n; nn++)
 109     {
 110         const int  n           = spline->ind[nn];
 111         const real coefficient = scale*atc->coefficient[n];
 112
 113         if (bClearF)
 114         {
 115             atc->f[n][XX] = 0;
 116             atc->f[n][YY] = 0;
 117             atc->f[n][ZZ] = 0;
 118         }
 119         if (coefficient != 0)
 120         {
 121             real       fx     = 0;
 122             real       fy     = 0;
 123             real       fz     = 0;
 124             const int* idxptr = atc->idx[n];
 125             const int  norder = nn*order;
 126
 127             const int  i0   = idxptr[XX];
 128             const int  j0   = idxptr[YY];
 129             const int  k0   = idxptr[ZZ];
 130
 131             /* Pointer arithmetic alert, next six statements */
 132             const real* thx  = spline->theta[XX] + norder;
 133             const real* thy  = spline->theta[YY] + norder;
 134             const real* thz  = spline->theta[ZZ] + norder;
 135             const real* dthx = spline->dtheta[XX] + norder;
 136             const real* dthy = spline->dtheta[YY] + norder;
 137             const real* dthz = spline->dtheta[ZZ] + norder;
 138
 139             switch (order)
 140             {
 141                 case 4:
 142 #ifdef PME_SIMD4_SPREAD_GATHER
 143 #ifdef PME_SIMD4_UNALIGNED
 144 #define PME_GATHER_F_SIMD4_ORDER4
 145 #else
 146 #define PME_GATHER_F_SIMD4_ALIGNED
 147 #define PME_ORDER 4
 148 #endif
 149 #include "pme-simd4.h"
 150 #else
 151                     DO_FSPLINE(4);
 152 #endif
 153                     break;
 154                 case 5:
 155 #ifdef PME_SIMD4_SPREAD_GATHER
 156 #define PME_GATHER_F_SIMD4_ALIGNED
 157 #define PME_ORDER 5
 158 #include "pme-simd4.h"
 159 #else
 160                     DO_FSPLINE(5);
 161 #endif
 162                     break;
 163                 default:
 164                     DO_FSPLINE(order);
 165                     break;
 166             }
 167
 168             atc->f[n][XX] += -coefficient*( fx*nx*rxx );
 169             atc->f[n][YY] += -coefficient*( fx*nx*ryx + fy*ny*ryy );
 170             atc->f[n][ZZ] += -coefficient*( fx*nx*rzx + fy*ny*rzy + fz*nz*rzz );
 171         }
 172     }
 173     /* Since the energy and not forces are interpolated
 174      * the net force might not be exactly zero.
 175      * This can be solved by also interpolating F, but
 176      * that comes at a cost.
 177      * A better hack is to remove the net force every
 178      * step, but that must be done at a higher level
 179      * since this routine doesn't see all atoms if running
 180      * in parallel. Don't know how important it is?  EL 990726
 181      */
 182 }
 183
 184
 185 real gather_energy_bsplines(struct gmx_pme_t *pme, real *grid,
 186                             pme_atomcomm_t *atc)
 187 {
 188     splinedata_t *spline;
 189     int     n, ithx, ithy, ithz, i0, j0, k0;
 190     int     index_x, index_xy;
 191     int *   idxptr;
 192     real    energy, pot, tx, ty, coefficient, gval;
 193     real    *thx, *thy, *thz;
 194     int     norder;
 195     int     order;
 196
 197     spline = &atc->spline[0];
 198
 199     order = pme->pme_order;
 200
 201     energy = 0;
 202     for (n = 0; (n < atc->n); n++)
 203     {
 204         coefficient      = atc->coefficient[n];
 205
 206         if (coefficient != 0)
 207         {
 208             idxptr = atc->idx[n];
 209             norder = n*order;
 210
 211             i0   = idxptr[XX];
 212             j0   = idxptr[YY];
 213             k0   = idxptr[ZZ];
 214
 215             /* Pointer arithmetic alert, next three statements */
 216             thx  = spline->theta[XX] + norder;
 217             thy  = spline->theta[YY] + norder;
 218             thz  = spline->theta[ZZ] + norder;
 219
 220             pot = 0;
 221             for (ithx = 0; (ithx < order); ithx++)
 222             {
 223                 index_x = (i0+ithx)*pme->pmegrid_ny*pme->pmegrid_nz;
 224                 tx      = thx[ithx];
 225
 226                 for (ithy = 0; (ithy < order); ithy++)
 227                 {
 228                     index_xy = index_x+(j0+ithy)*pme->pmegrid_nz;
 229                     ty       = thy[ithy];
 230
 231                     for (ithz = 0; (ithz < order); ithz++)
 232                     {
 233                         gval  = grid[index_xy+(k0+ithz)];
 234                         pot  += tx*ty*thz[ithz]*gval;
 235                     }
 236
 237                 }
 238             }
 239
 240             energy += pot*coefficient;
 241         }
 242     }
 243
 244     return energy;
 245 }