src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*
  36  * Note: this file was generated by the Verlet kernel generator for
  37  * kernel type 4xn.
  38  */
  39
  40 #ifdef HAVE_CONFIG_H
  41 #include <config.h>
  42 #endif
  43
  44 #include "typedefs.h"
  45
  46 #ifdef GMX_NBNXN_SIMD_4XN
  47
  48 #ifdef GMX_NBNXN_HALF_WIDTH_SIMD
  49 #define GMX_USE_HALF_WIDTH_SIMD_HERE
  50 #endif
  51 #include "gromacs/simd/macros.h"
  52 #include "gromacs/simd/vector_operations.h"
  53
  54 #if !(GMX_SIMD_WIDTH_HERE == 2 || GMX_SIMD_WIDTH_HERE == 4 || GMX_SIMD_WIDTH_HERE == 8)
  55 #error "unsupported SIMD width"
  56 #endif
  57
  58 #define GMX_SIMD_J_UNROLL_SIZE 1
  59 #include "nbnxn_kernel_simd_4xn.h"
  60 #include "../nbnxn_kernel_common.h"
  61 #include "gmx_omp_nthreads.h"
  62 #include "types/force_flags.h"
  63
  64 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
  65  */
  66 enum {
  67     coultRF, coultTAB, coultTAB_TWIN, coultEWALD, coultEWALD_TWIN, coultNR
  68 };
  69
  70 /* Declare and define the kernel function pointer lookup tables. */
  71 static p_nbk_func_ener p_nbk_ener[coultNR][ljcrNR] =
  72 {
  73     {
  74         nbnxn_kernel_simd_4xn_rf_comb_geom_ener,
  75         nbnxn_kernel_simd_4xn_rf_comb_lb_ener,
  76         nbnxn_kernel_simd_4xn_rf_comb_none_ener,
  77     },
  78     {
  79         nbnxn_kernel_simd_4xn_tab_comb_geom_ener,
  80         nbnxn_kernel_simd_4xn_tab_comb_lb_ener,
  81         nbnxn_kernel_simd_4xn_tab_comb_none_ener,
  82     },
  83     {
  84         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_ener,
  85         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_ener,
  86         nbnxn_kernel_simd_4xn_tab_twin_comb_none_ener,
  87     },
  88     {
  89         nbnxn_kernel_simd_4xn_ewald_comb_geom_ener,
  90         nbnxn_kernel_simd_4xn_ewald_comb_lb_ener,
  91         nbnxn_kernel_simd_4xn_ewald_comb_none_ener,
  92     },
  93     {
  94         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_ener,
  95         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_ener,
  96         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_ener,
  97     },
  98 };
  99
 100 static p_nbk_func_ener p_nbk_energrp[coultNR][ljcrNR] =
 101 {
 102     {
 103         nbnxn_kernel_simd_4xn_rf_comb_geom_energrp,
 104         nbnxn_kernel_simd_4xn_rf_comb_lb_energrp,
 105         nbnxn_kernel_simd_4xn_rf_comb_none_energrp,
 106     },
 107     {
 108         nbnxn_kernel_simd_4xn_tab_comb_geom_energrp,
 109         nbnxn_kernel_simd_4xn_tab_comb_lb_energrp,
 110         nbnxn_kernel_simd_4xn_tab_comb_none_energrp,
 111     },
 112     {
 113         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_energrp,
 114         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_energrp,
 115         nbnxn_kernel_simd_4xn_tab_twin_comb_none_energrp,
 116     },
 117     {
 118         nbnxn_kernel_simd_4xn_ewald_comb_geom_energrp,
 119         nbnxn_kernel_simd_4xn_ewald_comb_lb_energrp,
 120         nbnxn_kernel_simd_4xn_ewald_comb_none_energrp,
 121     },
 122     {
 123         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_energrp,
 124         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_energrp,
 125         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_energrp,
 126     },
 127 };
 128
 129 static p_nbk_func_noener p_nbk_noener[coultNR][ljcrNR] =
 130 {
 131     {
 132         nbnxn_kernel_simd_4xn_rf_comb_geom_noener,
 133         nbnxn_kernel_simd_4xn_rf_comb_lb_noener,
 134         nbnxn_kernel_simd_4xn_rf_comb_none_noener,
 135     },
 136     {
 137         nbnxn_kernel_simd_4xn_tab_comb_geom_noener,
 138         nbnxn_kernel_simd_4xn_tab_comb_lb_noener,
 139         nbnxn_kernel_simd_4xn_tab_comb_none_noener,
 140     },
 141     {
 142         nbnxn_kernel_simd_4xn_tab_twin_comb_geom_noener,
 143         nbnxn_kernel_simd_4xn_tab_twin_comb_lb_noener,
 144         nbnxn_kernel_simd_4xn_tab_twin_comb_none_noener,
 145     },
 146     {
 147         nbnxn_kernel_simd_4xn_ewald_comb_geom_noener,
 148         nbnxn_kernel_simd_4xn_ewald_comb_lb_noener,
 149         nbnxn_kernel_simd_4xn_ewald_comb_none_noener,
 150     },
 151     {
 152         nbnxn_kernel_simd_4xn_ewald_twin_comb_geom_noener,
 153         nbnxn_kernel_simd_4xn_ewald_twin_comb_lb_noener,
 154         nbnxn_kernel_simd_4xn_ewald_twin_comb_none_noener,
 155     },
 156 };
 157
 158
 159 static void
 160 reduce_group_energies(int ng, int ng_2log,
 161                       const real *VSvdw, const real *VSc,
 162                       real *Vvdw, real *Vc)
 163 {
 164     const int unrollj      = GMX_SIMD_WIDTH_HERE/GMX_SIMD_J_UNROLL_SIZE;
 165     const int unrollj_half = unrollj/2;
 166     int       ng_p2, i, j, j0, j1, c, s;
 167
 168     ng_p2 = (1<<ng_2log);
 169
 170     /* The size of the x86 SIMD energy group buffer array is:
 171      * ng*ng*ng_p2*unrollj_half*simd_width
 172      */
 173     for (i = 0; i < ng; i++)
 174     {
 175         for (j = 0; j < ng; j++)
 176         {
 177             Vvdw[i*ng+j] = 0;
 178             Vc[i*ng+j]   = 0;
 179         }
 180
 181         for (j1 = 0; j1 < ng; j1++)
 182         {
 183             for (j0 = 0; j0 < ng; j0++)
 184             {
 185                 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
 186                 for (s = 0; s < unrollj_half; s++)
 187                 {
 188                     Vvdw[i*ng+j0] += VSvdw[c+0];
 189                     Vvdw[i*ng+j1] += VSvdw[c+1];
 190                     Vc  [i*ng+j0] += VSc  [c+0];
 191                     Vc  [i*ng+j1] += VSc  [c+1];
 192                     c             += unrollj + 2;
 193                 }
 194             }
 195         }
 196     }
 197 }
 198
 199 #else /* GMX_NBNXN_SIMD_4XN */
 200
 201 #include "gmx_fatal.h"
 202
 203 #endif /* GMX_NBNXN_SIMD_4XN */
 204
 205 void
 206 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t      gmx_unused *nbl_list,
 207                       const nbnxn_atomdata_t    gmx_unused *nbat,
 208                       const interaction_const_t gmx_unused *ic,
 209                       int                       gmx_unused  ewald_excl,
 210                       rvec                      gmx_unused *shift_vec,
 211                       int                       gmx_unused  force_flags,
 212                       int                       gmx_unused  clearF,
 213                       real                      gmx_unused *fshift,
 214                       real                      gmx_unused *Vc,
 215                       real                      gmx_unused *Vvdw)
 216 #ifdef GMX_NBNXN_SIMD_4XN
 217 {
 218     int                nnbl;
 219     nbnxn_pairlist_t **nbl;
 220     int                coult;
 221     int                nb;
 222
 223     nnbl = nbl_list->nnbl;
 224     nbl  = nbl_list->nbl;
 225
 226     if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
 227     {
 228         coult = coultRF;
 229     }
 230     else
 231     {
 232         if (ewald_excl == ewaldexclTable)
 233         {
 234             if (ic->rcoulomb == ic->rvdw)
 235             {
 236                 coult = coultTAB;
 237             }
 238             else
 239             {
 240                 coult = coultTAB_TWIN;
 241             }
 242         }
 243         else
 244         {
 245             if (ic->rcoulomb == ic->rvdw)
 246             {
 247                 coult = coultEWALD;
 248             }
 249             else
 250             {
 251                 coult = coultEWALD_TWIN;
 252             }
 253         }
 254     }
 255
 256 #pragma omp parallel for schedule(static) num_threads(gmx_omp_nthreads_get(emntNonbonded))
 257     for (nb = 0; nb < nnbl; nb++)
 258     {
 259         nbnxn_atomdata_output_t *out;
 260         real                    *fshift_p;
 261
 262         out = &nbat->out[nb];
 263
 264         if (clearF == enbvClearFYes)
 265         {
 266             clear_f(nbat, nb, out->f);
 267         }
 268
 269         if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
 270         {
 271             fshift_p = fshift;
 272         }
 273         else
 274         {
 275             fshift_p = out->fshift;
 276
 277             if (clearF == enbvClearFYes)
 278             {
 279                 clear_fshift(fshift_p);
 280             }
 281         }
 282
 283         if (!(force_flags & GMX_FORCE_ENERGY))
 284         {
 285             /* Don't calculate energies */
 286             p_nbk_noener[coult][nbat->comb_rule](nbl[nb], nbat,
 287                                                  ic,
 288                                                  shift_vec,
 289                                                  out->f,
 290                                                  fshift_p);
 291         }
 292         else if (out->nV == 1)
 293         {
 294             /* No energy groups */
 295             out->Vvdw[0] = 0;
 296             out->Vc[0]   = 0;
 297
 298             p_nbk_ener[coult][nbat->comb_rule](nbl[nb], nbat,
 299                                                ic,
 300                                                shift_vec,
 301                                                out->f,
 302                                                fshift_p,
 303                                                out->Vvdw,
 304                                                out->Vc);
 305         }
 306         else
 307         {
 308             /* Calculate energy group contributions */
 309             int i;
 310
 311             for (i = 0; i < out->nVS; i++)
 312             {
 313                 out->VSvdw[i] = 0;
 314             }
 315             for (i = 0; i < out->nVS; i++)
 316             {
 317                 out->VSc[i] = 0;
 318             }
 319
 320             p_nbk_energrp[coult][nbat->comb_rule](nbl[nb], nbat,
 321                                                   ic,
 322                                                   shift_vec,
 323                                                   out->f,
 324                                                   fshift_p,
 325                                                   out->VSvdw,
 326                                                   out->VSc);
 327
 328             reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
 329                                   out->VSvdw, out->VSc,
 330                                   out->Vvdw, out->Vc);
 331         }
 332     }
 333
 334     if (force_flags & GMX_FORCE_ENERGY)
 335     {
 336         reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
 337     }
 338 }
 339 #else
 340 {
 341     gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
 342                " are not enabled.");
 343 }
 344 #endif
 345 #undef GMX_SIMD_J_UNROLL_SIZE