src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  *
  38  * \brief
  39  * CUDA kernel for GPU version of copy_rvec_to_nbat_real.
  40  * Converts coordinate data from rvec to nb format.
  41  *
  42  *  \author Alan Gray <alang@nvidia.com>
  43  *  \author Jon Vincent <jvincent@nvidia.com>
  44  */
  45
  46 #include "gromacs/gpu_utils/vectype_ops.cuh"
  47 #include "gromacs/nbnxm/nbnxm.h"
  48
  49 /*! \brief CUDA kernel for transforming position coordinates from rvec to nbnxm layout.
  50  *
  51  * TODO:
  52  *  - improve/simplify/document use of cxy_na and na_round
  53  *  - rename kernel so naming matches with the other NBNXM kernels;
  54  *  - enable separate compilation unit
  55
  56  * \param[in]     numColumns        extent of cell-level parallelism
  57  * \param[out]    xnb               position buffer in nbnxm layout
  58  * \param[in]     setFillerCoords   tells whether to set the coordinates of the filler particles
  59  * \param[in]     x                 position buffer
  60  * \param[in]     a                 atom index mapping stride between atoms in memory
  61  * \param[in]     cxy_na            array of extents
  62  * \param[in]     cxy_ind           array of cell indices
  63  * \param[in]     cellOffset        first cell
  64  * \param[in]     numAtomsPerCell   number of atoms per cell
  65  */
  66 __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int                         numColumns,
  67                                              float *  __restrict__       xnb,
  68                                              bool                        setFillerCoords,
  69                                              const rvec *  __restrict__  x,
  70                                              const int *  __restrict__   a,
  71                                              const int *  __restrict__   cxy_na,
  72                                              const int *  __restrict__   cxy_ind,
  73                                              int                         cellOffset,
  74                                              int                         numAtomsPerCell);
  75
  76
  77 __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int                         numColumns,
  78                                              float *  __restrict__       xnb,
  79                                              bool                        setFillerCoords,
  80                                              const rvec *  __restrict__  x,
  81                                              const int *  __restrict__   a,
  82                                              const int *  __restrict__   cxy_na,
  83                                              const int *  __restrict__   cxy_ind,
  84                                              int                         cellOffset,
  85                                              int                         numAtomsPerCell)
  86 {
  87
  88
  89     const float farAway = -1000000.0f;
  90
  91     /* map cell-level parallelism to y component of CUDA block index */
  92     int cxy = blockIdx.y;
  93
  94     if (cxy < numColumns)
  95     {
  96
  97         int na = cxy_na[cxy];
  98         int a0 = (cellOffset + cxy_ind[cxy])*numAtomsPerCell;
  99         int na_round;
 100         if (setFillerCoords)
 101         {
 102             // TODO: This can be done more efficiently
 103             na_round =
 104                 (cxy_ind[cxy+1] - cxy_ind[cxy])*numAtomsPerCell;
 105         }
 106         else
 107         {
 108             /* We fill only the real particle locations.
 109              * We assume the filling entries at the end have been
 110              * properly set before during pair-list generation.
 111              */
 112             na_round = na;
 113         }
 114
 115         /* map parallelism within a cell to x component of CUDA block index linearized
 116          * with threads within a block */
 117         int i, j0;
 118         i = blockIdx.x*blockDim.x+threadIdx.x;
 119
 120         j0 = a0*STRIDE_XYZQ;
 121
 122         // destination address where x shoud be stored in nbnxm layout
 123         float3 *x_dest = (float3 *)&xnb[j0 + 4*i];
 124
 125         /* perform conversion of each element */
 126         if (i < na_round)
 127         {
 128             if (i < na)
 129             {
 130                 *x_dest = *((float3 *)x[a[a0 + i]]);
 131             }
 132             else
 133             {
 134                 *x_dest = make_float3(farAway);
 135             }
 136         }
 137     }
 138
 139 }