X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme_spread.cu;h=f5ba3451817b105856e4a4b7e803e5425b8c027e;hb=c088e63019ebc68760d35c3bc916015864aa8e89;hp=d0856602a9e4574e0081a8231f8ec3073e6d2d12;hpb=f59f6ec6e04f4a3732876c2c76250b8444fdcb69;p=alexxy%2Fgromacs.git

diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu
index d0856602a9..f5ba345181 100644
--- a/src/gromacs/ewald/pme_spread.cu
+++ b/src/gromacs/ewald/pme_spread.cu
@@ -200,7 +200,7 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
     float  atomCharge;
 
     const int blockIndex      = blockIdx.y * gridDim.x + blockIdx.x;
-    const int atomIndexOffset = blockIndex * atomsPerBlock;
+    const int atomIndexOffset = blockIndex * atomsPerBlock + kernelParams.pipelineAtomStart;
 
     /* Thread index w.r.t. block */
     const int threadLocalId =
@@ -225,8 +225,8 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
     /* Charges, required for both spline and spread */
     if (c_useAtomDataPrefetch)
     {
-        pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(sm_coefficients,
-                                                         kernelParams.atoms.d_coefficients[0]);
+        pme_gpu_stage_atom_data<float, atomsPerBlock, 1>(
+                sm_coefficients, &kernelParams.atoms.d_coefficients[0][kernelParams.pipelineAtomStart]);
         __syncthreads();
         atomCharge = sm_coefficients[atomIndexLocal];
     }
@@ -237,7 +237,8 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
 
     if (computeSplines)
     {
-        const float3* __restrict__ gm_coordinates = asFloat3(kernelParams.atoms.d_coordinates);
+        const float3* __restrict__ gm_coordinates =
+                asFloat3(&kernelParams.atoms.d_coordinates[kernelParams.pipelineAtomStart]);
         if (c_useAtomDataPrefetch)
         {
             // Coordinates
@@ -274,8 +275,12 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
     /* Spreading */
     if (spreadCharges)
     {
-        spread_charges<order, wrapX, wrapY, 0, threadsPerAtom>(
-                kernelParams, &atomCharge, sm_gridlineIndices, sm_theta);
+
+        if (!kernelParams.usePipeline || (atomIndexGlobal < kernelParams.pipelineAtomEnd))
+        {
+            spread_charges<order, wrapX, wrapY, 0, threadsPerAtom>(
+                    kernelParams, &atomCharge, sm_gridlineIndices, sm_theta);
+        }
     }
     if (numGrids == 2)
     {
@@ -293,8 +298,11 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU
         }
         if (spreadCharges)
         {
-            spread_charges<order, wrapX, wrapY, 1, threadsPerAtom>(
-                    kernelParams, &atomCharge, sm_gridlineIndices, sm_theta);
+            if (!kernelParams.usePipeline || (atomIndexGlobal < kernelParams.pipelineAtomEnd))
+            {
+                spread_charges<order, wrapX, wrapY, 1, threadsPerAtom>(
+                        kernelParams, &atomCharge, sm_gridlineIndices, sm_theta);
+            }
         }
     }
 }