X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme_spread.cu;h=f5ba3451817b105856e4a4b7e803e5425b8c027e;hb=c088e63019ebc68760d35c3bc916015864aa8e89;hp=d0856602a9e4574e0081a8231f8ec3073e6d2d12;hpb=f59f6ec6e04f4a3732876c2c76250b8444fdcb69;p=alexxy%2Fgromacs.git diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu index d0856602a9..f5ba345181 100644 --- a/src/gromacs/ewald/pme_spread.cu +++ b/src/gromacs/ewald/pme_spread.cu @@ -200,7 +200,7 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU float atomCharge; const int blockIndex = blockIdx.y * gridDim.x + blockIdx.x; - const int atomIndexOffset = blockIndex * atomsPerBlock; + const int atomIndexOffset = blockIndex * atomsPerBlock + kernelParams.pipelineAtomStart; /* Thread index w.r.t. block */ const int threadLocalId = @@ -225,8 +225,8 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU /* Charges, required for both spline and spread */ if (c_useAtomDataPrefetch) { - pme_gpu_stage_atom_data(sm_coefficients, - kernelParams.atoms.d_coefficients[0]); + pme_gpu_stage_atom_data( + sm_coefficients, &kernelParams.atoms.d_coefficients[0][kernelParams.pipelineAtomStart]); __syncthreads(); atomCharge = sm_coefficients[atomIndexLocal]; } @@ -237,7 +237,8 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU if (computeSplines) { - const float3* __restrict__ gm_coordinates = asFloat3(kernelParams.atoms.d_coordinates); + const float3* __restrict__ gm_coordinates = + asFloat3(&kernelParams.atoms.d_coordinates[kernelParams.pipelineAtomStart]); if (c_useAtomDataPrefetch) { // Coordinates @@ -274,8 +275,12 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU /* Spreading */ if (spreadCharges) { - spread_charges( - kernelParams, &atomCharge, sm_gridlineIndices, sm_theta); + + if (!kernelParams.usePipeline || (atomIndexGlobal < kernelParams.pipelineAtomEnd)) + { + spread_charges( + kernelParams, &atomCharge, sm_gridlineIndices, sm_theta); + } } if (numGrids == 2) { @@ -293,8 +298,11 @@ __launch_bounds__(c_spreadMaxThreadsPerBlock) CLANG_DISABLE_OPTIMIZATION_ATTRIBU } if (spreadCharges) { - spread_charges( - kernelParams, &atomCharge, sm_gridlineIndices, sm_theta); + if (!kernelParams.usePipeline || (atomIndexGlobal < kernelParams.pipelineAtomEnd)) + { + spread_charges( + kernelParams, &atomCharge, sm_gridlineIndices, sm_theta); + } } } }