X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme_gpu_program_impl_sycl.cpp;h=a65a2828b074859802b30735aaafa7da323e21f2;hb=01449308f91c3f4ab9cac235fef076e6aac1fa9a;hp=196cff26ae3881a4bbee79fe6f1e5296971bf349;hpb=eb74bfa4c8a28798caf8cb5bb5fa922d87829d62;p=alexxy%2Fgromacs.git diff --git a/src/gromacs/ewald/pme_gpu_program_impl_sycl.cpp b/src/gromacs/ewald/pme_gpu_program_impl_sycl.cpp index 196cff26ae..a65a2828b0 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl_sycl.cpp +++ b/src/gromacs/ewald/pme_gpu_program_impl_sycl.cpp @@ -50,6 +50,7 @@ #include "pme_gpu_program_impl.h" #include "pme_gather_sycl.h" +#include "pme_solve_sycl.h" #include "pme_spread_sycl.h" #include "pme_gpu_constants.h" @@ -62,6 +63,9 @@ constexpr int c_pmeOrder = 4; constexpr bool c_wrapX = true; constexpr bool c_wrapY = true; +constexpr int c_stateA = 0; +constexpr int c_stateB = 1; + static int subGroupSizeFromVendor(const DeviceInformation& deviceInfo) { switch (deviceInfo.deviceVendor) @@ -96,9 +100,20 @@ static int subGroupSizeFromVendor(const DeviceInformation& deviceInfo) INSTANTIATE_##x(order, 2, ThreadsPerAtom::Order, subGroupSize); \ INSTANTIATE_##x(order, 2, ThreadsPerAtom::OrderSquared, subGroupSize); +#define INSTANTIATE_SOLVE(subGroupSize) \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; \ + extern template class PmeSolveKernel; + #define INSTANTIATE(order, subGroupSize) \ INSTANTIATE_X(SPREAD, order, subGroupSize); \ - INSTANTIATE_X(GATHER, order, subGroupSize); + INSTANTIATE_X(GATHER, order, subGroupSize); \ + INSTANTIATE_SOLVE(subGroupSize); #if GMX_SYCL_DPCPP INSTANTIATE(4, 16); @@ -107,7 +122,6 @@ INSTANTIATE(4, 32); INSTANTIATE(4, 64); #endif - //! Helper function to set proper kernel functor pointers template static void setKernelPointers(struct PmeGpuProgramImpl* pmeGpuProgram) @@ -164,6 +178,22 @@ static void setKernelPointers(struct PmeGpuProgramImpl* pmeGpuProgram) new PmeGatherKernel(); pmeGpuProgram->gatherKernelReadSplinesThPerAtom4Dual = new PmeGatherKernel(); + pmeGpuProgram->solveXYZKernelA = + new PmeSolveKernel(); + pmeGpuProgram->solveXYZEnergyKernelA = + new PmeSolveKernel(); + pmeGpuProgram->solveYZXKernelA = + new PmeSolveKernel(); + pmeGpuProgram->solveYZXEnergyKernelA = + new PmeSolveKernel(); + pmeGpuProgram->solveXYZKernelB = + new PmeSolveKernel(); + pmeGpuProgram->solveXYZEnergyKernelB = + new PmeSolveKernel(); + pmeGpuProgram->solveYZXKernelB = + new PmeSolveKernel(); + pmeGpuProgram->solveYZXEnergyKernelB = + new PmeSolveKernel(); } PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) : @@ -205,4 +235,20 @@ PmeGpuProgramImpl::~PmeGpuProgramImpl() delete splineAndSpreadKernelThPerAtom4Dual; delete splineAndSpreadKernelWriteSplinesDual; delete splineAndSpreadKernelWriteSplinesThPerAtom4Dual; + delete gatherKernelSingle; + delete gatherKernelThPerAtom4Single; + delete gatherKernelReadSplinesSingle; + delete gatherKernelReadSplinesThPerAtom4Single; + delete gatherKernelDual; + delete gatherKernelThPerAtom4Dual; + delete gatherKernelReadSplinesDual; + delete gatherKernelReadSplinesThPerAtom4Dual; + delete solveYZXKernelA; + delete solveXYZKernelA; + delete solveYZXEnergyKernelA; + delete solveXYZEnergyKernelA; + delete solveYZXKernelB; + delete solveXYZKernelB; + delete solveYZXEnergyKernelB; + delete solveXYZEnergyKernelB; }