X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=blobdiff_plain;f=src%2Fgromacs%2Fewald%2Fpme.cpp;h=e2c87459a0589403888d57fdfead803bbabb869f;hb=8073360f849809b7ce4fa5b5f14ae16209ca087f;hp=b0b59e6eedbaf7fe8f37134805eab50a5280b711;hpb=2f224c7322e2e2e948590e1e73c82ff2b3ed568f;p=alexxy%2Fgromacs.git

diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp
index b0b59e6eed..e2c87459a0 100644
--- a/src/gromacs/ewald/pme.cpp
+++ b/src/gromacs/ewald/pme.cpp
@@ -114,7 +114,7 @@
 #include "gromacs/utility/logger.h"
 #include "gromacs/utility/real.h"
 #include "gromacs/utility/smalloc.h"
-#include "gromacs/utility/stringutil.h"
+#include "gromacs/utility/message_string_collector.h"
 #include "gromacs/utility/unique_cptr.h"
 
 #include "calculate_spline_moduli.h"
@@ -127,94 +127,71 @@
 #include "pme_spline_work.h"
 #include "pme_spread.h"
 
-/*! \brief Help build a descriptive message in \c error if there are
- * \c errorReasons why PME on GPU is not supported.
- *
- * \returns Whether the lack of errorReasons indicate there is support. */
-static bool addMessageIfNotSupported(const std::list<std::string>& errorReasons, std::string* error)
-{
-    bool isSupported = errorReasons.empty();
-    if (!isSupported && error)
-    {
-        std::string regressionTestMarker = "PME GPU does not support";
-        // this prefix is tested for in the regression tests script gmxtest.pl
-        *error = regressionTestMarker;
-        if (errorReasons.size() == 1)
-        {
-            *error += " " + errorReasons.back();
-        }
-        else
-        {
-            *error += ": " + gmx::joinStrings(errorReasons, "; ");
-        }
-        *error += ".";
-    }
-    return isSupported;
-}
+//NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
+bool g_allowPmeWithSyclForTesting = false;
 
 bool pme_gpu_supports_build(std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (GMX_DOUBLE)
-    {
-        errorReasons.emplace_back("a double-precision build");
-    }
-    if (!GMX_GPU)
-    {
-        errorReasons.emplace_back("a non-GPU build");
-    }
-    if (GMX_GPU_SYCL)
-    {
-        errorReasons.emplace_back("SYCL build"); // SYCL-TODO
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf(GMX_DOUBLE, "Double-precision build of GROMACS.");
+    errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build."); // SYCL-TODO
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_supports_hardware(const gmx_hw_info_t gmx_unused& hwinfo, std::string* error)
 {
-    std::list<std::string> errorReasons;
-
-    if (GMX_GPU_OPENCL)
-    {
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
 #ifdef __APPLE__
-        errorReasons.emplace_back("Apple OS X operating system");
+    errorReasons.appendIf(GMX_GPU_OPENCL, "Apple OS X operating system");
 #endif
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
     }
-    return addMessageIfNotSupported(errorReasons, error);
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_supports_input(const t_inputrec& ir, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (!EEL_PME(ir.coulombtype))
-    {
-        errorReasons.emplace_back("systems that do not use PME for electrostatics");
-    }
-    if (ir.pme_order != 4)
-    {
-        errorReasons.emplace_back("interpolation orders other than 4");
-    }
-    if (EVDW_PME(ir.vdwtype))
-    {
-        errorReasons.emplace_back("Lennard-Jones PME");
-    }
-    if (!EI_DYNAMICS(ir.eI))
-    {
-        errorReasons.emplace_back(
-                "Cannot compute PME interactions on a GPU, because PME GPU requires a dynamical "
-                "integrator (md, sd, etc).");
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf(!EEL_PME(ir.coulombtype),
+                          "Systems that do not use PME for electrostatics.");
+    errorReasons.appendIf((ir.pme_order != 4), "Interpolation orders other than 4.");
+    errorReasons.appendIf(EVDW_PME(ir.vdwtype), "Lennard-Jones PME.");
+    errorReasons.appendIf(!EI_DYNAMICS(ir.eI), "Non-dynamical integrator (use md, sd, etc).");
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (ir.efep != efepNO)
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU in Mixed mode does not support:");
+    errorReasons.appendIf(ir.efep != FreeEnergyPerturbationType::No, "Free Energy Perturbation.");
+    errorReasons.finishContext();
+    if (error != nullptr)
     {
-        errorReasons.emplace_back("Free Energy Perturbation (in PME GPU mixed mode)");
+        *error = errorReasons.toString();
    }
-    return addMessageIfNotSupported(errorReasons, error);
+    return errorReasons.isEmpty();
 }
 
 /*! \brief \libinternal
@@ -228,32 +205,21 @@ bool pme_gpu_mixed_mode_supports_input(const t_inputrec& ir, std::string* error)
  */
 static bool pme_gpu_check_restrictions(const gmx_pme_t* pme, std::string* error)
 {
-    std::list<std::string> errorReasons;
-    if (pme->nnodes != 1)
-    {
-        errorReasons.emplace_back("PME decomposition");
-    }
-    if (pme->pme_order != 4)
-    {
-        errorReasons.emplace_back("interpolation orders other than 4");
-    }
-    if (pme->doLJ)
-    {
-        errorReasons.emplace_back("Lennard-Jones PME");
-    }
-    if (GMX_DOUBLE)
-    {
-        errorReasons.emplace_back("double precision");
-    }
-    if (!GMX_GPU)
-    {
-        errorReasons.emplace_back("non-GPU build of GROMACS");
-    }
-    if (GMX_GPU_SYCL)
-    {
-        errorReasons.emplace_back("SYCL build of GROMACS"); // SYCL-TODO
-    }
-    return addMessageIfNotSupported(errorReasons, error);
+    gmx::MessageStringCollector errorReasons;
+    // Before changing the prefix string, make sure that it is not searched for in regression tests.
+    errorReasons.startContext("PME GPU does not support:");
+    errorReasons.appendIf((pme->nnodes != 1), "PME decomposition.");
+    errorReasons.appendIf((pme->pme_order != 4), "interpolation orders other than 4.");
+    errorReasons.appendIf(pme->doLJ, "Lennard-Jones PME.");
+    errorReasons.appendIf(GMX_DOUBLE, "Double precision build of GROMACS.");
+    errorReasons.appendIf(!GMX_GPU, "Non-GPU build of GROMACS.");
+    errorReasons.appendIf(GMX_GPU_SYCL && !g_allowPmeWithSyclForTesting, "SYCL build of GROMACS."); // SYCL-TODO
+    errorReasons.finishContext();
+    if (error != nullptr)
+    {
+        *error = errorReasons.toString();
+    }
+    return errorReasons.isEmpty();
 }
 
 PmeRunMode pme_run_mode(const gmx_pme_t* pme)
@@ -333,11 +299,7 @@ PmeAtomComm::PmeAtomComm(MPI_Comm   PmeMpiCommunicator,
                          const int  pmeOrder,
                          const int  dimIndex,
                          const bool doSpread) :
-    dimind(dimIndex),
-    bSpread(doSpread),
-    pme_order(pmeOrder),
-    nthread(numThreads),
-    spline(nthread)
+    dimind(dimIndex), bSpread(doSpread), pme_order(pmeOrder), nthread(numThreads), spline(nthread)
 {
     if (PmeMpiCommunicator != MPI_COMM_NULL)
     {
@@ -485,8 +447,18 @@ static void init_overlap_comm(pme_overlap_t* ol, int norder, MPI_Comm comm, int
     MPI_Status stat;
     for (size_t b = 0; b < ol->comm_data.size(); b++)
     {
-        MPI_Sendrecv(&ol->send_size, 1, MPI_INT, ol->comm_data[b].send_id, b, &ol->comm_data[b].recv_size,
-                     1, MPI_INT, ol->comm_data[b].recv_id, b, ol->mpi_comm, &stat);
+        MPI_Sendrecv(&ol->send_size,
+                     1,
+                     MPI_INT,
+                     ol->comm_data[b].send_id,
+                     b,
+                     &ol->comm_data[b].recv_size,
+                     1,
+                     MPI_INT,
+                     ol->comm_data[b].recv_id,
+                     b,
+                     ol->mpi_comm,
+                     &stat);
     }
 #endif
 
@@ -524,7 +496,8 @@ bool gmx_pme_check_restrictions(int pme_order, int nkx, int nky, int nkz, int nu
         std::string message = gmx::formatString(
                 "pme_order (%d) is larger than the maximum allowed value (%d). Modify and "
                 "recompile the code if you really need such a high order.",
-                pme_order, PME_ORDER_MAX);
+                pme_order,
+                PME_ORDER_MAX);
         GMX_THROW(gmx::InconsistentInputError(message));
     }
 
@@ -555,7 +528,8 @@ bool gmx_pme_check_restrictions(int pme_order, int nkx, int nky, int nkz, int nu
                 "threads, the number of grid lines per rank along x should be >= pme_order (%d) "
                 "or = pmeorder-1. To resolve this issue, use fewer ranks along x (and possibly "
                 "more along y and/or z) by specifying -dd manually.",
-                nkx / static_cast<double>(numPmeDomainsAlongX), pme_order);
+                nkx / static_cast<double>(numPmeDomainsAlongX),
+                pme_order);
     }
 
     return true;
@@ -593,9 +567,6 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
 
     gmx::unique_cptr<gmx_pme_t, gmx_pme_destroy> pme(new gmx_pme_t());
 
-    pme->sum_qgrid_tmp    = nullptr;
-    pme->sum_qgrid_dd_tmp = nullptr;
-    pme->buf_nalloc       = 0;
 
     pme->nnodes = 1;
@@ -659,9 +630,13 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
             pme->ndecompdim = 2;
 
 #if GMX_MPI
-            MPI_Comm_split(pme->mpi_comm, pme->nodeid % numPmeDomains.y, pme->nodeid,
+            MPI_Comm_split(pme->mpi_comm,
+                           pme->nodeid % numPmeDomains.y,
+                           pme->nodeid,
                            &pme->mpi_comm_d[0]); /* My communicator along major dimension */
-            MPI_Comm_split(pme->mpi_comm, pme->nodeid / numPmeDomains.y, pme->nodeid,
+            MPI_Comm_split(pme->mpi_comm,
+                           pme->nodeid / numPmeDomains.y,
+                           pme->nodeid,
                            &pme->mpi_comm_d[1]); /* My communicator along minor dimension */
 
             MPI_Comm_rank(pme->mpi_comm_d[0], &pme->nodeid_major);
@@ -704,15 +679,15 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * not calculating free-energy for Coulomb and/or LJ while gmx_pme_init()
      * configures with free-energy, but that has never been tested.
      */
-    pme->doCoulomb     = EEL_PME(ir->coulombtype);
-    pme->doLJ          = EVDW_PME(ir->vdwtype);
-    pme->bFEP_q        = ((ir->efep != efepNO) && bFreeEnergy_q);
-    pme->bFEP_lj       = ((ir->efep != efepNO) && bFreeEnergy_lj);
-    pme->bFEP          = (pme->bFEP_q || pme->bFEP_lj);
-    pme->nkx           = ir->nkx;
-    pme->nky           = ir->nky;
-    pme->nkz           = ir->nkz;
-    pme->bP3M          = (ir->coulombtype == eelP3M_AD || getenv("GMX_PME_P3M") != nullptr);
+    pme->doCoulomb = EEL_PME(ir->coulombtype);
+    pme->doLJ      = EVDW_PME(ir->vdwtype);
+    pme->bFEP_q    = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_q);
+    pme->bFEP_lj   = ((ir->efep != FreeEnergyPerturbationType::No) && bFreeEnergy_lj);
+    pme->bFEP      = (pme->bFEP_q || pme->bFEP_lj);
+    pme->nkx       = ir->nkx;
+    pme->nky       = ir->nky;
+    pme->nkz       = ir->nkz;
+    pme->bP3M = (ir->coulombtype == CoulombInteractionType::P3mAD || getenv("GMX_PME_P3M") != nullptr);
     pme->pme_order     = ir->pme_order;
     pme->ewaldcoeff_q  = ewaldcoeff_q;
     pme->ewaldcoeff_lj = ewaldcoeff_lj;
@@ -725,12 +700,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
 
     // The box requires scaling with nwalls = 2, we store that condition as well
     // as the scaling factor
-    delete pme->boxScaler;
-    pme->boxScaler = new EwaldBoxZScaler(*ir);
+    pme->boxScaler = std::make_unique<EwaldBoxZScaler>(
+            EwaldBoxZScaler(inputrecPbcXY2Walls(ir), ir->wall_ewald_zfac));
 
     /* If we violate restrictions, generate a fatal error here */
-    gmx_pme_check_restrictions(pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major,
-                               pme->bUseThreads, true);
+    gmx_pme_check_restrictions(
+            pme->pme_order, pme->nkx, pme->nky, pme->nkz, pme->nnodes_major, pme->bUseThreads, true);
 
     if (pme->nnodes > 1)
     {
@@ -761,8 +736,13 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
                       " and PME grid_y (%d) and grid_z (%d) should be divisible by "
                       "#PME_ranks_y "
                       "(%d)",
-                      gmx::roundToInt((imbal - 1) * 100), pme->nkx, pme->nky,
-                      pme->nnodes_major, pme->nky, pme->nkz, pme->nnodes_minor);
+                      gmx::roundToInt((imbal - 1) * 100),
+                      pme->nkx,
+                      pme->nky,
+                      pme->nnodes_major,
+                      pme->nky,
+                      pme->nkz,
+                      pme->nnodes_minor);
         }
     }
 
@@ -771,8 +751,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * y is always copied through a buffer: we don't need padding in z,
     * but we do need the overlap in x because of the communication order.
      */
-    init_overlap_comm(&pme->overlap[0], pme->pme_order, pme->mpi_comm_d[0], pme->nnodes_major,
-                      pme->nodeid_major, pme->nkx,
+    init_overlap_comm(&pme->overlap[0],
+                      pme->pme_order,
+                      pme->mpi_comm_d[0],
+                      pme->nnodes_major,
+                      pme->nodeid_major,
+                      pme->nkx,
                       (div_round_up(pme->nky, pme->nnodes_minor) + pme->pme_order)
                               * (pme->nkz + pme->pme_order - 1));
 
@@ -780,8 +764,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      * We do this with an offset buffer of equal size, so we need to allocate
      * extra for the offset. That's what the (+1)*pme->nkz is for.
      */
-    init_overlap_comm(&pme->overlap[1], pme->pme_order, pme->mpi_comm_d[1], pme->nnodes_minor,
-                      pme->nodeid_minor, pme->nky,
+    init_overlap_comm(&pme->overlap[1],
+                      pme->pme_order,
+                      pme->mpi_comm_d[1],
+                      pme->nnodes_minor,
+                      pme->nodeid_minor,
+                      pme->nky,
                       (div_round_up(pme->nkx, pme->nnodes_major) + pme->pme_order + 1) * pme->nkz);
 
     /* Double-check for a limitation of the (current) sum_fftgrid_dd code.
@@ -813,12 +801,12 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
     pme->pmegrid_start_iy = pme->overlap[1].s2g0[pme->nodeid_minor];
     pme->pmegrid_start_iz = 0;
 
-    make_gridindex_to_localindex(pme->nkx, pme->pmegrid_start_ix,
-                                 pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
-    make_gridindex_to_localindex(pme->nky, pme->pmegrid_start_iy,
-                                 pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
-    make_gridindex_to_localindex(pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz,
-                                 &pme->fshz);
+    make_gridindex_to_localindex(
+            pme->nkx, pme->pmegrid_start_ix, pme->pmegrid_nx - (pme->pme_order - 1), &pme->nnx, &pme->fshx);
+    make_gridindex_to_localindex(
+            pme->nky, pme->pmegrid_start_iy, pme->pmegrid_ny - (pme->pme_order - 1), &pme->nny, &pme->fshy);
+    make_gridindex_to_localindex(
+            pme->nkz, pme->pmegrid_start_iz, pme->pmegrid_nz_base, &pme->nnz, &pme->fshz);
 
     pme->spline_work = make_pme_spline_work(pme->pme_order);
 
@@ -830,7 +818,7 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
      */
     if (pme->doLJ)
     {
-        pme->ngrids = ((ir->ljpme_combination_rule == eljpmeLB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
+        pme->ngrids = ((ir->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q_AND_LJ_LB : DO_Q_AND_LJ);
     }
     else
     {
@@ -844,10 +832,16 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
     {
         if ((i < DO_Q && pme->doCoulomb && (i == 0 || bFreeEnergy_q))
             || (i >= DO_Q && pme->doLJ
-                && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == eljpmeLB)))
+                && (i == 2 || bFreeEnergy_lj || ir->ljpme_combination_rule == LongRangeVdW::LB)))
         {
-            pmegrids_init(&pme->pmegrid[i], pme->pmegrid_nx, pme->pmegrid_ny, pme->pmegrid_nz,
-                          pme->pmegrid_nz_base, pme->pme_order, pme->bUseThreads, pme->nthread,
+            pmegrids_init(&pme->pmegrid[i],
+                          pme->pmegrid_nx,
+                          pme->pmegrid_ny,
+                          pme->pmegrid_nz,
+                          pme->pmegrid_nz_base,
+                          pme->pme_order,
+                          pme->bUseThreads,
+                          pme->nthread,
                           pme->overlap[0].s2g1[pme->nodeid_major]
                                   - pme->overlap[0].s2g0[pme->nodeid_major + 1],
                           pme->overlap[1].s2g1[pme->nodeid_minor]
@@ -856,8 +850,14 @@ gmx_pme_t* gmx_pme_init(const t_commrec* cr,
             const auto allocateRealGridForGpu = (pme->runMode == PmeRunMode::Mixed)
                                                         ? gmx::PinningPolicy::PinnedIfSupported
                                                         : gmx::PinningPolicy::CannotBePinned;
-            gmx_parallel_3dfft_init(&pme->pfft_setup[i], ndata, &pme->fftgrid[i], &pme->cfftgrid[i],
-                                    pme->mpi_comm_d, bReproducible, pme->nthread, allocateRealGridForGpu);
+            gmx_parallel_3dfft_init(&pme->pfft_setup[i],
+                                    ndata,
+                                    &pme->fftgrid[i],
+                                    &pme->cfftgrid[i],
+                                    pme->mpi_comm_d,
+                                    bReproducible,
+                                    pme->nthread,
+                                    allocateRealGridForGpu);
         }
     }
 
@@ -938,15 +938,27 @@ void gmx_pme_reinit(struct gmx_pme_t** pmedata,
     const gmx::MDLogger dummyLogger;
     GMX_ASSERT(pmedata, "Invalid PME pointer");
     NumPmeDomains numPmeDomains = { pme_src->nnodes_major, pme_src->nnodes_minor };
-    *pmedata = gmx_pme_init(cr, numPmeDomains, &irc, pme_src->bFEP_q, pme_src->bFEP_lj, FALSE,
-                            ewaldcoeff_q, ewaldcoeff_lj, pme_src->nthread, pme_src->runMode,
-                            pme_src->gpu, nullptr, nullptr, nullptr, dummyLogger);
+    *pmedata = gmx_pme_init(cr,
+                            numPmeDomains,
+                            &irc,
+                            pme_src->bFEP_q,
+                            pme_src->bFEP_lj,
+                            FALSE,
+                            ewaldcoeff_q,
+                            ewaldcoeff_lj,
+                            pme_src->nthread,
+                            pme_src->runMode,
+                            pme_src->gpu,
+                            nullptr,
+                            nullptr,
+                            nullptr,
+                            dummyLogger);
 
     /* When running PME on the CPU not using domain decomposition,
      * the atom data is allocated once only in gmx_pme_(re)init().
      */
     if (!pme_src->gpu && pme_src->nnodes == 1)
     {
-        gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), nullptr, nullptr);
+        gmx_pme_reinit_atoms(*pmedata, pme_src->atc[0].numAtoms(), {}, {});
     }
     // TODO this is mostly passing around current values
 }
@@ -957,7 +969,7 @@ void gmx_pme_reinit(struct gmx_pme_t** pmedata,
     /* We would like to reuse the fft grids, but that's harder */
 }
 
-void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q, real* V)
+real gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::ArrayRef<const real> q)
 {
     pmegrids_t* grid;
 
@@ -985,11 +997,13 @@ void gmx_pme_calc_energy(gmx_pme_t* pme, gmx::ArrayRef<const gmx::RVec> x, gmx::
     /* Only calculate the spline coefficients, don't actually spread */
     spread_on_grid(pme, atc, nullptr, TRUE, FALSE, pme->fftgrid[PME_GRID_QA], FALSE, PME_GRID_QA);
 
-    *V = gather_energy_bsplines(pme, grid->grid.grid, atc);
+    return gather_energy_bsplines(pme, grid->grid.grid, atc);
 }
 
 /*! \brief Calculate initial Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_c6, const real* local_sigma)
+static void calc_initial_lb_coeffs(gmx::ArrayRef<real>       coefficient,
+                                   gmx::ArrayRef<const real> local_c6,
+                                   gmx::ArrayRef<const real> local_sigma)
 {
     for (gmx::index i = 0; i < coefficient.ssize(); ++i)
     {
@@ -1001,7 +1015,7 @@ static void calc_initial_lb_coeffs(gmx::ArrayRef<real> coefficient, const real*
 }
 
 /*! \brief Calculate next Lorentz-Berthelot coefficients for LJ-PME */
-static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* local_sigma)
+static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, gmx::ArrayRef<const real> local_sigma)
 {
     for (gmx::index i = 0; i < coefficient.ssize(); ++i)
     {
@@ -1012,12 +1026,12 @@ static void calc_next_lb_coeffs(gmx::ArrayRef<real> coefficient, const real* loc
 int gmx_pme_do(struct gmx_pme_t*              pme,
                gmx::ArrayRef<const gmx::RVec> coordinates,
                gmx::ArrayRef<gmx::RVec>       forces,
-               real                           chargeA[],
-               real                           chargeB[],
-               real                           c6A[],
-               real                           c6B[],
-               real                           sigmaA[],
-               real                           sigmaB[],
+               gmx::ArrayRef<const real>      chargeA,
+               gmx::ArrayRef<const real>      chargeB,
+               gmx::ArrayRef<const real>      c6A,
+               gmx::ArrayRef<const real>      c6B,
+               gmx::ArrayRef<const real>      sigmaA,
+               gmx::ArrayRef<const real>      sigmaB,
                const matrix                   box,
                const t_commrec*               cr,
                int                            maxshift_x,
@@ -1037,21 +1051,16 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     GMX_ASSERT(pme->runMode == PmeRunMode::CPU,
                "gmx_pme_do should not be called on the GPU PME run.");
 
-    int                  d, npme, grid_index, max_grid_index;
-    PmeAtomComm&         atc         = pme->atc[0];
-    pmegrids_t*          pmegrid     = nullptr;
-    real*                grid        = nullptr;
-    real*                coefficient = nullptr;
-    PmeOutput            output[2]; // The second is used for the B state with FEP
-    real                 scale, lambda;
-    gmx_bool             bClearF;
-    gmx_parallel_3dfft_t pfft_setup;
-    real*                fftgrid;
-    t_complex*           cfftgrid;
-    int                  thread;
-    gmx_bool             bFirst, bDoSplines;
-    int                  fep_state;
-    int                  fep_states_lj = pme->bFEP_lj ? 2 : 1;
+    PmeAtomComm&              atc     = pme->atc[0];
+    pmegrids_t*               pmegrid = nullptr;
+    real*                     grid    = nullptr;
+    gmx::ArrayRef<const real> coefficient;
+    std::array<PmeOutput, 2>  output; // The second is used for the B state with FEP
+    gmx_parallel_3dfft_t      pfft_setup;
+    real*                     fftgrid;
+    t_complex*                cfftgrid;
+    int                       thread;
+    const int                 fep_states_lj = pme->bFEP_lj ? 2 : 1;
 
     // There's no support for computing energy without virial, or vice versa
     const bool computeEnergyAndVirial = (stepWork.computeEnergy || stepWork.computeVirial);
@@ -1091,7 +1100,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     pme->boxScaler->scaleBox(box, scaledBox);
 
     gmx::invertBoxMatrix(scaledBox, pme->recipbox);
-    bFirst = TRUE;
+    bool bFirst = true;
 
     /* For simplicity, we construct the splines for all particles if
      * more than one PME calculations is needed. Some optimization
      * could be done by keeping track of which atoms
@@ -1100,7 +1109,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
      * that don't yet have them.
      */
 
-    bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
+    bool bDoSplines = pme->bFEP || (pme->doCoulomb && pme->doLJ);
 
     /* We need a maximum of four separate PME calculations:
      * grid_index=0: Coulomb PME with charges from state A
     * grid_index=1: Coulomb PME with charges from state B
     * grid_index=2: LJ PME with C6 from state A
     * grid_index=3: LJ PME with C6 from state B
     */
 
     /* If we are doing LJ-PME with LB, we only do Q here */
-    max_grid_index = (pme->ljpme_combination_rule == eljpmeLB) ? DO_Q : DO_Q_AND_LJ;
-
-    for (grid_index = 0; grid_index < max_grid_index; ++grid_index)
+    const int max_grid_index = (pme->ljpme_combination_rule == LongRangeVdW::LB) ? DO_Q : DO_Q_AND_LJ;
+    bool      bClearF;
+    for (int grid_index = 0; grid_index < max_grid_index; ++grid_index)
     {
         /* Check if we should do calculations at this grid_index
          * If grid_index is odd we should be doing FEP
@@ -1141,34 +1150,19 @@ int gmx_pme_do(struct gmx_pme_t* pme,
 
         grid = pmegrid->grid.grid;
 
-        if (debug)
-        {
-            fprintf(debug, "PME: number of ranks = %d, rank = %d\n", cr->nnodes, cr->nodeid);
-            fprintf(debug, "Grid = %p\n", static_cast<void*>(grid));
-            if (grid == nullptr)
-            {
-                gmx_fatal(FARGS, "No grid!");
-            }
-        }
-
         if (pme->nnodes == 1)
         {
-            atc.coefficient = gmx::arrayRefFromArray(coefficient, coordinates.size());
+            atc.coefficient = coefficient;
        }
         else
         {
-            wallcycle_start(wcycle, ewcPME_REDISTXF);
+            wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
             do_redist_pos_coeffs(pme, cr, bFirst, coordinates, coefficient);
 
-            wallcycle_stop(wcycle, ewcPME_REDISTXF);
-        }
-
-        if (debug)
-        {
-            fprintf(debug, "Rank= %6d, pme local particles=%6d\n", cr->nodeid, atc.numAtoms());
+            wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
         }
 
-        wallcycle_start(wcycle, ewcPME_SPREAD);
+        wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
 
         /* Spread the coefficients on a grid */
         spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
@@ -1192,7 +1186,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
             copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
         }
 
-        wallcycle_stop(wcycle, ewcPME_SPREAD);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
 
         /* TODO If the OpenMP and single-threaded implementations
            converge, then spread_on_grid() and
@@ -1211,61 +1205,72 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 /* do 3d-fft */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                 }
                 gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                 }
 
                 /* solve in k-space for our local cells */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_start(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                 }
                 if (grid_index < DO_Q)
                 {
-                    loop_count = solve_pme_yzx(
-                            pme, cfftgrid, scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                            computeEnergyAndVirial, pme->nthread, thread);
+                    loop_count = solve_pme_yzx(pme,
+                                               cfftgrid,
+                                               scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
+                                               computeEnergyAndVirial,
+                                               pme->nthread,
+                                               thread);
                 }
                 else
                 {
                     loop_count =
-                            solve_pme_lj_yzx(pme, &cfftgrid, FALSE,
+                            solve_pme_lj_yzx(pme,
+                                             &cfftgrid,
+                                             FALSE,
                                              scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                                             computeEnergyAndVirial, pme->nthread, thread);
+                                             computeEnergyAndVirial,
+                                             pme->nthread,
+                                             thread);
                 }
 
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME));
+                    wallcycle_stop(
+                            wcycle,
+                            (grid_index < DO_Q ? WallCycleCounter::PmeSolve : WallCycleCounter::LJPme));
                     inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                 }
 
                 /* do 3d-invfft */
                 if (thread == 0)
                 {
-                    wallcycle_start(wcycle, ewcPME_FFT);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                 }
                 gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                 if (thread == 0)
                 {
-                    wallcycle_stop(wcycle, ewcPME_FFT);
+                    wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
 
                     if (pme->nodeid == 0)
                     {
-                        real ntot = pme->nkx * pme->nky * pme->nkz;
-                        npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                        real      ntot = pme->nkx * pme->nky * pme->nkz;
+                        const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                         inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                     }
 
                     /* Note: this wallcycle region is closed below
                        outside an OpenMP region, so take care if
                        refactoring code here. */
-                    wallcycle_start(wcycle, ewcPME_GATHER);
+                    wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                 }
 
                 copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1293,25 +1298,28 @@ int gmx_pme_do(struct gmx_pme_t* pme,
              * atc->f is the actual force array, not a buffer,
             * therefore we should not clear it.
             */
-            lambda  = grid_index < DO_Q ? lambda_q : lambda_lj;
-            bClearF = (bFirst && PAR(cr));
+            real lambda = grid_index < DO_Q ? lambda_q : lambda_lj;
+            bClearF     = (bFirst && PAR(cr));
 #pragma omp parallel for num_threads(pme->nthread) schedule(static)
             for (thread = 0; thread < pme->nthread; thread++)
             {
                 try
                 {
-                    gather_f_bsplines(pme, grid, bClearF, &atc, &atc.spline[thread],
+                    gather_f_bsplines(pme,
+                                      grid,
+                                      bClearF,
+                                      &atc,
+                                      &atc.spline[thread],
                                       pme->bFEP ? (grid_index % 2 == 0 ? 1.0 - lambda : lambda) : 1.0);
                 }
                 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
             }
 
-            inc_nrnb(nrnb, eNR_GATHERFBSP,
-                     pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
+            inc_nrnb(nrnb, eNR_GATHERFBSP, pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
             /* Note: this wallcycle region is opened above inside an OpenMP
                region, so take care if refactoring code here. */
-            wallcycle_stop(wcycle, ewcPME_GATHER);
+            wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
        }
 
         if (computeEnergyAndVirial)
         {
@@ -1334,13 +1342,16 @@ int gmx_pme_do(struct gmx_pme_t* pme,
     /* For Lorentz-Berthelot combination rules in LJ-PME, we need to calculate
      * seven terms.
      */
-    if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB)
+    if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB)
     {
         /* Loop over A- and B-state if we are doing FEP */
-        for (fep_state = 0; fep_state < fep_states_lj; ++fep_state)
+        for (int fep_state = 0; fep_state < fep_states_lj; ++fep_state)
         {
-            real *local_c6 = nullptr, *local_sigma = nullptr, *RedistC6 = nullptr, *RedistSigma = nullptr;
-            gmx::ArrayRef<real> coefficientBuffer;
+            std::vector<real>         local_c6;
+            std::vector<real>         local_sigma;
+            gmx::ArrayRef<const real> RedistC6;
+            gmx::ArrayRef<const real> RedistSigma;
+            gmx::ArrayRef<real>       coefficientBuffer;
             if (pme->nnodes == 1)
             {
                 pme->lb_buf1.resize(atc.numAtoms());
@@ -1348,12 +1359,12 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 switch (fep_state)
                 {
                     case 0:
-                        local_c6    = c6A;
-                        local_sigma = sigmaA;
+                        local_c6.assign(c6A.begin(), c6A.end());
+                        local_sigma.assign(sigmaA.begin(), sigmaA.end());
                         break;
                     case 1:
-                        local_c6    = c6B;
-                        local_sigma = sigmaB;
+                        local_c6.assign(c6B.begin(), c6B.end());
+                        local_sigma.assign(sigmaB.begin(), sigmaB.end());
                         break;
                     default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
                }
@@ -1373,31 +1384,31 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         break;
                     default: gmx_incons("Trying to access wrong FEP-state in LJ-PME routine");
                 }
-                wallcycle_start(wcycle, ewcPME_REDISTXF);
+                wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
 
                 do_redist_pos_coeffs(pme, cr, bFirst, coordinates, RedistC6);
                 pme->lb_buf1.resize(atc.numAtoms());
                 pme->lb_buf2.resize(atc.numAtoms());
-                local_c6 = pme->lb_buf1.data();
+                local_c6.assign(pme->lb_buf1.begin(), pme->lb_buf1.end());
                 for (int i = 0; i < atc.numAtoms(); ++i)
                 {
                     local_c6[i] = atc.coefficient[i];
                 }
 
                 do_redist_pos_coeffs(pme, cr, FALSE, coordinates, RedistSigma);
-                local_sigma = pme->lb_buf2.data();
+                local_sigma.assign(pme->lb_buf2.begin(), pme->lb_buf2.end());
                 for (int i = 0; i < atc.numAtoms(); ++i)
                 {
                     local_sigma[i] = atc.coefficient[i];
                 }
 
-                wallcycle_stop(wcycle, ewcPME_REDISTXF);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
             }
             atc.coefficient = coefficientBuffer;
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
 
             /*Seven terms in LJ-PME with LB, grid_index < 2 reserved for electrostatics*/
-            for (grid_index = 2; grid_index < 9; ++grid_index)
+            for (int grid_index = 2; grid_index < 9; ++grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1406,7 +1417,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 calc_next_lb_coeffs(coefficientBuffer, local_sigma);
                 grid = pmegrid->grid.grid;
 
-                wallcycle_start(wcycle, ewcPME_SPREAD);
+                wallcycle_start(wcycle, WallCycleCounter::PmeSpread);
                 /* Spread the c6 on a grid */
                 spread_on_grid(pme, &atc, pmegrid, bFirst, TRUE, fftgrid, bDoSplines, grid_index);
 
                 if (bFirst)
                 {
                     inc_nrnb(nrnb, eNR_WEIGHTS, DIM * atc.numAtoms());
                 }
 
-                inc_nrnb(nrnb, eNR_SPREADBSP,
+                inc_nrnb(nrnb,
+                         eNR_SPREADBSP,
                          pme->pme_order * pme->pme_order * pme->pme_order * atc.numAtoms());
                 if (pme->nthread == 1)
                 {
@@ -1427,7 +1439,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                     }
                     copy_pmegrid_to_fftgrid(pme, grid, fftgrid, grid_index);
                 }
-                wallcycle_stop(wcycle, ewcPME_SPREAD);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeSpread);
 
                 /*Here we start a large thread parallel region*/
#pragma omp parallel num_threads(pme->nthread) private(thread)
                 {
                     try
                     {
                         thread = gmx_omp_get_thread_num();
                         /* do 3d-fft */
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                         }
 
                         gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, thread, wcycle);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
                         }
                     }
                     GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                 }
@@ -1460,16 +1472,20 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         thread = gmx_omp_get_thread_num();
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcLJPME);
+                            wallcycle_start(wcycle, WallCycleCounter::LJPme);
                         }
 
                         loop_count =
-                                solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE,
+                                solve_pme_lj_yzx(pme,
+                                                 &pme->cfftgrid[2],
+                                                 TRUE,
                                                  scaledBox[XX][XX] * scaledBox[YY][YY] * scaledBox[ZZ][ZZ],
-                                                 computeEnergyAndVirial, pme->nthread, thread);
+                                                 computeEnergyAndVirial,
+                                                 pme->nthread,
+                                                 thread);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcLJPME);
+                            wallcycle_stop(wcycle, WallCycleCounter::LJPme);
                             inc_nrnb(nrnb, eNR_SOLVEPME, loop_count);
                         }
                     }
@@ -1486,7 +1502,7 @@ int gmx_pme_do(struct gmx_pme_t* pme,
 
             bFirst = !pme->doCoulomb;
             calc_initial_lb_coeffs(coefficientBuffer, local_c6, local_sigma);
-            for (grid_index = 8; grid_index >= 2; --grid_index)
+            for (int grid_index = 8; grid_index >= 2; --grid_index)
             {
                 /* Unpack structure */
                 pmegrid    = &pme->pmegrid[grid_index];
@@ -1502,22 +1518,22 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                         /* do 3d-invfft */
                         if (thread == 0)
                         {
-                            wallcycle_start(wcycle, ewcPME_FFT);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeFft);
                         }
 
                         gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, thread, wcycle);
                         if (thread == 0)
                         {
-                            wallcycle_stop(wcycle, ewcPME_FFT);
+                            wallcycle_stop(wcycle, WallCycleCounter::PmeFft);
 
                             if (pme->nodeid == 0)
                             {
-                                real ntot = pme->nkx * pme->nky * pme->nkz;
-                                npme      = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
+                                real      ntot = pme->nkx * pme->nky * pme->nkz;
+                                const int npme = static_cast<int>(ntot * std::log(ntot) / std::log(2.0));
                                 inc_nrnb(nrnb, eNR_FFT, 2 * npme);
                             }
-                            wallcycle_start(wcycle, ewcPME_GATHER);
+                            wallcycle_start(wcycle, WallCycleCounter::PmeGather);
                         }
 
                         copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread);
@@ -1536,8 +1552,8 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 if (stepWork.computeForces)
                 {
                     /* interpolate forces for our local atoms */
-                    bClearF = (bFirst && PAR(cr));
-                    scale   = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
+                    bClearF    = (bFirst && PAR(cr));
+                    real scale = pme->bFEP ? (fep_state < 1 ? 1.0 - lambda_lj : lambda_lj) : 1.0;
                     scale *= lb_scale_factor[grid_index - 2];
 
#pragma omp parallel for num_threads(pme->nthread) schedule(static)
@@ -1545,27 +1561,28 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                     {
                         try
                         {
-                            gather_f_bsplines(pme, grid, bClearF, &pme->atc[0],
-                                              &pme->atc[0].spline[thread], scale);
+                            gather_f_bsplines(
+                                    pme, grid, bClearF, &pme->atc[0], &pme->atc[0].spline[thread], scale);
                         }
                         GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                     }
 
-                    inc_nrnb(nrnb, eNR_GATHERFBSP,
+                    inc_nrnb(nrnb,
+                             eNR_GATHERFBSP,
                              pme->pme_order * pme->pme_order * pme->pme_order * pme->atc[0].numAtoms());
                 }
 
-                wallcycle_stop(wcycle, ewcPME_GATHER);
+                wallcycle_stop(wcycle, WallCycleCounter::PmeGather);
 
                 bFirst = FALSE;
             } /* for (grid_index = 8; grid_index >= 2; --grid_index) */
         }     /* for (fep_state = 0; fep_state < fep_states_lj; ++fep_state) */
-    }         /* if (pme->doLJ && pme->ljpme_combination_rule == eljpmeLB) */
+    }         /* if (pme->doLJ && pme->ljpme_combination_rule == LongRangeVdW::LB) */
 
     if (stepWork.computeForces && pme->nnodes > 1)
     {
-        wallcycle_start(wcycle, ewcPME_REDISTXF);
-        for (d = 0; d < pme->ndecompdim; d++)
+        wallcycle_start(wcycle, WallCycleCounter::PmeRedistXF);
+        for (int d = 0; d < pme->ndecompdim; d++)
         {
             gmx::ArrayRef<gmx::RVec> forcesRef;
             if (d == pme->ndecompdim - 1)
@@ -1578,13 +1595,13 @@ int gmx_pme_do(struct gmx_pme_t* pme,
             {
                 forcesRef = pme->atc[d + 1].f;
             }
-            if (DOMAINDECOMP(cr))
+            if (haveDDAtomOrdering(*cr))
             {
                 dd_pmeredist_f(pme, &pme->atc[d], forcesRef, d == pme->ndecompdim - 1 && pme->bPPnode);
             }
         }
-        wallcycle_stop(wcycle, ewcPME_REDISTXF);
+        wallcycle_stop(wcycle, WallCycleCounter::PmeRedistXF);
     }
 
     if (computeEnergyAndVirial)
    {
@@ -1609,10 +1626,6 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 }
             }
         }
-        if (debug)
-        {
-            fprintf(debug, "Electrostatic PME mesh energy: %g\n", *energy_q);
-        }
     }
     else
     {
@@ -1640,10 +1653,6 @@ int gmx_pme_do(struct gmx_pme_t* pme,
                 }
             }
         }
-        if (debug)
-        {
-            fprintf(debug, "Lennard-Jones PME mesh energy: %g\n", *energy_lj);
-        }
     }
     else
     {
@@ -1660,8 +1669,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         return;
     }
 
-    delete pme->boxScaler;
-
     sfree(pme->nnx);
     sfree(pme->nny);
     sfree(pme->nnz);
@@ -1697,9 +1704,6 @@ void gmx_pme_destroy(gmx_pme_t* pme)
         pme_free_all_work(&pme->solve_work, pme->nthread);
     }
 
-    sfree(pme->sum_qgrid_tmp);
-    sfree(pme->sum_qgrid_dd_tmp);
-
     destroy_pme_spline_work(pme->spline_work);
 
     if (pme->gpu != nullptr)
@@ -1710,13 +1714,16 @@ void gmx_pme_destroy(gmx_pme_t* pme)
     delete pme;
 }
 
-void gmx_pme_reinit_atoms(gmx_pme_t* pme, const int numAtoms, const real* chargesA, const real* chargesB)
+void gmx_pme_reinit_atoms(gmx_pme_t*                pme,
+                          const int                 numAtoms,
+                          gmx::ArrayRef<const real> chargesA,
+                          gmx::ArrayRef<const real> chargesB)
 {
     if (pme->gpu != nullptr)
     {
-        GMX_ASSERT(!(pme->bFEP_q && chargesB == nullptr),
+        GMX_ASSERT(!(pme->bFEP_q && chargesB.empty()),
                    "B state charges must be specified if running Coulomb FEP on the GPU");
-        pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA, pme->bFEP_q ? chargesB : nullptr);
+        pme_gpu_reinit_atoms(pme->gpu, numAtoms, chargesA.data(), pme->bFEP_q ? chargesB.data() : nullptr);
     }
     else
     {
@@ -1729,3 +1736,23 @@ bool gmx_pme_grid_matches(const gmx_pme_t& pme, const ivec grid_size)
 {
     return (pme.nkx == grid_size[XX] && pme.nky == grid_size[YY] && pme.nkz == grid_size[ZZ]);
 }
+
+void gmx::SeparatePmeRanksPermitted::disablePmeRanks(const std::string& reason)
+{
+    permitSeparatePmeRanks_ = false;
+
+    if (!reason.empty())
+    {
+        reasons_.push_back(reason);
+    }
+}
+
+bool gmx::SeparatePmeRanksPermitted::permitSeparatePmeRanks() const
+{
+    return permitSeparatePmeRanks_;
+}
+
+std::string gmx::SeparatePmeRanksPermitted::reasonsWhyDisabled() const
+{
+    return joinStrings(reasons_, "; ");
+}
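
The central refactoring in this diff replaces ad-hoc std::list<std::string> error accumulation (the removed addMessageIfNotSupported() helper) with gmx::MessageStringCollector, whose startContext()/appendIf()/finishContext()/isEmpty()/toString() calls appear in pme_gpu_supports_build() and friends above. The following is a minimal standalone sketch of that collector pattern; the stand-in class here is illustrative only and assumes just the five methods exercised by this diff, while the real implementation lives in src/gromacs/utility/message_string_collector.h and differs in detail.

// Minimal stand-in for gmx::MessageStringCollector (assumption: only the
// methods used in this diff; not the real GROMACS implementation).
#include <iostream>
#include <string>
#include <vector>

class MessageStringCollector
{
public:
    // Open a context: its title is emitted once if any message follows it.
    void startContext(const std::string& context) { pendingContexts_.push_back(context); }

    // Record a message only when the condition holds.
    void appendIf(bool condition, const std::string& message)
    {
        if (condition)
        {
            // Emit pending context titles before the first message under them.
            for (const std::string& c : pendingContexts_)
            {
                text_ += c + '\n';
            }
            pendingContexts_.clear();
            text_ += message + '\n';
        }
    }

    // Close the innermost context; an unused context title is never emitted.
    void finishContext()
    {
        if (!pendingContexts_.empty())
        {
            pendingContexts_.pop_back();
        }
    }

    bool        isEmpty() const { return text_.empty(); }
    std::string toString() const { return text_; }

private:
    std::vector<std::string> pendingContexts_;
    std::string              text_;
};

// Usage mirroring pme_gpu_supports_build() above: callers test isEmpty()
// to decide support and read toString() for the human-readable reasons.
int main()
{
    MessageStringCollector errorReasons;
    errorReasons.startContext("PME GPU does not support:");
    errorReasons.appendIf(true, "Double-precision build of GROMACS.");
    errorReasons.appendIf(false, "Non-GPU build of GROMACS.");
    errorReasons.finishContext();
    if (!errorReasons.isEmpty())
    {
        std::cout << errorReasons.toString();
    }
    return 0;
}

This shape explains why every rewritten function ends with the same tail: the condition checks themselves become one appendIf() per restriction, and the old "is it supported?" boolean falls out of isEmpty().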
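The diff also appends method definitions for a new gmx::SeparatePmeRanksPermitted helper, which lets independent subsystems veto the use of separate PME ranks while accumulating human-readable reasons. A hypothetical caller-side sketch follows; the class body is assumed from the method definitions shown in the diff (a boolean flag plus a list of reasons), and the real declaration lives in a GROMACS header not shown here.

// Assumed class shape, inferred from the definitions at the end of the diff.
#include <iostream>
#include <string>
#include <vector>

class SeparatePmeRanksPermitted
{
public:
    void disablePmeRanks(const std::string& reason)
    {
        permitSeparatePmeRanks_ = false;
        if (!reason.empty())
        {
            reasons_.push_back(reason);
        }
    }
    bool permitSeparatePmeRanks() const { return permitSeparatePmeRanks_; }
    // Joins the accumulated reasons, like joinStrings(reasons_, "; ") above.
    std::string reasonsWhyDisabled() const
    {
        std::string joined;
        for (const std::string& r : reasons_)
        {
            joined += (joined.empty() ? "" : "; ") + r;
        }
        return joined;
    }

private:
    bool                     permitSeparatePmeRanks_ = true;
    std::vector<std::string> reasons_;
};

int main()
{
    SeparatePmeRanksPermitted separatePmeRanksPermitted;
    // Each subsystem can veto separate PME ranks with its own reason:
    separatePmeRanksPermitted.disablePmeRanks("hypothetical reason for illustration");
    if (!separatePmeRanksPermitted.permitSeparatePmeRanks())
    {
        std::cout << "Separate PME ranks disabled: "
                  << separatePmeRanksPermitted.reasonsWhyDisabled() << '\n';
    }
    return 0;
}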