#include "pme_pp_communication.h"
/*! \brief environment variable to enable GPU P2P communication */
-static const bool c_enableGpuPmePpComms = (getenv("GMX_GPU_PME_PP_COMMS") != nullptr)
- && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
+static const bool c_enableGpuPmePpComms =
+ (getenv("GMX_GPU_PME_PP_COMMS") != nullptr) && GMX_THREAD_MPI && (GMX_GPU == GMX_GPU_CUDA);
/*! \brief Master PP-PME communication data structure */
-struct gmx_pme_pp {
+struct gmx_pme_pp
+{
MPI_Comm mpi_comm_mysim; /**< MPI communicator for this simulation */
std::vector<PpRanks> ppRanks; /**< The PP partner ranks */
int peerRankId; /**< The peer PP rank id */
std::vector<real> sigmaA;
std::vector<real> sigmaB;
//@}
- gmx::HostVector<gmx::RVec> x; /**< Vector of atom coordinates to transfer to PME ranks */
- std::vector<gmx::RVec> f; /**< Vector of atom forces received from PME ranks */
+ gmx::HostVector<gmx::RVec> x; /**< Vector of atom coordinates to transfer to PME ranks */
+ std::vector<gmx::RVec> f; /**< Vector of atom forces received from PME ranks */
//@{
/**< Vectors of MPI objects used in non-blocking communication between multiple PP ranks per PME rank */
std::vector<MPI_Request> req;
/*! \brief object for receiving coordinates using communications operating on GPU memory space */
std::unique_ptr<gmx::PmeCoordinateReceiverGpu> pmeCoordinateReceiverGpu;
/*! \brief object for sending PME force using communications operating on GPU memory space */
- std::unique_ptr<gmx::PmeForceSenderGpu> pmeForceSenderGpu;
+ std::unique_ptr<gmx::PmeForceSenderGpu> pmeForceSenderGpu;
/*! \brief whether GPU direct communications are active for PME-PP transfers */
bool useGpuDirectComm = false;
};
/*! \brief Initialize the PME-only side of the PME <-> PP communication */
-static std::unique_ptr<gmx_pme_pp> gmx_pme_pp_init(const t_commrec *cr)
+static std::unique_ptr<gmx_pme_pp> gmx_pme_pp_init(const t_commrec* cr)
{
auto pme_pp = std::make_unique<gmx_pme_pp>();
MPI_Comm_rank(cr->mpi_comm_mygroup, &rank);
auto ppRanks = get_pme_ddranks(cr, rank);
pme_pp->ppRanks.reserve(ppRanks.size());
- for (const auto &ppRankId : ppRanks)
+ for (const auto& ppRankId : ppRanks)
{
- pme_pp->ppRanks.push_back({ppRankId, 0});
+ pme_pp->ppRanks.push_back({ ppRankId, 0 });
}
// The peer PP rank is the last one.
pme_pp->peerRankId = pme_pp->ppRanks.back().rankId;
- pme_pp->req.resize(eCommType_NR*pme_pp->ppRanks.size());
- pme_pp->stat.resize(eCommType_NR*pme_pp->ppRanks.size());
+ pme_pp->req.resize(eCommType_NR * pme_pp->ppRanks.size());
+ pme_pp->stat.resize(eCommType_NR * pme_pp->ppRanks.size());
#else
GMX_UNUSED_VALUE(cr);
#endif
static void reset_pmeonly_counters(gmx_wallcycle_t wcycle,
gmx_walltime_accounting_t walltime_accounting,
- t_nrnb *nrnb,
+ t_nrnb* nrnb,
int64_t step,
bool useGpuForPme)
{
}
}
-static gmx_pme_t *gmx_pmeonly_switch(std::vector<gmx_pme_t *> *pmedata,
- const ivec grid_size,
- real ewaldcoeff_q, real ewaldcoeff_lj,
- const t_commrec *cr, const t_inputrec *ir)
+static gmx_pme_t* gmx_pmeonly_switch(std::vector<gmx_pme_t*>* pmedata,
+ const ivec grid_size,
+ real ewaldcoeff_q,
+ real ewaldcoeff_lj,
+ const t_commrec* cr,
+ const t_inputrec* ir)
{
GMX_ASSERT(pmedata, "Bad PME tuning list pointer");
- for (auto &pme : *pmedata)
+ for (auto& pme : *pmedata)
{
GMX_ASSERT(pme, "Bad PME tuning list element pointer");
- if (pme->nkx == grid_size[XX] &&
- pme->nky == grid_size[YY] &&
- pme->nkz == grid_size[ZZ])
+ if (pme->nkx == grid_size[XX] && pme->nky == grid_size[YY] && pme->nkz == grid_size[ZZ])
{
/* Here we have found an existing PME data structure that suits us.
* However, in the GPU case, we have to reinitialize it - there's only one GPU structure.
}
}
- const auto &pme = pmedata->back();
- gmx_pme_t *newStructure = nullptr;
+ const auto& pme = pmedata->back();
+ gmx_pme_t* newStructure = nullptr;
// Copy last structure with new grid params
gmx_pme_reinit(&newStructure, cr, pme, ir, grid_size, ewaldcoeff_q, ewaldcoeff_lj);
pmedata->push_back(newStructure);
* \param[out] maxshift_y Maximum shift in Y direction, if received.
* \param[out] lambda_q Free-energy lambda for electrostatics, if received.
* \param[out] lambda_lj Free-energy lambda for Lennard-Jones, if received.
- * \param[out] bEnerVir Set to true if this is an energy/virial calculation step, otherwise set to false.
- * \param[out] step MD integration step number.
- * \param[out] grid_size PME grid size, if received.
- * \param[out] ewaldcoeff_q Ewald cut-off parameter for electrostatics, if received.
- * \param[out] ewaldcoeff_lj Ewald cut-off parameter for Lennard-Jones, if received.
- * \param[in] useGpuForPme flag on whether PME is on GPU
- * \param[in] stateGpu GPU state propagator object
- * \param[in] runMode PME run mode
+ * \param[out] bEnerVir Set to true if this is an energy/virial calculation step, otherwise
+ * set to false. \param[out] step MD integration step number. \param[out] grid_size PME
+ * grid size, if received. \param[out] ewaldcoeff_q Ewald cut-off parameter for
+ * electrostatics, if received. \param[out] ewaldcoeff_lj Ewald cut-off parameter for
+ * Lennard-Jones, if received. \param[in] useGpuForPme flag on whether PME is on GPU \param[in]
+ * stateGpu GPU state propagator object \param[in] runMode PME run mode
*
* \retval pmerecvqxX All parameters were set, chargeA and chargeB can be NULL.
* \retval pmerecvqxFINISH No parameters were set.
* \retval pmerecvqxSWITCHGRID Only grid_size and *ewaldcoeff were set.
* \retval pmerecvqxRESETCOUNTERS *step was set.
*/
-static int gmx_pme_recv_coeffs_coords(struct gmx_pme_t *pme,
- gmx_pme_pp *pme_pp,
- int *natoms,
+static int gmx_pme_recv_coeffs_coords(struct gmx_pme_t* pme,
+ gmx_pme_pp* pme_pp,
+ int* natoms,
matrix box,
- int *maxshift_x,
- int *maxshift_y,
- real *lambda_q,
- real *lambda_lj,
- gmx_bool *bEnerVir,
- int64_t *step,
- ivec *grid_size,
- real *ewaldcoeff_q,
- real *ewaldcoeff_lj,
+ int* maxshift_x,
+ int* maxshift_y,
+ real* lambda_q,
+ real* lambda_lj,
+ gmx_bool* bEnerVir,
+ int64_t* step,
+ ivec* grid_size,
+ real* ewaldcoeff_q,
+ real* ewaldcoeff_lj,
bool useGpuForPme,
- gmx::StatePropagatorDataGpu *stateGpu,
- PmeRunMode gmx_unused runMode)
+ gmx::StatePropagatorDataGpu* stateGpu,
+ PmeRunMode gmx_unused runMode)
{
int status = -1;
int nat = 0;
cnb.flags = 0;
/* Receive the send count, box and time step from the peer PP node */
- MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE,
- pme_pp->peerRankId, eCommType_CNB,
+ MPI_Recv(&cnb, sizeof(cnb), MPI_BYTE, pme_pp->peerRankId, eCommType_CNB,
pme_pp->mpi_comm_mysim, MPI_STATUS_IGNORE);
/* We accumulate all received flags */
flags |= cnb.flags;
- *step = cnb.step;
+ *step = cnb.step;
if (debug)
{
fprintf(debug, "PME only rank receiving:%s%s%s%s%s\n",
- (cnb.flags & PP_PME_CHARGE) ? " charges" : "",
- (cnb.flags & PP_PME_COORD ) ? " coordinates" : "",
- (cnb.flags & PP_PME_FINISH) ? " finish" : "",
- (cnb.flags & PP_PME_SWITCHGRID) ? " switch grid" : "",
+ (cnb.flags & PP_PME_CHARGE) ? " charges" : "",
+ (cnb.flags & PP_PME_COORD) ? " coordinates" : "",
+ (cnb.flags & PP_PME_FINISH) ? " finish" : "",
+ (cnb.flags & PP_PME_SWITCHGRID) ? " switch grid" : "",
(cnb.flags & PP_PME_RESETCOUNTERS) ? " reset counters" : "");
}
pme_pp->useGpuDirectComm = ((cnb.flags & PP_PME_GPUCOMMS) != 0);
GMX_ASSERT(!pme_pp->useGpuDirectComm || (pme_pp->pmeForceSenderGpu != nullptr),
"The use of GPU direct communication for PME-PP is enabled, "
- "but the PME GPU force reciever object does not exist" );
+ "but the PME GPU force reciever object does not exist");
if (cnb.flags & PP_PME_FINISH)
{
*ewaldcoeff_q = cnb.ewaldcoeff_q;
*ewaldcoeff_lj = cnb.ewaldcoeff_lj;
- status = pmerecvqxSWITCHGRID;
+ status = pmerecvqxSWITCHGRID;
}
if (cnb.flags & PP_PME_RESETCOUNTERS)
atomSetChanged = true;
/* Receive the send counts from the other PP nodes */
- for (auto &sender : pme_pp->ppRanks)
+ for (auto& sender : pme_pp->ppRanks)
{
if (sender.rankId == pme_pp->peerRankId)
{
}
else
{
- MPI_Irecv(&sender.numAtoms, sizeof(sender.numAtoms),
- MPI_BYTE,
- sender.rankId, eCommType_CNB,
- pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
+ MPI_Irecv(&sender.numAtoms, sizeof(sender.numAtoms), MPI_BYTE, sender.rankId,
+ eCommType_CNB, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
}
}
MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
messages = 0;
nat = 0;
- for (const auto &sender : pme_pp->ppRanks)
+ for (const auto& sender : pme_pp->ppRanks)
{
nat += sender.numAtoms;
}
/* Receive the charges in place */
for (int q = 0; q < eCommType_NR; q++)
{
- real *bufferPtr;
+ real* bufferPtr;
- if (!(cnb.flags & (PP_PME_CHARGE<<q)))
+ if (!(cnb.flags & (PP_PME_CHARGE << q)))
{
continue;
}
switch (q)
{
- case eCommType_ChargeA: bufferPtr = pme_pp->chargeA.data(); break;
- case eCommType_ChargeB: bufferPtr = pme_pp->chargeB.data(); break;
+ case eCommType_ChargeA: bufferPtr = pme_pp->chargeA.data(); break;
+ case eCommType_ChargeB: bufferPtr = pme_pp->chargeB.data(); break;
case eCommType_SQRTC6A: bufferPtr = pme_pp->sqrt_c6A.data(); break;
case eCommType_SQRTC6B: bufferPtr = pme_pp->sqrt_c6B.data(); break;
- case eCommType_SigmaA: bufferPtr = pme_pp->sigmaA.data(); break;
- case eCommType_SigmaB: bufferPtr = pme_pp->sigmaB.data(); break;
+ case eCommType_SigmaA: bufferPtr = pme_pp->sigmaA.data(); break;
+ case eCommType_SigmaB: bufferPtr = pme_pp->sigmaB.data(); break;
default: gmx_incons("Wrong eCommType");
}
nat = 0;
- for (const auto &sender : pme_pp->ppRanks)
+ for (const auto& sender : pme_pp->ppRanks)
{
if (sender.numAtoms > 0)
{
- MPI_Irecv(bufferPtr+nat,
- sender.numAtoms*sizeof(real),
- MPI_BYTE,
- sender.rankId, q,
- pme_pp->mpi_comm_mysim,
- &pme_pp->req[messages++]);
+ MPI_Irecv(bufferPtr + nat, sender.numAtoms * sizeof(real), MPI_BYTE,
+ sender.rankId, q, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
nat += sender.numAtoms;
if (debug)
{
- fprintf(debug, "Received from PP rank %d: %d %s\n",
- sender.rankId, sender.numAtoms,
- (q == eCommType_ChargeA ||
- q == eCommType_ChargeB) ? "charges" : "params");
+ fprintf(debug, "Received from PP rank %d: %d %s\n", sender.rankId,
+ sender.numAtoms,
+ (q == eCommType_ChargeA || q == eCommType_ChargeB) ? "charges"
+ : "params");
}
}
}
}
if (pme_pp->useGpuDirectComm)
{
- GMX_ASSERT(runMode == PmeRunMode::GPU, "GPU Direct PME-PP communication has been enabled, "
+ GMX_ASSERT(runMode == PmeRunMode::GPU,
+ "GPU Direct PME-PP communication has been enabled, "
"but PME run mode is not PmeRunMode::GPU\n");
// This rank will have its data accessed directly by PP rank, so needs to send the remote addresses.
rvec* d_x = nullptr;
rvec* d_f = nullptr;
-#if (GMX_GPU == GMX_GPU_CUDA) //avoid invalid cast for OpenCL
- d_x = reinterpret_cast<rvec*> (pme_gpu_get_device_x(pme));
- d_f = reinterpret_cast<rvec*> (pme_gpu_get_device_f(pme));
-#endif
+# if (GMX_GPU == GMX_GPU_CUDA) // avoid invalid cast for OpenCL
+ d_x = reinterpret_cast<rvec*>(pme_gpu_get_device_x(pme));
+ d_f = reinterpret_cast<rvec*>(pme_gpu_get_device_f(pme));
+# endif
pme_pp->pmeCoordinateReceiverGpu->sendCoordinateBufferAddressToPpRanks(d_x);
pme_pp->pmeForceSenderGpu->sendForceBufferAddressToPpRanks(d_f);
}
/* The box, FE flag and lambda are sent along with the coordinates
* */
copy_mat(cnb.box, box);
- *lambda_q = cnb.lambda_q;
- *lambda_lj = cnb.lambda_lj;
- *bEnerVir = ((cnb.flags & PP_PME_ENER_VIR) != 0U);
- *step = cnb.step;
+ *lambda_q = cnb.lambda_q;
+ *lambda_lj = cnb.lambda_lj;
+ *bEnerVir = ((cnb.flags & PP_PME_ENER_VIR) != 0U);
+ *step = cnb.step;
/* Receive the coordinates in place */
nat = 0;
- for (const auto &sender : pme_pp->ppRanks)
+ for (const auto& sender : pme_pp->ppRanks)
{
if (sender.numAtoms > 0)
{
}
else
{
- MPI_Irecv(pme_pp->x[nat],
- sender.numAtoms*sizeof(rvec),
- MPI_BYTE,
- sender.rankId, eCommType_COORD,
- pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
+ MPI_Irecv(pme_pp->x[nat], sender.numAtoms * sizeof(rvec), MPI_BYTE, sender.rankId,
+ eCommType_COORD, pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
}
nat += sender.numAtoms;
if (debug)
{
- fprintf(debug, "Received from PP rank %d: %d "
+ fprintf(debug,
+ "Received from PP rank %d: %d "
"coordinates\n",
sender.rankId, sender.numAtoms);
}
/* Wait for the coordinates and/or charges to arrive */
MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
messages = 0;
- }
- while (status == -1);
+ } while (status == -1);
#else
GMX_UNUSED_VALUE(pme_pp);
GMX_UNUSED_VALUE(box);
if (status == pmerecvqxX)
{
- *natoms = nat;
+ *natoms = nat;
}
return status;
}
/*! \brief Send force data to PP ranks */
-static void sendFToPP(void* sendbuf, PpRanks receiver, gmx_pme_pp *pme_pp, int *messages)
+static void sendFToPP(void* sendbuf, PpRanks receiver, gmx_pme_pp* pme_pp, int* messages)
{
if (pme_pp->useGpuDirectComm)
{
GMX_ASSERT((pme_pp->pmeForceSenderGpu != nullptr),
"The use of GPU direct communication for PME-PP is enabled, "
- "but the PME GPU force reciever object does not exist" );
+ "but the PME GPU force reciever object does not exist");
pme_pp->pmeForceSenderGpu->sendFToPpCudaDirect(receiver.rankId);
}
else
{
#if GMX_MPI
- //Send using MPI
- MPI_Isend(sendbuf, receiver.numAtoms*sizeof(rvec), MPI_BYTE, receiver.rankId,
- 0, pme_pp->mpi_comm_mysim, &pme_pp->req[*messages]);
- *messages = *messages+1;
+ // Send using MPI
+ MPI_Isend(sendbuf, receiver.numAtoms * sizeof(rvec), MPI_BYTE, receiver.rankId, 0,
+ pme_pp->mpi_comm_mysim, &pme_pp->req[*messages]);
+ *messages = *messages + 1;
#endif
}
-
}
/*! \brief Send the PME mesh force, virial and energy to the PP-only ranks. */
-static void gmx_pme_send_force_vir_ener(const gmx_pme_t &pme,
- gmx_pme_pp *pme_pp,
- const PmeOutput &output,
- real dvdlambda_q, real dvdlambda_lj,
- float cycles)
+static void gmx_pme_send_force_vir_ener(const gmx_pme_t& pme,
+ gmx_pme_pp* pme_pp,
+ const PmeOutput& output,
+ real dvdlambda_q,
+ real dvdlambda_lj,
+ float cycles)
{
#if GMX_MPI
gmx_pme_comm_vir_ene_t cve;
/* Now the evaluated forces have to be transferred to the PP nodes */
messages = 0;
ind_end = 0;
- for (const auto &receiver : pme_pp->ppRanks)
+ for (const auto& receiver : pme_pp->ppRanks)
{
- ind_start = ind_end;
- ind_end = ind_start + receiver.numAtoms;
- void *sendbuf = const_cast<void *>(static_cast<const void *>(output.forces_[ind_start]));
+ ind_start = ind_end;
+ ind_end = ind_start + receiver.numAtoms;
+ void* sendbuf = const_cast<void*>(static_cast<const void*>(output.forces_[ind_start]));
if (pme_pp->useGpuDirectComm)
{
- //Data will be transferred directly from GPU.
- rvec* d_f = reinterpret_cast<rvec*> (pme_gpu_get_device_f(&pme));
- sendbuf = reinterpret_cast<void*> (&d_f[ind_start]);
+ // Data will be transferred directly from GPU.
+ rvec* d_f = reinterpret_cast<rvec*>(pme_gpu_get_device_f(&pme));
+ sendbuf = reinterpret_cast<void*>(&d_f[ind_start]);
}
sendFToPP(sendbuf, receiver, pme_pp, &messages);
}
if (debug)
{
- fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n",
- pme_pp->peerRankId);
+ fprintf(debug, "PME rank sending to PP rank %d: virial and energy\n", pme_pp->peerRankId);
}
- MPI_Isend(&cve, sizeof(cve), MPI_BYTE,
- pme_pp->peerRankId, 1,
- pme_pp->mpi_comm_mysim, &pme_pp->req[messages++]);
+ MPI_Isend(&cve, sizeof(cve), MPI_BYTE, pme_pp->peerRankId, 1, pme_pp->mpi_comm_mysim,
+ &pme_pp->req[messages++]);
/* Wait for the forces to arrive */
MPI_Waitall(messages, pme_pp->req.data(), pme_pp->stat.data());
#endif
}
-int gmx_pmeonly(struct gmx_pme_t *pme,
- const t_commrec *cr, t_nrnb *mynrnb,
- gmx_wallcycle *wcycle,
+int gmx_pmeonly(struct gmx_pme_t* pme,
+ const t_commrec* cr,
+ t_nrnb* mynrnb,
+ gmx_wallcycle* wcycle,
gmx_walltime_accounting_t walltime_accounting,
- t_inputrec *ir, PmeRunMode runMode)
+ t_inputrec* ir,
+ PmeRunMode runMode)
{
- int ret;
- int natoms = 0;
- matrix box;
- real lambda_q = 0;
- real lambda_lj = 0;
- int maxshift_x = 0, maxshift_y = 0;
- real dvdlambda_q, dvdlambda_lj;
- float cycles;
- int count;
- gmx_bool bEnerVir = FALSE;
- int64_t step;
+ int ret;
+ int natoms = 0;
+ matrix box;
+ real lambda_q = 0;
+ real lambda_lj = 0;
+ int maxshift_x = 0, maxshift_y = 0;
+ real dvdlambda_q, dvdlambda_lj;
+ float cycles;
+ int count;
+ gmx_bool bEnerVir = FALSE;
+ int64_t step;
/* This data will only use with PME tuning, i.e. switching PME grids */
- std::vector<gmx_pme_t *> pmedata;
+ std::vector<gmx_pme_t*> pmedata;
pmedata.push_back(pme);
- auto pme_pp = gmx_pme_pp_init(cr);
- //TODO the variable below should be queried from the task assignment info
- const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
- const void *commandStream = useGpuForPme ? pme_gpu_get_device_stream(pme) : nullptr;
- const void *deviceContext = useGpuForPme ? pme_gpu_get_device_context(pme) : nullptr;
- const int paddingSize = pme_gpu_get_padding_size(pme);
+ auto pme_pp = gmx_pme_pp_init(cr);
+ // TODO the variable below should be queried from the task assignment info
+ const bool useGpuForPme = (runMode == PmeRunMode::GPU) || (runMode == PmeRunMode::Mixed);
+ const void* commandStream = useGpuForPme ? pme_gpu_get_device_stream(pme) : nullptr;
+ const void* deviceContext = useGpuForPme ? pme_gpu_get_device_context(pme) : nullptr;
+ const int paddingSize = pme_gpu_get_padding_size(pme);
if (useGpuForPme)
{
changePinningPolicy(&pme_pp->chargeA, pme_get_pinning_policy());
changePinningPolicy(&pme_pp->x, pme_get_pinning_policy());
if (c_enableGpuPmePpComms)
{
- pme_pp->pmeCoordinateReceiverGpu =
- std::make_unique<gmx::PmeCoordinateReceiverGpu>(pme_gpu_get_device_stream(pme),
- pme_pp->mpi_comm_mysim,
- pme_pp->ppRanks);
- pme_pp->pmeForceSenderGpu =
- std::make_unique<gmx::PmeForceSenderGpu>(pme_gpu_get_device_stream(pme),
- pme_pp->mpi_comm_mysim,
- pme_pp->ppRanks);
+ pme_pp->pmeCoordinateReceiverGpu = std::make_unique<gmx::PmeCoordinateReceiverGpu>(
+ pme_gpu_get_device_stream(pme), pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
+ pme_pp->pmeForceSenderGpu = std::make_unique<gmx::PmeForceSenderGpu>(
+ pme_gpu_get_device_stream(pme), pme_pp->mpi_comm_mysim, pme_pp->ppRanks);
}
}
{
// TODO: Special PME-only constructor is used here. There is no mechanism to prevent from using the other constructor here.
// This should be made safer.
- stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(commandStream, deviceContext, GpuApiCallBehavior::Async, paddingSize);
+ stateGpu = std::make_unique<gmx::StatePropagatorDataGpu>(
+ commandStream, deviceContext, GpuApiCallBehavior::Async, paddingSize);
}
{
/* Domain decomposition */
ivec newGridSize;
- real ewaldcoeff_q = 0, ewaldcoeff_lj = 0;
- ret = gmx_pme_recv_coeffs_coords(pme,
- pme_pp.get(),
- &natoms,
- box,
- &maxshift_x, &maxshift_y,
- &lambda_q, &lambda_lj,
- &bEnerVir,
- &step,
- &newGridSize,
- &ewaldcoeff_q,
- &ewaldcoeff_lj,
- useGpuForPme,
- stateGpu.get(),
- runMode);
+ real ewaldcoeff_q = 0, ewaldcoeff_lj = 0;
+ ret = gmx_pme_recv_coeffs_coords(pme, pme_pp.get(), &natoms, box, &maxshift_x,
+ &maxshift_y, &lambda_q, &lambda_lj, &bEnerVir, &step,
+ &newGridSize, &ewaldcoeff_q, &ewaldcoeff_lj,
+ useGpuForPme, stateGpu.get(), runMode);
if (ret == pmerecvqxSWITCHGRID)
{
/* Reset the cycle and flop counters */
reset_pmeonly_counters(wcycle, walltime_accounting, mynrnb, step, useGpuForPme);
}
- }
- while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);
+ } while (ret == pmerecvqxSWITCHGRID || ret == pmerecvqxRESETCOUNTERS);
if (ret == pmerecvqxFINISH)
{
{
const bool boxChanged = false;
const bool useGpuPmeForceReduction = pme_pp->useGpuDirectComm;
- //TODO this should be set properly by gmx_pme_recv_coeffs_coords,
+ // TODO this should be set properly by gmx_pme_recv_coeffs_coords,
// or maybe use inputrecDynamicBox(ir), at the very least - change this when this codepath is tested!
pme_gpu_prepare_computation(pme, boxChanged, box, wcycle, pmeFlags, useGpuPmeForceReduction);
if (!pme_pp->useGpuDirectComm)
}
else
{
- GMX_ASSERT(pme_pp->x.size() == static_cast<size_t>(natoms), "The coordinate buffer should have size natoms");
-
- gmx_pme_do(pme, pme_pp->x, pme_pp->f,
- pme_pp->chargeA.data(), pme_pp->chargeB.data(),
- pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(),
- pme_pp->sigmaA.data(), pme_pp->sigmaB.data(), box,
- cr, maxshift_x, maxshift_y, mynrnb, wcycle,
- output.coulombVirial_, output.lennardJonesVirial_,
- &output.coulombEnergy_, &output.lennardJonesEnergy_,
- lambda_q, lambda_lj, &dvdlambda_q, &dvdlambda_lj,
- pmeFlags);
+ GMX_ASSERT(pme_pp->x.size() == static_cast<size_t>(natoms),
+ "The coordinate buffer should have size natoms");
+
+ gmx_pme_do(pme, pme_pp->x, pme_pp->f, pme_pp->chargeA.data(), pme_pp->chargeB.data(),
+ pme_pp->sqrt_c6A.data(), pme_pp->sqrt_c6B.data(), pme_pp->sigmaA.data(),
+ pme_pp->sigmaB.data(), box, cr, maxshift_x, maxshift_y, mynrnb, wcycle,
+ output.coulombVirial_, output.lennardJonesVirial_, &output.coulombEnergy_,
+ &output.lennardJonesEnergy_, lambda_q, lambda_lj, &dvdlambda_q,
+ &dvdlambda_lj, pmeFlags);
output.forces_ = pme_pp->f;
}
cycles = wallcycle_stop(wcycle, ewcPMEMESH);
- gmx_pme_send_force_vir_ener(*pme, pme_pp.get(), output,
- dvdlambda_q, dvdlambda_lj, cycles);
+ gmx_pme_send_force_vir_ener(*pme, pme_pp.get(), output, dvdlambda_q, dvdlambda_lj, cycles);
count++;
} /***** end of quasi-loop, we stop with the break above */