* \author Sebastian Keller <keller@cscs.ch>
* \author Artem Zhmurov <zhmurov@gmail.com>
*/
+#include <algorithm>
+
#include "nblib/box.h"
#include "nblib/exception.h"
#include "nblib/pbc.hpp"
int nthr,
const Box& box) :
numThreads(nthr),
- masterForceBuffer_(bufferSize, Vec3{ 0, 0, 0 }),
+ threadedForceBuffers_(numThreads),
+ threadedShiftForceBuffers_(numThreads),
pbcHolder_(std::make_unique<PbcHolder>(PbcType::Xyz, box))
{
// split up the work
threadedInteractions_ = splitListedWork(interactions, bufferSize, numThreads);
// set up the reduction buffers
- int threadRange = bufferSize / numThreads;
+ int threadRange = (bufferSize + numThreads - 1) / numThreads;
+#pragma omp parallel for num_threads(numThreads) schedule(static)
for (int i = 0; i < numThreads; ++i)
{
int rangeStart = i * threadRange;
rangeEnd = bufferSize;
}
- threadedForceBuffers_.push_back(std::make_unique<ForceBuffer<Vec3>>(
- masterForceBuffer_.data(), rangeStart, rangeEnd));
+ threadedForceBuffers_[i] = ForceBufferProxy<Vec3>(rangeStart, rangeEnd);
+ threadedShiftForceBuffers_[i] = std::vector<Vec3>(gmx::c_numShiftVectors);
}
}
-void ListedForceCalculator::computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x, bool usePbc)
+template<class ShiftForce>
+void ListedForceCalculator::computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x,
+ gmx::ArrayRef<Vec3> forces,
+ [[maybe_unused]] gmx::ArrayRef<ShiftForce> shiftForces,
+ bool usePbc)
{
+ if (x.size() != forces.size())
+ {
+ throw InputException("Coordinates array and force buffer size mismatch");
+ }
+
energyBuffer_.fill(0);
std::vector<std::array<real, std::tuple_size<ListedInteractionData>::value>> energiesPerThread(numThreads);
+ constexpr bool haveShiftForces = !std::is_same_v<ShiftForce, std::nullptr_t>;
+ if constexpr (haveShiftForces)
+ {
+ if (shiftForces.size() != gmx::c_numShiftVectors)
+ {
+ throw InputException("Shift vectors array size mismatch");
+ }
+ }
+
#pragma omp parallel for num_threads(numThreads) schedule(static)
for (int thread = 0; thread < numThreads; ++thread)
{
+ std::conditional_t<haveShiftForces, gmx::ArrayRef<Vec3>, gmx::ArrayRef<std::nullptr_t>> shiftForceBuffer;
+ if constexpr (haveShiftForces)
+ {
+ shiftForceBuffer = gmx::ArrayRef<Vec3>(threadedShiftForceBuffers_[thread]);
+ std::fill(shiftForceBuffer.begin(), shiftForceBuffer.end(), Vec3{ 0, 0, 0 });
+ }
+
+ ForceBufferProxy<Vec3>* threadBuffer = &threadedForceBuffers_[thread];
+
+ // forces in range of this thread are directly written into the output buffer
+ threadBuffer->setMasterBuffer(forces);
+
+ // zero out the outliers in the thread buffer
+ threadBuffer->clearOutliers();
+
if (usePbc)
{
energiesPerThread[thread] = reduceListedForces(
- threadedInteractions_[thread], x, threadedForceBuffers_[thread].get(), *pbcHolder_);
+ threadedInteractions_[thread], x, threadBuffer, shiftForceBuffer, *pbcHolder_);
}
else
{
energiesPerThread[thread] = reduceListedForces(
- threadedInteractions_[thread], x, threadedForceBuffers_[thread].get(), NoPbc{});
+ threadedInteractions_[thread], x, threadBuffer, shiftForceBuffer, NoPbc{});
+ }
+ }
+
+ // reduce shift forces
+ // This is a potential candidate for OMP parallelization, but attention should be paid to the
+ // relative costs of thread synchronization overhead vs reduction cost in contexts where the
+ // number of threads could be large vs where number of threads could be small
+ if constexpr (haveShiftForces)
+ {
+ for (int i = 0; i < gmx::c_numShiftVectors; ++i)
+ {
+ Vec3 threadSum{ 0, 0, 0 };
+ for (int thread = 0; thread < numThreads; ++thread)
+ {
+ threadSum += (threadedShiftForceBuffers_[thread])[i];
+ }
+ shiftForces[i] += threadSum;
}
}
energyBuffer_[type] += energiesPerThread[thread][type];
}
}
-
// reduce forces
#pragma omp parallel for num_threads(numThreads) schedule(static)
for (int thread = 0; thread < numThreads; ++thread)
{
- auto& thisBuffer = *threadedForceBuffers_[thread];
+ auto& thisBuffer = threadedForceBuffers_[thread];
// access outliers from other threads
for (int otherThread = 0; otherThread < numThreads; ++otherThread)
{
- auto& otherBuffer = *threadedForceBuffers_[otherThread];
+ auto& otherBuffer = threadedForceBuffers_[otherThread];
for (const auto& outlier : otherBuffer)
{
int index = outlier.first;
if (thisBuffer.inRange(index))
{
auto force = outlier.second;
- masterForceBuffer_[index] += force;
+ forces[index] += force;
}
}
}
}
}
-void ListedForceCalculator::compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces, bool usePbc)
+void ListedForceCalculator::compute(gmx::ArrayRef<const Vec3> coordinates,
+ gmx::ArrayRef<Vec3> forces,
+ gmx::ArrayRef<Vec3> shiftForces,
+ gmx::ArrayRef<real> energies,
+ bool usePbc)
{
- if (coordinates.size() != forces.size())
- {
- throw InputException("Coordinates array and force buffer size mismatch");
- }
-
- // check if the force buffers have the same size
- if (masterForceBuffer_.size() != forces.size())
- {
- throw InputException("Input force buffer size mismatch with listed forces buffer");
- }
-
- // compute forces and fill in local buffers
- computeForcesAndEnergies(coordinates, usePbc);
-
- // add forces to output force buffers
- for (int pIndex = 0; pIndex < int(forces.size()); pIndex++)
+ computeForcesAndEnergies(coordinates, forces, shiftForces, usePbc);
+ if (!energies.empty())
{
- forces[pIndex] += masterForceBuffer_[pIndex];
+ std::copy(energyBuffer_.begin(), energyBuffer_.end(), energies.begin());
}
}
+
void ListedForceCalculator::compute(gmx::ArrayRef<const Vec3> coordinates,
gmx::ArrayRef<Vec3> forces,
- EnergyType& energies,
+ gmx::ArrayRef<real> energies,
bool usePbc)
{
- compute(coordinates, forces, usePbc);
-
- energies = energyBuffer_;
+ computeForcesAndEnergies(coordinates, forces, gmx::ArrayRef<std::nullptr_t>{}, usePbc);
+ if (!energies.empty())
+ {
+ std::copy(energyBuffer_.begin(), energyBuffer_.end(), energies.begin());
+ }
}
} // namespace nblib
class Box;
class PbcHolder;
template<class T>
-class ForceBuffer;
+class ForceBufferProxy;
/*! \internal \brief Object to calculate forces and energies of listed forces
*
* This function also stores the forces and energies from listed interactions in the internal
* buffer of the ListedForceCalculator object
*
- * \param[in] coordinates to be used for the force calculation
- * \param[out] forces buffer to store the output forces
+ * \param[in] coordinates input coordinates for the force calculation
+ * \param[inout] forces output for adding the forces
+ * \param[inout] shiftForces output for adding shift forces
+ * \param[out] energies output for potential energies
+ * \param[in] usePbc whether or not to consider periodic boundary conditions
*/
- void compute(gmx::ArrayRef<const Vec3> coordinates, gmx::ArrayRef<Vec3> forces, bool usePbc = false);
+ void compute(gmx::ArrayRef<const Vec3> coordinates,
+ gmx::ArrayRef<Vec3> forces,
+ gmx::ArrayRef<Vec3> shiftForces,
+ gmx::ArrayRef<real> energies,
+ bool usePbc = false);
- //! \brief Alternative overload with the energies in an output buffer
+ //! \brief Alternative overload without shift forces
void compute(gmx::ArrayRef<const Vec3> coordinates,
gmx::ArrayRef<Vec3> forces,
- EnergyType& energies,
+ gmx::ArrayRef<real> energies,
bool usePbc = false);
//! \brief default, but moved to separate compilation unit
private:
int numThreads;
- //! the main buffer to hold the final listed forces
- std::vector<gmx::RVec> masterForceBuffer_;
-
//! holds the array of energies computed
EnergyType energyBuffer_;
std::vector<ListedInteractionData> threadedInteractions_;
//! reduction force buffers
- std::vector<std::unique_ptr<ForceBuffer<gmx::RVec>>> threadedForceBuffers_;
+ std::vector<ForceBufferProxy<Vec3>> threadedForceBuffers_;
+
+ //! reduction shift force buffers
+ std::vector<std::vector<Vec3>> threadedShiftForceBuffers_;
//! PBC objects
std::unique_ptr<PbcHolder> pbcHolder_;
//! compute listed forces and energies, overwrites the internal buffers
- void computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x, bool usePbc = false);
+ template<class ShiftForce>
+ void computeForcesAndEnergies(gmx::ArrayRef<const Vec3> x,
+ gmx::ArrayRef<Vec3> forces,
+ [[maybe_unused]] gmx::ArrayRef<ShiftForce> shiftForces,
+ bool usePbc = false);
};
} // namespace nblib
#include <unordered_map>
+#include "gromacs/utility/arrayref.h"
+
#include "nblib/pbc.hpp"
#include "definitions.h"
#include "nblib/util/util.hpp"
}
} // namespace detail
-/*! \internal \brief object to store forces for multithreaded listed forces computation
+/*! \internal \brief proxy object to access forces in an underlying buffer
+ *
+ * Depending on the index, either the underlying master buffer, or local
+ * storage for outliers is accessed. This object does not own the master buffer.
*
*/
template<class T>
-class ForceBuffer
+class ForceBufferProxy
{
using HashMap = std::unordered_map<int, T>;
public:
- ForceBuffer() : rangeStart(0), rangeEnd(0) { }
+ ForceBufferProxy() : rangeStart_(0), rangeEnd_(0) { }
- ForceBuffer(T* mbuf, int rs, int re) :
- masterForceBuffer(mbuf),
- rangeStart(rs),
- rangeEnd(re)
+ ForceBufferProxy(int rangeStart, int rangeEnd) : rangeStart_(rangeStart), rangeEnd_(rangeEnd)
{
}
- void clear() { outliers.clear(); }
+ void clearOutliers() { outliers.clear(); }
inline NBLIB_ALWAYS_INLINE T& operator[](int i)
{
- if (i >= rangeStart && i < rangeEnd)
+ if (i >= rangeStart_ && i < rangeEnd_)
{
return masterForceBuffer[i];
}
typename HashMap::const_iterator begin() { return outliers.begin(); }
typename HashMap::const_iterator end() { return outliers.end(); }
- [[nodiscard]] bool inRange(int index) const { return (index >= rangeStart && index < rangeEnd); }
+ [[nodiscard]] bool inRange(int index) const { return (index >= rangeStart_ && index < rangeEnd_); }
+
+ void setMasterBuffer(gmx::ArrayRef<T> buffer) { masterForceBuffer = buffer; }
private:
- T* masterForceBuffer;
- int rangeStart;
- int rangeEnd;
+ gmx::ArrayRef<T> masterForceBuffer;
+ int rangeStart_;
+ int rangeEnd_;
HashMap outliers;
};