2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017,2018,2019,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements AlignedAllocator.
39 * \author Erik Lindahl <erik.lindahl@gmail.com>
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
41 * \ingroup module_utility
45 #include "alignedallocator.h"
54 # include <mm_malloc.h>
57 #elif HAVE_XMMINTRIN_H
58 # include <xmmintrin.h>
65 #if GMX_NATIVE_WINDOWS
66 # include <windows.h> // only for the page size query purposes
69 #include "gromacs/utility/gmxassert.h"
77 /*! \brief Allocate aligned memory in a fully portable way
79 * \param bytes Amount of memory (bytes) to allocate. The routine will return
80 * nullptr if the allocation fails. However, note that asking for
81 * zero bytes will return a pointer that is non-null and properly
82 * aligned (but obviously you cannot use it, since you promised
83 * not to access data beyond the 0 bytes you asked for).
85 * \param alignment Alignment specification in bytes, must be a power of 2.
87 * \return Non-null pointer if the allocation worked, otherwise nullptr.
88 * This routine should only be called from alignedMalloc(), which also does
89 * the checking for valid values. This particular function is used for platforms
90 * where we have no control of the alignment of memory returned by the system.
91 * Instead, we increase the amount of memory requested internally such that we
92 * both can create a pointer inside this memory that fulfills the memory
93 * alignment requested, and that we have room to store the original pointer
94 * just before this area.
96 * \note This is an internal routine that should only be called from
97 * gmx::alignedMalloc(). Just like system-provided routines, it provides
98 * memory that is aligned - but not padded.
100 * \note This functionality is provided by C++17 std::aligned_alloc,
101 * and it would be preferable to use that instead, however it is not
102 * yet widely enough available to depend on. See #3968.
104 gmx_unused void* alignedMallocGeneric(std::size_t bytes, std::size_t alignment)
106 // The amount of extra memory (beyond what the user asked for) we need is:
107 // - sizeof(void *), to store the original pointer
108 // - alignment, to make sure we have an aligned pointer in the area
109 void* pMalloc = malloc(bytes + sizeof(void*) + alignment);
111 if (pMalloc == nullptr)
116 // Convert pMalloc to size_t (so we work with raw bytes), add the space we
117 // need to save the original pointer, and (alignment-1) bytes, and then mask
118 // out the lowest bits.
119 std::size_t mask = ~static_cast<std::size_t>(alignment - 1);
120 void* pAligned = reinterpret_cast<void*>(
121 (reinterpret_cast<std::size_t>(pMalloc) + sizeof(void*) + alignment - 1) & mask);
123 // Store original pointer. Since we allocated at least sizeof(void *) extra
124 // space this is always a valid memory location.
125 reinterpret_cast<void**>(pAligned)[-1] = pMalloc;
131 /*! \brief Free aligned memory
133 * \param p Memory pointer previously returned from
134 * gmx::internal::alignedMallocGeneric().
136 * Since this routine relies on the original pointer being stored just before
137 * the memory area p points to, bad things will happen if you call this routine
138 * with a pointer obtained any other way, or if you call the system free()
139 * with a pointer obtained from gmx::alignedMalloc().
141 * \note This is an internal routine that should only be called from
142 * gmx::alignedFree().
144 gmx_unused void alignedFreeGeneric(void* p)
148 // Pick up the pointer stored just below p, and use that to call free()
149 free(reinterpret_cast<void**>(p)[-1]);
153 //! Implement malloc of \c bytes of memory, aligned to \c alignment.
154 void* mallocImpl(std::size_t bytes, std::size_t alignment)
159 p = _mm_malloc(bytes, alignment);
160 #elif HAVE_POSIX_MEMALIGN
161 if (posix_memalign(&p, alignment, bytes) != 0)
166 p = memalign(alignment, bytes);
167 #elif HAVE__ALIGNED_MALLOC
168 p = _aligned_malloc(bytes, alignment);
170 p = internal::alignedMallocGeneric(bytes, alignment);
176 //! Free aligned memory allocated with mallocImpl().
177 void freeImpl(void* p)
183 #elif HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
185 #elif HAVE__ALIGNED_MALLOC
188 internal::alignedFreeGeneric(p);
195 // === AlignedAllocationPolicy
197 std::size_t AlignedAllocationPolicy::alignment()
199 // For now we always use 128-byte alignment:
200 // 1) IBM Power already has cache lines of 128-bytes, and needs it.
201 // 2) x86 has 64 byte cache lines, but since a future AVX-1024 (rumored?)
202 // will need 1024/8=128 byte SIMD alignment, it is safer to use that
204 // 3) The old Pentium4 used 256-byte cache prefetching (but 64-byte lines).
205 // However, it's not worth worrying about performance for P4...
206 // 4) ARM & Sparc have 64 byte lines, but will be just fine with
207 // 128-byte alignment (nobody knows what the future brings)
209 // So, for now we're semi-lazy and just align to 128 bytes!
211 // TODO LINCS code is copying this assumption independently (for now)
215 void* AlignedAllocationPolicy::malloc(std::size_t bytes)
217 // Pad memory at the end by a further alignment() bytes to avoid false sharing
218 auto size = alignment();
221 return mallocImpl(bytes, size);
224 void AlignedAllocationPolicy::free(void* p)
229 // === PageAlignedAllocationPolicy
231 //! Return a page size, from a sysconf/WinAPI query if available, or a default guess (4096 bytes).
232 //! \todo Move this function into sysinfo.cpp where other OS-specific code/includes live
233 static std::size_t getPageSize()
236 #if GMX_NATIVE_WINDOWS
238 GetNativeSystemInfo(&si);
239 pageSize = si.dwPageSize;
240 #elif defined(_SC_PAGESIZE)
241 /* Note that sysconf returns -1 on its error conditions, which we
242 don't really need to check, nor can really handle at
243 initialization time. */
244 pageSize = sysconf(_SC_PAGESIZE);
245 #elif defined(_SC_PAGE_SIZE)
246 pageSize = sysconf(_SC_PAGE_SIZE);
250 return ((pageSize == -1) ? 4096 // A useful guess
251 : static_cast<std::size_t>(pageSize));
254 /* Implements the "construct on first use" idiom to avoid the static
255 * initialization order fiasco where a possible static page-aligned
256 * container would be initialized before the alignment variable was.
258 * Note that thread-safety of the initialization is guaranteed by the
259 * C++11 language standard.
261 * The size_t has no destructor, so there is no deinitialization
262 * issue. See https://isocpp.org/wiki/faq/ctors for discussion of
263 * alternatives and trade-offs. */
264 std::size_t PageAlignedAllocationPolicy::alignment()
266 static size_t thePageSize = getPageSize();
270 void* PageAlignedAllocationPolicy::malloc(std::size_t bytes)
272 return mallocImpl(bytes, alignment());
275 void PageAlignedAllocationPolicy::free(void* p)