2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "nonbonded.h"
47 #include "thread_mpi/threads.h"
49 #include "gromacs/gmxlib/nrnb.h"
50 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
51 #include "gromacs/gmxlib/nonbonded/nb_generic.h"
52 #include "gromacs/gmxlib/nonbonded/nb_generic_cg.h"
53 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
54 #include "gromacs/listed-forces/bonded.h"
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdtypes/enerdata.h"
58 #include "gromacs/mdtypes/forcerec.h"
59 #include "gromacs/mdtypes/md_enums.h"
60 #include "gromacs/mdtypes/mdatom.h"
61 #include "gromacs/mdtypes/nblist.h"
62 #include "gromacs/pbcutil/ishift.h"
63 #include "gromacs/pbcutil/mshift.h"
64 #include "gromacs/pbcutil/pbc.h"
65 #include "gromacs/tables/forcetable.h"
66 #include "gromacs/utility/arraysize.h"
67 #include "gromacs/utility/basedefinitions.h"
68 #include "gromacs/utility/cstringutil.h"
69 #include "gromacs/utility/fatalerror.h"
70 #include "gromacs/utility/smalloc.h"
72 /* Interaction-specific kernels: the default (C) versions and those for the different SIMD instruction sets */
73 #if !GMX_CLANG_ANALYZER
74 #include "gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h"
77 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
78 # include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h"
80 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
81 # include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
83 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
84 # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
86 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
87 # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
89 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
90 # include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h"
92 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
93 # include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
95 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
96 # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
98 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
99 # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
101 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
102 # include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
/* Mutex taken by gmx_nonbonded_setup() around the kernel registration, and the
 * flag that function sets to TRUE once the registration has been done. */
106 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
107 static gmx_bool nonbonded_setup_done = FALSE;
/* Registers the nonbonded kernel lists and builds the lookup hash.
 *
 * The function locks nonbonded_setup_mutex, tests nonbonded_setup_done, and
 * sets that flag to TRUE after nb_kernel_list_hash_init() has been called; the
 * mutex is unlocked again before returning.
 *
 * fr                  May be null (gmx_nonbonded_set_kernel_pointers() passes
 *                     nullptr); only fr->use_simd_kernels is read here.
 * bGenericKernelOnly  Tested before any kernel list is added.
 *
 * NOTE(review): the brace and preprocessor-closing lines of this function are
 * not visible in this view, so the exact nesting of the statements below
 * should be confirmed against the complete file.
 */
111 gmx_nonbonded_setup(t_forcerec * fr,
112 gmx_bool bGenericKernelOnly)
114 tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
115 /* Here we are guaranteed only one thread made it. */
116 if (!nonbonded_setup_done)
118 if (!bGenericKernelOnly)
120 /* Add the generic kernels to the structure stored statically in nb_kernel.c */
121 #if !GMX_CLANG_ANALYZER
122 nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
/* The condition below holds when fr is null or when fr->use_simd_kernels is set. */
125 if (!(fr != nullptr && !fr->use_simd_kernels))
127 /* Add interaction-specific kernels for different architectures */
/* Each list below is added only when its GMX_SIMD_* macro and the matching
 * GMX_DOUBLE setting are enabled at compile time. */
128 /* Single precision */
129 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
130 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
132 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
133 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
135 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
136 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
138 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
139 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
141 /* Double precision */
142 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
143 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
145 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
146 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
148 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
149 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
151 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
152 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
154 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
155 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
157 ; /* empty statement to avoid a completely empty block */
160 /* Create a hash for faster lookups */
161 nb_kernel_list_hash_init();
/* Record completion so that the flag test above fails on later calls. */
163 nonbonded_setup_done = TRUE;
165 tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
// Selects the kernel function pointers and the padding width stored in one neighbor list.
//
// log                       Passed to nb_kernel_list_findkernel() for the architecture searches.
// nl                        List whose kernelptr_vf, kernelptr_v, kernelptr_f and
//                           simd_padding_width fields are written here.
// bElecAndVdwSwitchDiffers  When set, and both electrostatics and VdW use the potential-switch
//                           modifier, the kernel pointers are reset to nullptr.
//
// NOTE(review): short lines (braces, preprocessor closers, comment terminators and the
// declarations of elec, vdw, geom, other and i) are not visible in this view; confirm the
// nesting of the statements below against the complete file.
171 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
174 const char * elec_mod;
176 const char * vdw_mod;
// arch_and_padding pairs an architecture name with the value copied to
// nl->simd_padding_width; the search loops further down walk it in index order.
183 int simd_padding_width;
187 /* Single precision */
188 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
189 { "avx_256_single", 8 },
191 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
192 { "avx_128_fma_single", 4 },
194 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
195 { "sse4_1_single", 4 },
197 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
198 { "sse2_single", 4 },
200 /* Double precision */
201 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
202 { "avx_256_double", 4 },
204 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
205 /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
206 * since the kernels execute a loop unrolled a factor 2, followed by
207 * a possible single odd-element epilogue.
209 { "avx_128_fma_double", 1 },
211 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
212 /* No padding - see comment above */
213 { "sse2_double", 1 },
215 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
216 /* No padding - see comment above */
217 { "sse4_1_double", 1 },
219 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
220 /* No padding - see comment above */
221 { "sparc64_hpc_ace_double", 1 },
225 int narch = asize(arch_and_padding);
228 if (!nonbonded_setup_done)
230 /* We typically call this setup routine before starting timers,
231 * but if that has not been done for whatever reason we do it now.
233 gmx_nonbonded_setup(nullptr, FALSE);
// All three pointers start out null; the searches below rely on this to detect
// that no kernel has been found yet.
239 nl->kernelptr_vf = nullptr;
240 nl->kernelptr_v = nullptr;
241 nl->kernelptr_f = nullptr;
// Names looked up from the list settings; they are the search keys passed to
// nb_kernel_list_findkernel() below.
243 elec = gmx_nbkernel_elec_names[nl->ielec];
244 elec_mod = eintmod_names[nl->ielecmod];
245 vdw = gmx_nbkernel_vdw_names[nl->ivdw];
246 vdw_mod = eintmod_names[nl->ivdwmod];
247 geom = gmx_nblist_geometry_names[nl->igeometry];
// Free-energy lists get gmx_nb_free_energy_kernel for both the potential-and-force
// and the force-only pointer, with a padding width of 1.
249 if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
251 nl->kernelptr_vf = reinterpret_cast<void *>(gmx_nb_free_energy_kernel);
252 nl->kernelptr_f = reinterpret_cast<void *>(gmx_nb_free_energy_kernel);
253 nl->simd_padding_width = 1;
// Presumably taken when geom matches CG-CG, i.e. gmx_strcasecmp_min() returns 0
// on a match - TODO confirm the return convention.
255 else if (!gmx_strcasecmp_min(geom, "CG-CG"))
257 nl->kernelptr_vf = reinterpret_cast<void *>(gmx_nb_generic_cg_kernel);
258 nl->kernelptr_f = reinterpret_cast<void *>(gmx_nb_generic_cg_kernel);
259 nl->simd_padding_width = 1;
263 /* Try to find a specific kernel first */
// The loop condition stops the search at the first architecture for which a
// PotentialAndForce kernel is found.
265 for (i = 0; i < narch && nl->kernelptr_vf == nullptr; i++)
267 nl->kernelptr_vf = reinterpret_cast<void *>(nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"));
268 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
// NOTE(review): this second search also writes nl->simd_padding_width, so the value
// left in the list comes from the force-kernel search rather than the one above.
270 for (i = 0; i < narch && nl->kernelptr_f == nullptr; i++)
272 nl->kernelptr_f = reinterpret_cast<void *>(nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force"));
273 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
275 /* If there is no force-only optimized kernel, is there a potential & force one? */
276 if (nl->kernelptr_f == nullptr)
// This fallback lookup passes nullptr instead of log as the first argument.
278 nl->kernelptr_f = reinterpret_cast<void *>(nb_kernel_list_findkernel(nullptr, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce"));
279 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
283 /* For now, the accelerated kernels cannot handle the combination of switch functions for both
284 * electrostatics and VdW that use different switch radius or switch cutoff distances
285 * (both of them enter in the switch function calculation). This would require
286 * us to evaluate two completely separate switch functions for every interaction.
287 * Instead, we disable such kernels by setting the pointer to NULL.
288 * This will cause the generic kernel (which can handle it) to be called instead.
290 * Note that we typically already enable tabulated coulomb interactions for this case,
291 * so this is mostly a safe-guard to make sure we call the generic kernel if the
292 * tables are disabled.
294 if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
295 (nl->ivdw != GMX_NBKERNEL_VDW_NONE) && (nl->ivdwmod == eintmodPOTSWITCH) &&
296 bElecAndVdwSwitchDiffers)
298 nl->kernelptr_vf = nullptr;
299 nl->kernelptr_f = nullptr;
302 /* Give up, pick a generic one instead.
303 * We only do this for particle-particle kernels; by leaving the water-optimized kernel
304 * pointers to NULL, the water optimization will automatically be disabled for this interaction.
// The generic kernel is installed only when no potential-and-force kernel is set;
// the force-only pointer is replaced along with it and the padding width becomes 1.
306 if (nl->kernelptr_vf == nullptr && !gmx_strcasecmp_min(geom, "Particle-Particle"))
308 nl->kernelptr_vf = reinterpret_cast<void *>(gmx_nb_generic_kernel);
309 nl->kernelptr_f = reinterpret_cast<void *>(gmx_nb_generic_kernel);
310 nl->simd_padding_width = 1;
// Warning text naming the interaction for which the generic kernel was chosen; the
// call that emits it and any condition around it are not visible in this view.
314 "WARNING - Slow generic NB kernel used for neighborlist with\n"
315 " Elec: '%s', Modifier: '%s'\n"
316 " Vdw: '%s', Modifier: '%s'\n",
317 elec, elec_mod, vdw, vdw_mod);
// Runs the nonbonded kernels over the neighbor lists held in fr->nblists.
//
// For each list set n in the range n0 to n1 the tables are copied into kernel_data; for each
// neighbor list with nri > 0 the potential-and-force or the force-only kernel pointer is
// picked according to GMX_NONBONDED_DO_POTENTIAL in flags and the kernel is called.
//
// NOTE(review): several parameter lines (for example those of x, f, nrnb, lambda, dvdl and
// flags) and most short lines (braces, else, early exits, the assignments of n0, n1, i0, i1)
// are not visible in this view; confirm the control flow against the complete file.
323 void do_nonbonded(const t_forcerec *fr,
326 const t_mdatoms *mdatoms,
327 const t_blocka *excl,
328 gmx_grppairener_t *grppener,
337 int n, n0, n1, i, i0, i1;
339 nb_kernel_data_t kernel_data;
340 nb_kernel_t * kernelptr = nullptr;
// Values handed unchanged to every kernel through kernel_data.
343 kernel_data.flags = flags;
344 kernel_data.exclusions = excl;
345 kernel_data.lambda = lambda;
346 kernel_data.dvdl = dvdl;
// Reported as an internal inconsistency; the guarding condition is not visible here.
350 gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
375 for (n = n0; (n < n1); n++)
377 nblists = &fr->nblists[n];
379 /* Tabulated kernels hard-code a lot of assumptions about the
380 * structure of these tables, but that's not worth fixing with
381 * the group scheme due for removal soon. As a token
382 * improvement, this assertion will stop code segfaulting if
383 * someone assumes that extending the group-scheme table-type
384 * enumeration is something that GROMACS supports. */
385 static_assert(etiNR == 3, "");
387 kernel_data.table_elec = nblists->table_elec;
388 kernel_data.table_vdw = nblists->table_vdw;
389 kernel_data.table_elec_vdw = nblists->table_elec_vdw;
// Branch for calls without the short-range flag; its body is not visible in this view.
394 if (!(flags & GMX_NONBONDED_DO_SR))
// Energy-group accumulators for the kernels; the VdW one is the Buckingham
// entry when fr->bBHAM is set and the Lennard-Jones entry otherwise.
398 kernel_data.energygrp_elec = grppener->ener[egCOULSR];
399 kernel_data.energygrp_vdw = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
400 nlist = nblists->nlist_sr;
404 for (i = i0; (i < i1); i++)
// Lists with nri of zero or less are not processed.
406 if (nlist[i].nri > 0)
408 if (flags & GMX_NONBONDED_DO_POTENTIAL)
410 /* Potential and force */
411 kernelptr = reinterpret_cast<nb_kernel_t *>(nlist[i].kernelptr_vf);
415 /* Force only, no potential */
416 kernelptr = reinterpret_cast<nb_kernel_t *>(nlist[i].kernelptr_f);
// Applies to lists that are not free-energy lists when foreign-lambda evaluation
// is requested; the statement that skips them is not visible in this view.
419 if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
421 /* We don't need the non-perturbed interactions */
424 /* Neighborlists whose kernelptr==NULL will always be empty */
425 if (kernelptr != nullptr)
// const is cast away from fr and mdatoms for the call; presumably the nb_kernel_t
// signature takes non-const pointers - TODO confirm.
427 (*kernelptr)(&(nlist[i]), x, f, const_cast<t_forcerec*>(fr),
428 const_cast<t_mdatoms*>(mdatoms), &kernel_data, nrnb);
// Fatal error text for a non-empty list that has no kernel pointer.
432 gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");