src/gromacs/gmxlib/nonbonded/nonbonded.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2017,2018, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "nonbonded.h"
  40
  41 #include "config.h"
  42
  43 #include <cassert>
  44 #include <cstdio>
  45 #include <cstdlib>
  46
  47 #include "thread_mpi/threads.h"
  48
  49 #include "gromacs/gmxlib/nrnb.h"
  50 #include "gromacs/gmxlib/nonbonded/nb_free_energy.h"
  51 #include "gromacs/gmxlib/nonbonded/nb_generic.h"
  52 #include "gromacs/gmxlib/nonbonded/nb_generic_cg.h"
  53 #include "gromacs/gmxlib/nonbonded/nb_kernel.h"
  54 #include "gromacs/listed-forces/bonded.h"
  55 #include "gromacs/math/utilities.h"
  56 #include "gromacs/math/vec.h"
  57 #include "gromacs/mdtypes/forcerec.h"
  58 #include "gromacs/mdtypes/md_enums.h"
  59 #include "gromacs/mdtypes/mdatom.h"
  60 #include "gromacs/mdtypes/nblist.h"
  61 #include "gromacs/pbcutil/ishift.h"
  62 #include "gromacs/pbcutil/mshift.h"
  63 #include "gromacs/pbcutil/pbc.h"
  64 #include "gromacs/tables/forcetable.h"
  65 #include "gromacs/utility/arraysize.h"
  66 #include "gromacs/utility/basedefinitions.h"
  67 #include "gromacs/utility/cstringutil.h"
  68 #include "gromacs/utility/fatalerror.h"
  69 #include "gromacs/utility/smalloc.h"
  70
  71 /* Different default (c) and SIMD instructions interaction-specific kernels */
  72 #include "gromacs/gmxlib/nonbonded/nb_kernel_c/nb_kernel_c.h"
  73
  74 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
  75 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_single/nb_kernel_sse2_single.h"
  76 #endif
  77 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
  78 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_single/nb_kernel_sse4_1_single.h"
  79 #endif
  80 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
  81 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_single/nb_kernel_avx_128_fma_single.h"
  82 #endif
  83 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
  84 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h"
  85 #endif
  86 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
  87 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse2_double/nb_kernel_sse2_double.h"
  88 #endif
  89 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
  90 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sse4_1_double/nb_kernel_sse4_1_double.h"
  91 #endif
  92 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
  93 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_128_fma_double/nb_kernel_avx_128_fma_double.h"
  94 #endif
  95 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
  96 #    include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h"
  97 #endif
  98 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
  99 #    include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h"
 100 #endif
 101
 102
 103 static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER;
 104 static gmx_bool            nonbonded_setup_done  = FALSE;
 105
 106
 107 void
 108 gmx_nonbonded_setup(t_forcerec *   fr,
 109                     gmx_bool       bGenericKernelOnly)
 110 {
 111     tMPI_Thread_mutex_lock(&nonbonded_setup_mutex);
 112     /* Here we are guaranteed only one thread made it. */
 113     if (nonbonded_setup_done == FALSE)
 114     {
 115         if (bGenericKernelOnly == FALSE)
 116         {
 117             /* Add the generic kernels to the structure stored statically in nb_kernel.c */
 118             nb_kernel_list_add_kernels(kernellist_c, kernellist_c_size);
 119
 120             if (!(fr != nullptr && fr->use_simd_kernels == FALSE))
 121             {
 122                 /* Add interaction-specific kernels for different architectures */
 123                 /* Single precision */
 124 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
 125                 nb_kernel_list_add_kernels(kernellist_sse2_single, kernellist_sse2_single_size);
 126 #endif
 127 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
 128                 nb_kernel_list_add_kernels(kernellist_sse4_1_single, kernellist_sse4_1_single_size);
 129 #endif
 130 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
 131                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_single, kernellist_avx_128_fma_single_size);
 132 #endif
 133 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
 134                 nb_kernel_list_add_kernels(kernellist_avx_256_single, kernellist_avx_256_single_size);
 135 #endif
 136                 /* Double precision */
 137 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
 138                 nb_kernel_list_add_kernels(kernellist_sse2_double, kernellist_sse2_double_size);
 139 #endif
 140 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
 141                 nb_kernel_list_add_kernels(kernellist_sse4_1_double, kernellist_sse4_1_double_size);
 142 #endif
 143 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
 144                 nb_kernel_list_add_kernels(kernellist_avx_128_fma_double, kernellist_avx_128_fma_double_size);
 145 #endif
 146 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
 147                 nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size);
 148 #endif
 149 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
 150                 nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size);
 151 #endif
 152                 ; /* empty statement to avoid a completely empty block */
 153             }
 154         }
 155         /* Create a hash for faster lookups */
 156         nb_kernel_list_hash_init();
 157
 158         nonbonded_setup_done = TRUE;
 159     }
 160     tMPI_Thread_mutex_unlock(&nonbonded_setup_mutex);
 161 }
 162
 163
 164
 165 void
 166 gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwSwitchDiffers)
 167 {
 168     const char *     elec;
 169     const char *     elec_mod;
 170     const char *     vdw;
 171     const char *     vdw_mod;
 172     const char *     geom;
 173     const char *     other;
 174
 175     struct
 176     {
 177         const char *  arch;
 178         int           simd_padding_width;
 179     }
 180     arch_and_padding[] =
 181     {
 182         /* Single precision */
 183 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && !GMX_DOUBLE
 184         { "avx_256_single", 8 },
 185 #endif
 186 #if GMX_SIMD_X86_AVX_128_FMA && !GMX_DOUBLE
 187         { "avx_128_fma_single", 4 },
 188 #endif
 189 #if GMX_SIMD_X86_SSE4_1 && !GMX_DOUBLE
 190         { "sse4_1_single", 4 },
 191 #endif
 192 #if GMX_SIMD_X86_SSE2 && !GMX_DOUBLE
 193         { "sse2_single", 4 },
 194 #endif
 195         /* Double precision */
 196 #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE
 197         { "avx_256_double", 4 },
 198 #endif
 199 #if GMX_SIMD_X86_AVX_128_FMA && GMX_DOUBLE
 200         /* Sic. Double precision 2-way SIMD does not require neighbor list padding,
 201          * since the kernels execute a loop unrolled a factor 2, followed by
 202          * a possible single odd-element epilogue.
 203          */
 204         { "avx_128_fma_double", 1 },
 205 #endif
 206 #if GMX_SIMD_X86_SSE2 && GMX_DOUBLE
 207         /* No padding - see comment above */
 208         { "sse2_double", 1 },
 209 #endif
 210 #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE
 211         /* No padding - see comment above */
 212         { "sse4_1_double", 1 },
 213 #endif
 214 #if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE
 215         /* No padding - see comment above */
 216         { "sparc64_hpc_ace_double", 1 },
 217 #endif
 218         { "c", 1 },
 219     };
 220     int              narch = asize(arch_and_padding);
 221     int              i;
 222
 223     if (nonbonded_setup_done == FALSE)
 224     {
 225         /* We typically call this setup routine before starting timers,
 226          * but if that has not been done for whatever reason we do it now.
 227          */
 228         gmx_nonbonded_setup(nullptr, FALSE);
 229     }
 230
 231     /* Not used yet */
 232     other = "";
 233
 234     nl->kernelptr_vf = nullptr;
 235     nl->kernelptr_v  = nullptr;
 236     nl->kernelptr_f  = nullptr;
 237
 238     elec     = gmx_nbkernel_elec_names[nl->ielec];
 239     elec_mod = eintmod_names[nl->ielecmod];
 240     vdw      = gmx_nbkernel_vdw_names[nl->ivdw];
 241     vdw_mod  = eintmod_names[nl->ivdwmod];
 242     geom     = gmx_nblist_geometry_names[nl->igeometry];
 243
 244     if (nl->type == GMX_NBLIST_INTERACTION_FREE_ENERGY)
 245     {
 246         nl->kernelptr_vf       = (void *) gmx_nb_free_energy_kernel;
 247         nl->kernelptr_f        = (void *) gmx_nb_free_energy_kernel;
 248         nl->simd_padding_width = 1;
 249     }
 250     else if (!gmx_strcasecmp_min(geom, "CG-CG"))
 251     {
 252         nl->kernelptr_vf       = (void *) gmx_nb_generic_cg_kernel;
 253         nl->kernelptr_f        = (void *) gmx_nb_generic_cg_kernel;
 254         nl->simd_padding_width = 1;
 255     }
 256     else
 257     {
 258         /* Try to find a specific kernel first */
 259
 260         for (i = 0; i < narch && nl->kernelptr_vf == nullptr; i++)
 261         {
 262             nl->kernelptr_vf       = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 263             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 264         }
 265         for (i = 0; i < narch && nl->kernelptr_f == nullptr; i++)
 266         {
 267             nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(log, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "Force");
 268             nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 269
 270             /* If there is not force-only optimized kernel, is there a potential & force one? */
 271             if (nl->kernelptr_f == nullptr)
 272             {
 273                 nl->kernelptr_f        = (void *) nb_kernel_list_findkernel(nullptr, arch_and_padding[i].arch, elec, elec_mod, vdw, vdw_mod, geom, other, "PotentialAndForce");
 274                 nl->simd_padding_width = arch_and_padding[i].simd_padding_width;
 275             }
 276         }
 277
 278         /* For now, the accelerated kernels cannot handle the combination of switch functions for both
 279          * electrostatics and VdW that use different switch radius or switch cutoff distances
 280          * (both of them enter in the switch function calculation). This would require
 281          * us to evaluate two completely separate switch functions for every interaction.
 282          * Instead, we disable such kernels by setting the pointer to NULL.
 283          * This will cause the generic kernel (which can handle it) to be called instead.
 284          *
 285          * Note that we typically already enable tabulated coulomb interactions for this case,
 286          * so this is mostly a safe-guard to make sure we call the generic kernel if the
 287          * tables are disabled.
 288          */
 289         if ((nl->ielec != GMX_NBKERNEL_ELEC_NONE) && (nl->ielecmod == eintmodPOTSWITCH) &&
 290             (nl->ivdw  != GMX_NBKERNEL_VDW_NONE)  && (nl->ivdwmod  == eintmodPOTSWITCH) &&
 291             bElecAndVdwSwitchDiffers)
 292         {
 293             nl->kernelptr_vf = nullptr;
 294             nl->kernelptr_f  = nullptr;
 295         }
 296
 297         /* Give up, pick a generic one instead.
 298          * We only do this for particle-particle kernels; by leaving the water-optimized kernel
 299          * pointers to NULL, the water optimization will automatically be disabled for this interaction.
 300          */
 301         if (nl->kernelptr_vf == nullptr && !gmx_strcasecmp_min(geom, "Particle-Particle"))
 302         {
 303             nl->kernelptr_vf       = (void *) gmx_nb_generic_kernel;
 304             nl->kernelptr_f        = (void *) gmx_nb_generic_kernel;
 305             nl->simd_padding_width = 1;
 306             if (debug)
 307             {
 308                 fprintf(debug,
 309                         "WARNING - Slow generic NB kernel used for neighborlist with\n"
 310                         "    Elec: '%s', Modifier: '%s'\n"
 311                         "    Vdw:  '%s', Modifier: '%s'\n",
 312                         elec, elec_mod, vdw, vdw_mod);
 313             }
 314         }
 315     }
 316     return;
 317 }
 318
 319 void do_nonbonded(const t_forcerec  *fr,
 320                   rvec               x[],
 321                   rvec               f_shortrange[],
 322                   const t_mdatoms   *mdatoms,
 323                   const t_blocka    *excl,
 324                   gmx_grppairener_t *grppener,
 325                   t_nrnb            *nrnb,
 326                   real              *lambda,
 327                   real              *dvdl,
 328                   int                nls,
 329                   int                eNL,
 330                   int                flags)
 331 {
 332     t_nblist *        nlist;
 333     int               n, n0, n1, i, i0, i1;
 334     t_nblists *       nblists;
 335     nb_kernel_data_t  kernel_data;
 336     nb_kernel_t *     kernelptr = nullptr;
 337     rvec *            f;
 338
 339     kernel_data.flags                   = flags;
 340     kernel_data.exclusions              = excl;
 341     kernel_data.lambda                  = lambda;
 342     kernel_data.dvdl                    = dvdl;
 343
 344     if (fr->bAllvsAll)
 345     {
 346         gmx_incons("All-vs-all kernels have not been implemented in version 4.6");
 347         return;
 348     }
 349
 350     if (eNL >= 0)
 351     {
 352         i0 = eNL;
 353         i1 = i0+1;
 354     }
 355     else
 356     {
 357         i0 = 0;
 358         i1 = eNL_NR;
 359     }
 360
 361     if (nls >= 0)
 362     {
 363         n0 = nls;
 364         n1 = nls+1;
 365     }
 366     else
 367     {
 368         n0 = 0;
 369         n1 = fr->nnblists;
 370     }
 371
 372     for (n = n0; (n < n1); n++)
 373     {
 374         nblists = &fr->nblists[n];
 375
 376         /* Tabulated kernels hard-code a lot of assumptions about the
 377          * structure of these tables, but that's not worth fixing with
 378          * the group scheme due for removal soon. As a token
 379          * improvement, this assertion will stop code segfaulting if
 380          * someone assumes that extending the group-scheme table-type
 381          * enumeration is something that GROMACS supports. */
 382         /* cppcheck-suppress duplicateExpression */
 383         assert(etiNR == 3);
 384
 385         kernel_data.table_elec              = nblists->table_elec;
 386         kernel_data.table_vdw               = nblists->table_vdw;
 387         kernel_data.table_elec_vdw          = nblists->table_elec_vdw;
 388
 389         {
 390             {
 391                 /* Short-range */
 392                 if (!(flags & GMX_NONBONDED_DO_SR))
 393                 {
 394                     continue;
 395                 }
 396                 kernel_data.energygrp_elec          = grppener->ener[egCOULSR];
 397                 kernel_data.energygrp_vdw           = grppener->ener[fr->bBHAM ? egBHAMSR : egLJSR];
 398                 nlist = nblists->nlist_sr;
 399                 f                                   = f_shortrange;
 400             }
 401
 402             for (i = i0; (i < i1); i++)
 403             {
 404                 if (nlist[i].nri > 0)
 405                 {
 406                     if (flags & GMX_NONBONDED_DO_POTENTIAL)
 407                     {
 408                         /* Potential and force */
 409                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_vf;
 410                     }
 411                     else
 412                     {
 413                         /* Force only, no potential */
 414                         kernelptr = (nb_kernel_t *)nlist[i].kernelptr_f;
 415                     }
 416
 417                     if (nlist[i].type != GMX_NBLIST_INTERACTION_FREE_ENERGY && (flags & GMX_NONBONDED_DO_FOREIGNLAMBDA))
 418                     {
 419                         /* We don't need the non-perturbed interactions */
 420                         continue;
 421                     }
 422                     /* Neighborlists whose kernelptr==NULL will always be empty */
 423                     if (kernelptr != nullptr)
 424                     {
 425                         (*kernelptr)(&(nlist[i]), x, f, fr, mdatoms, &kernel_data, nrnb);
 426                     }
 427                     else
 428                     {
 429                         gmx_fatal(FARGS, "Non-empty neighborlist does not have any kernel pointer assigned.");
 430                     }
 431                 }
 432             }
 433         }
 434     }
 435 }