Merge branch release-2019 into release-2020

author Paul Bauer <paul.bauer.q@gmail.com>

Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)

committer Paul Bauer <paul.bauer.q@gmail.com>

Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)
author Paul Bauer <paul.bauer.q@gmail.com>
Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)
committer Paul Bauer <paul.bauer.q@gmail.com>
Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)
diff --cc src/gromacs/ewald/pme.cpp
Simple merge
diff --cc src/gromacs/gmxana/gmx_disre.cpp

index 66a79e7276350f1aeec58d8a40c7ac4b9d020d07,369cbed25129e73dc73310d9dc3a1f2c0b80dd83..608677f69fdc572f32c9b7009f0c0c681f5cb47c
--- 1/src/gromacs/gmxana/gmx_disre.cpp
--- 2/src/gromacs/gmxana/gmx_disre.cpp
+++ b/src/gromacs/gmxana/gmx_disre.cpp
@@@ -180,22 -172,46 +180,45 @@@ static void check_viol(FILE*       log
       {
           vvindex[j] = 0;
       }
- -    nat = interaction_function[F_DISRES].nratoms+1;
+ +    nat = interaction_function[F_DISRES].nratoms + 1;
+     // Check internal consistency of disres.label
+     // The label for a distance restraint should be at most one larger
+     // than the previous label.
+     int label_old = forceparams[forceatoms[0]].disres.label;
+     for (i = 0; (i < disres->nr); i += nat)
+     {
+         type  = forceatoms[i];
+         label = forceparams[type].disres.label;
- -        if ((label == label_old) || (label == label_old+1))
++        if ((label == label_old) || (label == label_old + 1))
+         {
+             label_old = label;
+         }
+         else
+         {
- -            gmx_fatal(FARGS, "Label mismatch in distance restrains. Label for restraint %d is %d, expected it to be either %d or %d",
- -                      i/nat, label, label_old, label_old+1);
++            gmx_fatal(FARGS,
++                      "Label mismatch in distance restrains. Label for restraint %d is %d, "
++                      "expected it to be either %d or %d",
++                      i / nat, label, label_old, label_old + 1);
+         }
+     }
+     // Get offset for label index
+     label_old = forceparams[forceatoms[0]].disres.label;
- -    for (i = 0; (i < disres->nr); )
+ +    for (i = 0; (i < disres->nr);)
       {
           type  = forceatoms[i];
           n     = 0;
-         label = forceparams[type].disres.label;
+         label = forceparams[type].disres.label - label_old;
           if (debug)
           {
- -            fprintf(debug, "DISRE: ndr = %d, label = %d  i=%d, n =%d\n",
- -                    ndr, label, i, n);
+ +            fprintf(debug, "DISRE: ndr = %d, label = %d  i=%d, n =%d\n", ndr, label, i, n);
           }
           do
           {
               n += nat;
-         } while (((i + n) < disres->nr) && (forceparams[forceatoms[i + n]].disres.label == label));
- -        }
- -        while (((i+n) < disres->nr) &&
- -               (forceparams[forceatoms[i+n]].disres.label == label+label_old));
++        } while (((i + n) < disres->nr)
++                 && (forceparams[forceatoms[i + n]].disres.label == label + label_old));
   
- -        calc_disres_R_6(nullptr, nullptr, n, &forceatoms[i],
- -                        x, pbc, fcd, nullptr);
+ +        calc_disres_R_6(nullptr, nullptr, n, &forceatoms[i], x, pbc, fcd, nullptr);
   
           if (fcd->disres.Rt_6[label] <= 0)
           {
diff --cc src/gromacs/gpu_utils/gpu_utils.cu

index b7e5e0f77ead42f116484c55e26a20a5fcce2cc8,41aedf9f13e9dc6e8a1153e9fa532785077a8747..216a930b67243c194feebecaf8948052d73ed397
--- 1/src/gromacs/gpu_utils/gpu_utils.cu
--- 2/src/gromacs/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gpu_utils/gpu_utils.cu
@@@ -1,8 -1,7 +1,8 @@@
   /*
    * This file is part of the GROMACS molecular simulation package.
    *
-  * Copyright (c) 2010-2018, The GROMACS development team.
-  * Copyright (c) 2019, by the GROMACS development team, led by
- - * Copyright (c) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020, by the GROMACS development team, led by
++ * Copyright (c) 2010,2011,2012,2013,2014,2015,2016, The GROMACS development team.
++ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
    * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
    * and including many others, as listed in the AUTHORS file in the
    * top-level source directory and at http://www.gromacs.org.
@@@ -68,38 -66,39 +68,35 @@@
    *
    * In reality it is 16 with CUDA <=v5.0, but let's stay on the safe side.
    */
- -static int  cuda_max_device_count = 32;
+ +static int cuda_max_device_count = 32;
   
- -static bool cudaProfilerRun      = ((getenv("NVPROF_ID") != nullptr));
+ +static bool cudaProfilerRun = ((getenv("NVPROF_ID") != nullptr));
   
   /** Dummy kernel used for sanity checking. */
- -static __global__ void k_dummy_test(void)
- -{
- -}
+ +static __global__ void k_dummy_test(void) {}
   
- static void checkCompiledTargetCompatibility(int deviceId, const cudaDeviceProp& deviceProp)
- -static cudaError_t checkCompiledTargetCompatibility(int                   deviceId,
- -                                                    const cudaDeviceProp &deviceProp)
++static cudaError_t checkCompiledTargetCompatibility(int deviceId, const cudaDeviceProp& deviceProp)
   {
       cudaFuncAttributes attributes;
       cudaError_t        stat = cudaFuncGetAttributes(&attributes, k_dummy_test);
   
       if (cudaErrorInvalidDeviceFunction == stat)
       {
-         gmx_fatal(FARGS,
-                   "The %s binary does not include support for the CUDA architecture of a "
-                   "detected GPU: %s, ID #%d (compute capability %d.%d). "
-                   "By default, GROMACS supports all architectures of compute "
-                   "capability >= 3.0, so your GPU "
-                   "might be rare, or some architectures were disabled in the build. "
-                   "Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
-                   "GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture. "
-                   "To work around this error, use the CUDA_VISIBLE_DEVICES environment"
-                   "variable to pass a list of GPUs that excludes the ID %d.",
-                   gmx::getProgramContext().displayName(), deviceProp.name, deviceId,
-                   deviceProp.major, deviceProp.minor, deviceId);
+         fprintf(stderr,
+                 "\nWARNING: The %s binary does not include support for the CUDA architecture of "
+                 "the GPU ID #%d (compute capability %d.%d) detected during detection. "
+                 "By default, GROMACS supports all architectures of compute "
+                 "capability >= 3.0, so your GPU "
+                 "might be rare, or some architectures were disabled in the build. \n"
+                 "Consult the install guide for how to use the GMX_CUDA_TARGET_SM and "
+                 "GMX_CUDA_TARGET_COMPUTE CMake variables to add this architecture. \n",
- -                gmx::getProgramContext().displayName(), deviceId,
- -                deviceProp.major, deviceProp.minor);
++                gmx::getProgramContext().displayName(), deviceId, deviceProp.major, deviceProp.minor);
       }
   
-     CU_RET_ERR(stat, "cudaFuncGetAttributes failed");
+     return stat;
   }
   
- -bool isHostMemoryPinned(const void *h_ptr)
+ +bool isHostMemoryPinned(const void* h_ptr)
   {
       cudaPointerAttributes memoryAttributes;
       cudaError_t           stat = cudaPointerGetAttributes(&memoryAttributes, h_ptr);
@@@ -200,13 -203,35 +197,36 @@@ static int do_sanity_checks(int dev_id
           }
       }
   
+     cu_err = checkCompiledTargetCompatibility(dev_id, dev_prop);
+     // Avoid triggering an error if GPU devices are in exclusive or prohibited mode;
+     // it is enough to check for cudaErrorDevicesUnavailable only here because
+     // if we encounter it that will happen in cudaFuncGetAttributes in the above function.
+     if (cu_err == cudaErrorDevicesUnavailable)
+     {
+         return -2;
+     }
+     else if (cu_err != cudaSuccess)
+     {
+         return -1;
+     }
+ 
       /* try to execute a dummy kernel */
-     checkCompiledTargetCompatibility(dev_id, dev_prop);
+     try
+     {
+         KernelLaunchConfig config;
- -        config.blockSize[0] = 512;
- -        const auto         dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
++        config.blockSize[0]       = 512;
++        const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
+         launchGpuKernel(k_dummy_test, config, nullptr, "Dummy kernel", dummyArguments);
+     }
- -    catch (gmx::GromacsException &ex)
++    catch (gmx::GromacsException& ex)
+     {
+         // launchGpuKernel error is not fatal and should continue with marking the device bad
- -        fprintf(stderr, "Error occurred while running dummy kernel sanity check on device #%d:\n %s\n",
- -                id, formatExceptionMessageToString(ex).c_str());
++        fprintf(stderr,
++                "Error occurred while running dummy kernel sanity check on device #%d:\n %s\n", id,
++                formatExceptionMessageToString(ex).c_str());
+         return -1;
+     }
   
-     KernelLaunchConfig config;
-     config.blockSize[0]       = 512;
-     const auto dummyArguments = prepareGpuKernelArguments(k_dummy_test, config);
-     launchGpuKernel(k_dummy_test, config, nullptr, "Dummy kernel", dummyArguments);
       if (cudaDeviceSynchronize() != cudaSuccess)
       {
           return -1;
@@@ -315,26 -342,26 +335,30 @@@ static int is_gmx_supported_gpu_id(int 
        * the dummy test kernel fails to execute with a "device busy message" we
        * should appropriately report that the device is busy instead of insane.
        */
-     if (do_sanity_checks(deviceId, deviceProp) != 0)
+     const int checkResult = do_sanity_checks(deviceId, deviceProp);
+     switch (checkResult)
       {
-         return egpuInsane;
- -        case  0: return egpuCompatible;
++        case 0: return egpuCompatible;
+         case -1: return egpuInsane;
+         case -2: return egpuUnavailable;
- -        default: GMX_RELEASE_ASSERT(false, "Invalid do_sanity_checks() return value");
++        default:
++            GMX_RELEASE_ASSERT(false, "Invalid do_sanity_checks() return value");
+             return egpuCompatible;
       }
- 
-     return egpuCompatible;
   }
   
- -bool canDetectGpus(std::string *errorMessage)
+ +bool isGpuDetectionFunctional(std::string* errorMessage)
   {
- -    cudaError_t        stat;
- -    int                driverVersion = -1;
- -    stat = cudaDriverGetVersion(&driverVersion);
- -    GMX_ASSERT(stat != cudaErrorInvalidValue, "An impossible null pointer was passed to cudaDriverGetVersion");
- -    GMX_RELEASE_ASSERT(stat == cudaSuccess,
- -                       gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
- -                                         cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+ +    cudaError_t stat;
+ +    int         driverVersion = -1;
+ +    stat                      = cudaDriverGetVersion(&driverVersion);
+ +    GMX_ASSERT(stat != cudaErrorInvalidValue,
+ +               "An impossible null pointer was passed to cudaDriverGetVersion");
+ +    GMX_RELEASE_ASSERT(
+ +            stat == cudaSuccess,
+ +            gmx::formatString("An unexpected value was returned from cudaDriverGetVersion %s: %s",
+ +                              cudaGetErrorName(stat), cudaGetErrorString(stat))
+ +                    .c_str());
       bool foundDriver = (driverVersion > 0);
       if (!foundDriver)
       {
diff --cc src/gromacs/hardware/gpu_hw_info.cpp

index cf913ed041b79a30cc7a94aa6065a5909abd9654,7afade840532d4683bade75d2bd0245447e3fd89..8c79bf432028c32c3f5475729ac81c6c0f73099f
--- 1/src/gromacs/hardware/gpu_hw_info.cpp
--- 2/src/gromacs/hardware/gpu_hw_info.cpp
+++ b/src/gromacs/hardware/gpu_hw_info.cpp
@@@ -1,7 -1,7 +1,7 @@@
   /*
    * This file is part of the GROMACS molecular simulation package.
    *
-  * Copyright (c) 2017,2018,2019, by the GROMACS development team, led by
- - * Copyright (c) 2017,2018,2020, by the GROMACS development team, led by
++ * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
    * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
    * and including many others, as listed in the AUTHORS file in the
    * top-level source directory and at http://www.gromacs.org.
@@@ -45,7 -45,7 +45,8 @@@
   // TODO If/when we unify CUDA and OpenCL support code, this should
   // move to a single place in gpu_utils.
   /* Names of the GPU detection/check results (see e_gpu_detect_res_t in hw_info.h). */
- -const char * const gpu_detect_res_str[egpuNR] =
- -{
- -    "compatible", "nonexistent", "incompatible", "incompatible (please recompile with GMX_OPENCL_NB_CLUSTER_SIZE=4)", "insane", "unavailable"
+ +const char* const gpu_detect_res_str[egpuNR] = {
-     "compatible", "nonexistent", "incompatible",
-     "incompatible (please recompile with GMX_OPENCL_NB_CLUSTER_SIZE=4)", "insane"
++    "compatible",   "nonexistent",
++    "incompatible", "incompatible (please recompile with GMX_OPENCL_NB_CLUSTER_SIZE=4)",
++    "insane",       "unavailable"
   };
diff --cc src/gromacs/hardware/gpu_hw_info.h

index 471eeb81e2e7160c3b185d39b3909099cc703a2c,b13934a02da54d5619583cdf8c684144256dde22..a8b3144ee5b32f88c52a24a30d4e8eda8f7fb569
--- 1/src/gromacs/hardware/gpu_hw_info.h
--- 2/src/gromacs/hardware/gpu_hw_info.h
+++ b/src/gromacs/hardware/gpu_hw_info.h
@@@ -43,38 -47,26 +43,42 @@@ struct gmx_device_info_t
    *
    * The egpuInsane value means that during the sanity checks an error
    * occurred that indicates malfunctioning of the device, driver, or
-  * incompatible driver/runtime. */
+  * incompatible driver/runtime.
+  * eGpuUnavailable indicates that CUDA devices are busy or unavailable
+  * typically due to use of cudaComputeModeExclusive, cudaComputeModeProhibited modes.
+  */
   typedef enum
   {
- -    egpuCompatible = 0,  egpuNonexistent,  egpuIncompatible, egpuIncompatibleClusterSize, egpuInsane, egpuUnavailable, egpuNR
+ +    egpuCompatible = 0,
+ +    egpuNonexistent,
+ +    egpuIncompatible,
+ +    egpuIncompatibleClusterSize,
+ +    egpuInsane,
++    egpuUnavailable,
+ +    egpuNR
   } e_gpu_detect_res_t;
   
- -/* Names of the GPU detection/check results */
- -extern const char * const gpu_detect_res_str[egpuNR];
+ +/*! \brief Names of the GPU detection/check results
+ + *
+ + * \todo Make a proper class enumeration with helper string */
+ +extern const char* const gpu_detect_res_str[egpuNR];
   
- -/* GPU device information -- includes either CUDA or OpenCL devices.
- - * The gmx_hardware_detect module initializes it. */
+ +/*! \brief Information about GPU devices on this physical node.
+ + *
+ + * Includes either CUDA or OpenCL devices.  The gmx_hardware_detect
+ + * module initializes it.
+ + *
+ + * \todo Use a std::vector */
   struct gmx_gpu_info_t
   {
- -    gmx_bool                  bDetectGPUs;      /* Did we try to detect GPUs? */
- -    int                       n_dev;            /* total number of GPU devices detected */
- -    struct gmx_device_info_t *gpu_dev;          /* GPU devices detected in the system (per node) */
- -    int                       n_dev_compatible; /* number of compatible GPUs */
+ +    //! Did we attempt GPU detection?
+ +    gmx_bool bDetectGPUs;
+ +    //! Total number of GPU devices detected on this physical node
+ +    int n_dev;
+ +    //! Information about each GPU device detected on this physical node
+ +    gmx_device_info_t* gpu_dev;
+ +    //! Number of GPU devices detected on this physical node that are compatible.
+ +    int n_dev_compatible;
   };
   
   #endif
diff --cc src/gromacs/mdrun/rerun.cpp

index 39b76695e825312a821cd871106e05715ef87af8,3a6469d0f5f92777f1edad686bfd0b9ea1789322..956bd1ab1cbdd33e3ca7e8f673e5ebe4586607b9
--- 1/src/gromacs/mdrun/rerun.cpp
--- 2/src/gromacs/mdrun/rerun.cpp
+++ b/src/gromacs/mdrun/rerun.cpp
@@@ -543,9 -548,12 +541,9 @@@ void gmx::LegacySimulator::do_rerun(
               update_mdatoms(mdatoms, state->lambda[efptMASS]);
           }
   
- -        force_flags = (GMX_FORCE_STATECHANGED |
- -                       GMX_FORCE_DYNAMICBOX |
- -                       GMX_FORCE_ALLFORCES |
- -                       GMX_FORCE_VIRIAL |  // TODO: Get rid of this once #2649 and #3400 are solved
- -                       GMX_FORCE_ENERGY |
- -                       (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0));
+ +        force_flags = (GMX_FORCE_STATECHANGED | GMX_FORCE_DYNAMICBOX | GMX_FORCE_ALLFORCES
-                        | (GMX_GPU ? GMX_FORCE_VIRIAL : 0) | // TODO: Get rid of this once #2649 is solved
++                       | GMX_FORCE_VIRIAL | // TODO: Get rid of this once #2649 and #3400 are solved
+ +                       GMX_FORCE_ENERGY | (doFreeEnergyPerturbation ? GMX_FORCE_DHDL : 0));
   
           if (shellfc)
           {
author	Paul Bauer <paul.bauer.q@gmail.com>
	Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)
committer	Paul Bauer <paul.bauer.q@gmail.com>
	Fri, 28 Feb 2020 09:56:27 +0000 (10:56 +0100)
		1	2
src/gromacs/ewald/pme.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gmxana/gmx_disre.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/gpu_utils/gpu_utils.cu	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/hardware/gpu_hw_info.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/hardware/gpu_hw_info.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/gromacs/mdrun/rerun.cpp	patch \|	diff1 \|	diff2 \|	blob \| history