Move some verlet headers to mdlib

author Roland Schulz <roland@utk.edu>

Tue, 20 May 2014 04:56:53 +0000 (00:56 -0400)

committer Gerrit Code Review <gerrit@gerrit.gromacs.org>

Mon, 11 Aug 2014 22:22:31 +0000 (00:22 +0200)
author Roland Schulz <roland@utk.edu>
Tue, 20 May 2014 04:56:53 +0000 (00:56 -0400)
committer Gerrit Code Review <gerrit@gerrit.gromacs.org>
Mon, 11 Aug 2014 22:22:31 +0000 (00:22 +0200)
diff --git a/src/gromacs/legacyheaders/CMakeLists.txt b/src/gromacs/legacyheaders/CMakeLists.txt

index 586797fab9d1a017fe3ad7c66728dd80bf3b74da..aea277ee6f368c7da4acdf92c91841df878eb8d0 100644 (file)
--- a/src/gromacs/legacyheaders/CMakeLists.txt
+++ b/src/gromacs/legacyheaders/CMakeLists.txt
@@ -35,7 +35,7 @@
  # includes: Nothing to build, just installation
  file(GLOB ROOT_LEGACY_HEADERS          *.h)
  file(GLOB ROOT_LEGACY_HEADERS_PRIVATE  thread_mpi.h tmpi.h gmx_hash.h 
-     gmx_ga2la.h gpu_utils.h pmalloc_cuda.h nbnxn_cuda_data_mgmt.h)
+     gmx_ga2la.h gpu_utils.h pmalloc_cuda.h)
  file(GLOB TYPES_LEGACY_HEADERS         types/*.h)
  file(GLOB TYPES_LEGACY_HEADERS_PRIVATE types/commrec.h)
  list(REMOVE_ITEM ROOT_LEGACY_HEADERS   ${ROOT_LEGACY_HEADERS_PRIVATE})
diff --git a/src/gromacs/legacyheaders/force.h b/src/gromacs/legacyheaders/force.h

index 087e5d1ecbb0ba6b328ab9a13920212497036c18..1d3fb6bc796d299c60b60a8147d2b5e24f41eac8 100644 (file)
--- a/src/gromacs/legacyheaders/force.h
+++ b/src/gromacs/legacyheaders/force.h
@@ -149,9 +149,9 @@ gmx_bool nbnxn_acceleration_supported(FILE             *fplog,
   * message to fplog/stderr.
   */
  
-gmx_bool uses_simple_tables(int                 cutoff_scheme,
-                            nonbonded_verlet_t *nbv,
-                            int                 group);
+gmx_bool uses_simple_tables(int                        cutoff_scheme,
+                            struct nonbonded_verlet_t *nbv,
+                            int                        group);
  /* Returns whether simple tables (i.e. not for use with GPUs) are used
   * with the type of kernel indicated.
   */
@@ -285,6 +285,9 @@ extern void do_force_lowlevel(FILE         *fplog,
                                float        *cycles_pme);
  /* Call all the force routines */
  
+void free_gpu_resources(const t_forcerec *fr,
+                        const t_commrec  *cr);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/gromacs/legacyheaders/sim_util.h b/src/gromacs/legacyheaders/sim_util.h

index e21a7d278b5aaa09889e8e6342b7f154f8f24b9a..5a404cc40076c932d9c966dc679ba5ad72ff9c65 100644 (file)
--- a/src/gromacs/legacyheaders/sim_util.h
+++ b/src/gromacs/legacyheaders/sim_util.h
@@ -109,7 +109,7 @@ void finish_run(FILE *log, t_commrec *cr,
                  t_inputrec *inputrec,
                  t_nrnb nrnb[], gmx_wallcycle_t wcycle,
                  gmx_walltime_accounting_t walltime_accounting,
-                wallclock_gpu_t *gputimes,
+                struct nonbonded_verlet_t *nbv,
                  gmx_bool bWriteStat);
  
  void calc_enervirdiff(FILE *fplog, int eDispCorr, t_forcerec *fr);
@@ -139,6 +139,8 @@ void init_md(FILE *fplog,
               gmx_bool *bSimAnn, t_vcm **vcm, unsigned long Flags);
  /* Routine in sim_util.c */
  
+gmx_bool use_GPU(const struct nonbonded_verlet_t *nbv);
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/src/gromacs/legacyheaders/typedefs.h b/src/gromacs/legacyheaders/typedefs.h

index 242435d4e94f7059007924d6bcf903c12b819e0f..72661f7e9547943646a14e4f7adac30515845a4f 100644 (file)
--- a/src/gromacs/legacyheaders/typedefs.h
+++ b/src/gromacs/legacyheaders/typedefs.h
@@ -52,7 +52,6 @@
  #include "types/inputrec.h"
  #include "types/nrnb.h"
  #include "types/nblist.h"
-#include "types/nbnxn_pairlist.h"
  #include "types/nsgrid.h"
  #include "types/forcerec.h"
  #include "types/fcdata.h"
diff --git a/src/gromacs/legacyheaders/types/forcerec.h b/src/gromacs/legacyheaders/types/forcerec.h

index 9be952063348af49d37c1ab975e771a9b5f42fb6..16b0e87841c8fda544614307d848af219cc1fb77 100644 (file)
--- a/src/gromacs/legacyheaders/types/forcerec.h
+++ b/src/gromacs/legacyheaders/types/forcerec.h
@@ -39,7 +39,6 @@
  #include "genborn.h"
  #include "qmmmrec.h"
  #include "../../topology/idef.h"
-#include "nb_verlet.h"
  #include "interaction_const.h"
  #include "hw_info.h"
  
@@ -52,8 +51,7 @@ extern "C" {
  
  /* Abstract type for PME that is defined only in the routine that use them. */
  typedef struct gmx_pme *gmx_pme_t;
-
-
+struct nonbonded_verlet_t;
  
  /* Structure describing the data in a single table */
  typedef struct
@@ -315,13 +313,13 @@ typedef struct {
      rvec        *shift_vec;
  
      /* The neighborlists including tables */
-    int                 nnblists;
-    int                *gid2nblists;
-    t_nblists          *nblists;
+    int                        nnblists;
+    int                       *gid2nblists;
+    t_nblists                 *nblists;
  
-    int                 cutoff_scheme; /* group- or Verlet-style cutoff */
-    gmx_bool            bNonbonded;    /* true if nonbonded calculations are *not* turned off */
-    nonbonded_verlet_t *nbv;
+    int                        cutoff_scheme; /* group- or Verlet-style cutoff */
+    gmx_bool                   bNonbonded;    /* true if nonbonded calculations are *not* turned off */
+    struct nonbonded_verlet_t *nbv;
  
      /* The wall tables (if used) */
      int            nwall;
diff --git a/src/gromacs/mdlib/domdec.c b/src/gromacs/mdlib/domdec.c

index a70ab1c522d098a3c69872512ccf6af986e7e9c3..b94aa743fc697c28a79b2e435c8c39ca4d434123 100644 (file)
--- a/src/gromacs/mdlib/domdec.c
+++ b/src/gromacs/mdlib/domdec.c
@@ -69,6 +69,7 @@
  #include "gromacs/fileio/gmxfio.h"
  #include "gromacs/fileio/pdbio.h"
  #include "gromacs/imd/imd.h"
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/pulling/pull.h"
diff --git a/src/gromacs/mdlib/forcerec.c b/src/gromacs/mdlib/forcerec.c

index f2620a9de7cbad5c1b510c29ace00f13e9298b78..031d517886fb4e13738ce3747305113fab85968b 100644 (file)
--- a/src/gromacs/mdlib/forcerec.c
+++ b/src/gromacs/mdlib/forcerec.c
@@ -76,8 +76,9 @@
  
  #include "types/nbnxn_cuda_types_ext.h"
  #include "gpu_utils.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  #include "pmalloc_cuda.h"
+#include "nb_verlet.h"
  
  t_forcerec *mk_forcerec(void)
  {
@@ -3321,3 +3322,44 @@ void forcerec_set_excl_load(t_forcerec           *fr,
          fr->excl_load[t] = i;
      }
  }
+
+/* Frees the nbnxn GPU data and destroys the CUDA context of this rank.
+ *
+ * This can be called on every rank, whether or not GPUs are used in the
+ * run: all work, including the thread-MPI barrier, is skipped unless this
+ * is a PP rank for which fr, fr->nbv and fr->nbv->bUseGPU are all set.
+ */
+void free_gpu_resources(const t_forcerec *fr,
+                        const t_commrec  *cr)
+{
+    gmx_bool bIsPPrankUsingGPU;
+    char     gpu_err_str[STRLEN];
+
+    bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr && fr->nbv && fr->nbv->bUseGPU;
+
+    if (bIsPPrankUsingGPU)
+    {
+        /* free nbnxn data in GPU memory */
+        nbnxn_cuda_free(fr->nbv->cu_nbv);
+
+        /* With tMPI we need to wait for all ranks to finish deallocation before
+         * destroying the context in free_gpu() as some ranks may be sharing
+         * GPU and context.
+         * Note: only PP ranks need to free GPU resources, so PME ranks do not
+         * call the barrier (NOTE(review): confirm gmx_barrier() is PP-only).
+         */
+#ifdef GMX_THREAD_MPI
+        if (PAR(cr))
+        {
+            gmx_barrier(cr);
+        }
+#endif  /* GMX_THREAD_MPI */
+
+        /* uninitialize GPU (by destroying the context); failure is only warned about */
+        if (!free_gpu(gpu_err_str))
+        {
+            gmx_warning("On rank %d failed to free GPU #%d: %s",
+                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
+        }
+    }
+}
diff --git a/src/gromacs/legacyheaders/types/nb_verlet.h b/src/gromacs/mdlib/nb_verlet.h

similarity index 97%

rename from src/gromacs/legacyheaders/types/nb_verlet.h

rename to src/gromacs/mdlib/nb_verlet.h

index e1e8ab0ca82bcc8b85469e79686039191c4b6262..4f51797e29ab06595e24c47c1a5baa92b472df2f 100644 (file)
--- a/src/gromacs/legacyheaders/types/nb_verlet.h
+++ b/src/gromacs/mdlib/nb_verlet.h
@@ -37,7 +37,7 @@
  #define NB_VERLET_H
  
  #include "nbnxn_pairlist.h"
-#include "nbnxn_cuda_types_ext.h"
+#include "types/nbnxn_cuda_types_ext.h"
  
  #ifdef __cplusplus
  extern "C" {
@@ -88,7 +88,7 @@ enum {
      enbvClearFNo, enbvClearFYes
  };
  
-typedef struct {
+typedef struct nonbonded_verlet_group_t {
      nbnxn_pairlist_set_t  nbl_lists;   /* pair list(s)                       */
      nbnxn_atomdata_t     *nbat;        /* atom data                          */
      int                   kernel_type; /* non-bonded kernel - see enum above */
@@ -96,7 +96,7 @@ typedef struct {
  } nonbonded_verlet_group_t;
  
  /* non-bonded data structure with Verlet-type cut-off */
-typedef struct {
+typedef struct nonbonded_verlet_t {
      nbnxn_search_t           nbs;             /* n vs n atom pair searching data       */
      int                      ngrp;            /* number of interaction groups          */
      nonbonded_verlet_group_t grp[2];          /* local and non-local interaction group */
diff --git a/src/gromacs/mdlib/nbnxn_atomdata.c b/src/gromacs/mdlib/nbnxn_atomdata.c

index 1babe769f786877e1c0b1f27e127cce92bc5cabe..5e4dfef0c02504882933c4f8886ea2b6a188cb21 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_atomdata.c
+++ b/src/gromacs/mdlib/nbnxn_atomdata.c
@@ -49,6 +49,7 @@
  #include "gmx_omp_nthreads.h"
  #include "thread_mpi/atomic.h"
  
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/utility/gmxomp.h"
  #include "gromacs/utility/smalloc.h"
diff --git a/src/gromacs/mdlib/nbnxn_atomdata.h b/src/gromacs/mdlib/nbnxn_atomdata.h

index 5855e5b50d1e5845a514553b85ffb0866da850d3..efe86a5b3a3d3a0ef0adb28114fa95d4a5cf8e55 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_atomdata.h
+++ b/src/gromacs/mdlib/nbnxn_atomdata.h
@@ -37,6 +37,7 @@
  #define _nbnxn_atomdata_h
  
  #include "typedefs.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gromacs/mdlib/nbnxn_consts.h b/src/gromacs/mdlib/nbnxn_consts.h

index f5bd3d01ab096ef770fb074dcf9e6af423111ac9..719e47b8f4f6c99d94b70fc2f74cbe96d9e57466 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_consts.h
+++ b/src/gromacs/mdlib/nbnxn_consts.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013,2014 by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -63,7 +63,7 @@ extern "C" {
  #define NBNXN_GPU_CLUSTER_SIZE         8
  
  /* With GPU kernels we group cluster pairs in 4 to optimize memory usage.
- * To change this, also change nbnxn_cj4_t in include/types/nbnxn_pairlist.h.
+ * To change this, also change nbnxn_cj4_t in gromacs/mdlib/nbnxn_pairlist.h.
   */
  #define NBNXN_GPU_JGROUP_SIZE       4
  #define NBNXN_GPU_JGROUP_SIZE_2LOG  2
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu

index 40d86e1b08a215db30aac7f67a111d5d2e441e5d..fa2eb36b1c6d5a2ba72f20c6922195895a3433b5 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu
@@ -44,8 +44,8 @@
  #include <cuda.h>
  
  #include "types/simple.h"
-#include "types/nbnxn_pairlist.h"
-#include "types/nb_verlet.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
+#include "gromacs/mdlib/nb_verlet.h"
  #include "types/force_flags.h"
  #include "../nbnxn_consts.h"
  
@@ -56,7 +56,7 @@
  #include "nbnxn_cuda_types.h"
  #include "../../gmxlib/cuda_tools/cudautils.cuh"
  #include "nbnxn_cuda.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/utility/cstringutil.h"
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu

index 37679494faa52ce66d6024b401a100f77dc3e5e8..57fd906c10a08e4c13533873805512e024588769 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu
@@ -44,7 +44,7 @@
  #include "tables.h"
  #include "typedefs.h"
  #include "types/enums.h"
-#include "types/nb_verlet.h"
+#include "gromacs/mdlib/nb_verlet.h"
  #include "types/interaction_const.h"
  #include "types/force_flags.h"
  #include "../nbnxn_consts.h"
@@ -52,7 +52,7 @@
  
  #include "nbnxn_cuda_types.h"
  #include "../../gmxlib/cuda_tools/cudautils.cuh"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  #include "pmalloc_cuda.h"
  #include "gpu_utils.h"
  
@@ -426,10 +426,15 @@ static void init_nbparam(cu_nbparam_t              *nbp,
  
  /*! Re-generate the GPU Ewald force table, resets rlist, and update the
   *  electrostatic type switching to twin cut-off (or back) if needed. */
-void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t           cu_nb,
-                                         const interaction_const_t *ic)
+void nbnxn_cuda_pme_loadbal_update_param(const nonbonded_verlet_t    *nbv,
+                                         const interaction_const_t   *ic)
  {
-    cu_nbparam_t *nbp = cu_nb->nbparam;
+    if (!nbv || nbv->grp[0].kernel_type != nbnxnk8x8x8_CUDA)
+    {
+        return;
+    }
+    nbnxn_cuda_ptr_t cu_nb = nbv->cu_nbv;
+    cu_nbparam_t    *nbp   = cu_nb->nbparam;
  
      set_cutoff_parameters(nbp, ic);
  
@@ -1077,11 +1082,11 @@ wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t cu_nb)
      return (cu_nb != NULL && cu_nb->bDoTime) ? cu_nb->timings : NULL;
  }
  
-void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t cu_nb)
+void nbnxn_cuda_reset_timings(nonbonded_verlet_t* nbv)
  {
-    if (cu_nb->bDoTime)
+    if (nbv && nbv->cu_nbv && nbv->cu_nbv->bDoTime) /* caller passes NULL nbv when no GPU is used */
      {
-        init_timings(cu_nb->timings);
+        init_timings(nbv->cu_nbv->timings);
      }
  }
  
diff --git a/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h

similarity index 84%

rename from src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h

rename to src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h

index 919e1355b09e20534dbadda4835cff8921ca1ca8..f77a91583bb5bc3e1effcd505c4d5bcab64772a0 100644 (file)
--- a/src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h
@@ -40,7 +40,6 @@
  #include "types/interaction_const.h"
  #include "types/nbnxn_cuda_types_ext.h"
  #include "types/hw_info.h"
-#include "types/nb_verlet.h"
  
  #ifdef GMX_GPU
  #define FUNC_TERM ;
@@ -54,6 +53,10 @@
  extern "C" {
  #endif
  
+struct nonbonded_verlet_group_t;
+struct nbnxn_pairlist_t;
+struct nbnxn_atomdata_t;
+
  /** Initializes the data structures related to CUDA nonbonded calculations. */
  FUNC_QUALIFIER
  void nbnxn_cuda_init(FILE gmx_unused                 *fplog,
@@ -66,30 +69,30 @@ void nbnxn_cuda_init(FILE gmx_unused                 *fplog,
  
  /** Initializes simulation constant data. */
  FUNC_QUALIFIER
-void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t               gmx_unused  cu_nb,
-                           const interaction_const_t      gmx_unused *ic,
-                           const nonbonded_verlet_group_t gmx_unused *nbv_group) FUNC_TERM
+void nbnxn_cuda_init_const(nbnxn_cuda_ptr_t               gmx_unused         cu_nb,
+                           const interaction_const_t      gmx_unused        *ic,
+                           const struct nonbonded_verlet_group_t gmx_unused *nbv_group) FUNC_TERM
  
  /** Initializes pair-list data for GPU, called at every pair search step. */
  FUNC_QUALIFIER
-void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t       gmx_unused  cu_nb,
-                              const nbnxn_pairlist_t gmx_unused *h_nblist,
-                              int                    gmx_unused  iloc) FUNC_TERM
+void nbnxn_cuda_init_pairlist(nbnxn_cuda_ptr_t       gmx_unused         cu_nb,
+                              const struct nbnxn_pairlist_t gmx_unused *h_nblist,
+                              int                    gmx_unused         iloc) FUNC_TERM
  
  /** Initializes atom-data on the GPU, called at every pair search step. */
  FUNC_QUALIFIER
-void nbnxn_cuda_init_atomdata(nbnxn_cuda_ptr_t       gmx_unused  cu_nb,
-                              const nbnxn_atomdata_t gmx_unused *atomdata) FUNC_TERM
+void nbnxn_cuda_init_atomdata(const nbnxn_cuda_ptr_t       gmx_unused   cu_nb,
+                              const struct nbnxn_atomdata_t gmx_unused *atomdata) FUNC_TERM
  
  /*! \brief Update parameters during PP-PME load balancing. */
  FUNC_QUALIFIER
-void nbnxn_cuda_pme_loadbal_update_param(nbnxn_cuda_ptr_t          gmx_unused  cu_nb,
-                                         const interaction_const_t gmx_unused *ic) FUNC_TERM
+void nbnxn_cuda_pme_loadbal_update_param(const struct nonbonded_verlet_t gmx_unused *nbv,
+                                         const interaction_const_t gmx_unused       *ic) FUNC_TERM
  
  /** Uploads shift vector to the GPU if the box is dynamic (otherwise just returns). */
  FUNC_QUALIFIER
-void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t       gmx_unused  cu_nb,
-                                const nbnxn_atomdata_t gmx_unused *nbatom) FUNC_TERM
+void nbnxn_cuda_upload_shiftvec(nbnxn_cuda_ptr_t       gmx_unused         cu_nb,
+                                const struct nbnxn_atomdata_t gmx_unused *nbatom) FUNC_TERM
  
  /** Clears GPU outputs: nonbonded force, shift force and energy. */
  FUNC_QUALIFIER
@@ -113,7 +116,7 @@ wallclock_gpu_t * nbnxn_cuda_get_timings(nbnxn_cuda_ptr_t gmx_unused cu_nb)
  
  /** Resets nonbonded GPU timings. */
  FUNC_QUALIFIER
-void nbnxn_cuda_reset_timings(nbnxn_cuda_ptr_t gmx_unused cu_nb) FUNC_TERM
+void nbnxn_cuda_reset_timings(struct nonbonded_verlet_t gmx_unused *nbv) FUNC_TERM
  
  /** Calculates the minimum size of proximity lists to improve SM load balance
   *  with CUDA non-bonded kernels. */
diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h

index 0fa40d2466800d9fc85f4185c0a920e4e6d02a0b..74df69eb34f6ad940b4cd05129c32ec66c9fb4ce 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h
+++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h
@@ -47,7 +47,7 @@
  #define NBNXN_CUDA_TYPES_H
  
  #include "types/interaction_const.h"
-#include "types/nbnxn_pairlist.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  #include "types/nbnxn_cuda_types_ext.h"
  #include "../../gmxlib/cuda_tools/cudautils.cuh"
  
diff --git a/src/gromacs/mdlib/nbnxn_internal.h b/src/gromacs/mdlib/nbnxn_internal.h

index fc42e6093566bca679cda924585feaff2fece613..352253ec9d0c4b5811235fd356700aa6eefd65cb 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_internal.h
+++ b/src/gromacs/mdlib/nbnxn_internal.h
@@ -40,6 +40,7 @@
  #include "nbnxn_simd.h"
  #include "domdec.h"
  #include "gromacs/timing/cyclecounter.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  
  
  /* Bounding box calculations are (currently) always in single precision, so
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h

index 7855b310fe10fb3f50cd51f4a9831f8362862dc4..89b365f55b3b3b6abcdb688147eee61aa3a70562 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -37,6 +37,7 @@
  #define _nbnxn_kernel_common_h
  
  #include "typedefs.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c

index 898d300da1c5e136bca40df0243d8398069df24e..a606329e56f464e619767c49ff621a257cd4d1a0 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c
@@ -47,6 +47,7 @@
  #include "nbnxn_kernel_gpu_ref.h"
  #include "../nbnxn_consts.h"
  #include "nbnxn_kernel_common.h"
+#include "gromacs/mdlib/nb_verlet.h"
  
  #define NCL_PER_SUPERCL         (NBNXN_GPU_NCLUSTER_PER_SUPERCLUSTER)
  #define CL_SIZE                 (NBNXN_GPU_CLUSTER_SIZE)
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h

index 18f4e9d01f3693f07bb17b62a33bcf38d4aeccbd..2fda7440a6591c4f530f4142408781e2f096b456 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -37,6 +37,7 @@
  #define _nbnxn_kernel_gpu_ref_h
  
  #include "typedefs.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c

index 4638d1b6a48ba6d893a6a69bc76c04ec6d0a4001..ca00dc79ac75d4bec707c41681b8fdde792434c6 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c
@@ -48,6 +48,7 @@
  #include "nbnxn_kernel_ref.h"
  #include "../nbnxn_consts.h"
  #include "nbnxn_kernel_common.h"
+#include "gromacs/mdlib/nb_verlet.h"
  
  /*! \brief Typedefs for declaring lookup tables of kernel functions.
   */
diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h

index bfcfee5b77419f694331b8815f0af526484be897..16e864d9a3548eb047cdb13dd9825ecd2aec7a7d 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012,2013, by the GROMACS development team, led by
+ * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -37,6 +37,7 @@
  #define _nbnxn_kernel_ref_h
  
  #include "typedefs.h"
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c

index a6f53dffa6be1df64763d6554ed3c89e51270b70..135f9605c4e8b1ee2350e16f21dd2be26eb23834 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c
@@ -41,6 +41,7 @@
  
  #include "typedefs.h"
  
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
  
  #ifdef GMX_NBNXN_SIMD_2XNN
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h

index c7ec9bcbd7db63438412c1d7a5581ef39c34fe63..6baaa8aadb90f5b2ddc2a77dd6e0d1482143ae7d 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h
@@ -39,6 +39,7 @@
  
  #include "typedefs.h"
  
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
  
  #ifdef __cplusplus
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c

index de84c80532155000e04158b15ebef129e7c11a29..8e1db2d10aa18dbc6680dfdb6e171523911ff981 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c
@@ -41,6 +41,7 @@
  
  #include "typedefs.h"
  
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
  
  #ifdef GMX_NBNXN_SIMD_4XN
diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h

index e6e475765a84e69ec2279edf674ac31d59f64281..7fcc431a3a78c7613a92c391add07c08c54f34a8 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h
+++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h
@@ -39,6 +39,7 @@
  
  #include "typedefs.h"
  
+#include "gromacs/mdlib/nbnxn_pairlist.h"
  #include "gromacs/mdlib/nbnxn_simd.h"
  
  #ifdef __cplusplus
diff --git a/src/gromacs/legacyheaders/types/nbnxn_pairlist.h b/src/gromacs/mdlib/nbnxn_pairlist.h

similarity index 98%

rename from src/gromacs/legacyheaders/types/nbnxn_pairlist.h

rename to src/gromacs/mdlib/nbnxn_pairlist.h

index dec56d38f17f0c5dfe6f5bbe416f2dfc4d61a493..f03009d9a83e28d827b29ace352812b060f83343 100644 (file)
--- a/src/gromacs/legacyheaders/types/nbnxn_pairlist.h
+++ b/src/gromacs/mdlib/nbnxn_pairlist.h
@@ -36,7 +36,8 @@
  #ifndef _nbnxn_pairlist_h
  #define _nbnxn_pairlist_h
  
-#include "nblist.h"
+#include "thread_mpi/atomic.h"
+#include "types/nblist.h"
  
  #ifdef __cplusplus
  extern "C" {
@@ -124,7 +125,7 @@ typedef struct {
                              */
  } nbnxn_excl_t;
  
-typedef struct {
+typedef struct nbnxn_pairlist_t {
      gmx_cache_protect_t cp0;
  
      nbnxn_alloc_t      *alloc;
@@ -222,10 +223,7 @@ enum {
      ljcrGEOM, ljcrLB, ljcrNONE, ljcrNR
  };
  
-/* TODO: Remove need for forward declare */
-struct tMPI_Atomic;
-
-typedef struct {
+typedef struct nbnxn_atomdata_t {
      nbnxn_alloc_t           *alloc;
      nbnxn_free_t            *free;
      int                      ntype;           /* The number of different atom types                 */
@@ -269,7 +267,7 @@ typedef struct {
      gmx_bool                 bUseBufferFlags;        /* Use the flags or operate on all atoms     */
      nbnxn_buffer_flags_t     buffer_flags;           /* Flags for buffer zeroing+reduc.  */
      gmx_bool                 bUseTreeReduce;         /* Use tree for force reduction */
-    struct tMPI_Atomic      *syncStep;               /* Synchronization step for tree reduce */
+    tMPI_Atomic_t           *syncStep;               /* Synchronization step for tree reduce */
  } nbnxn_atomdata_t;
  
  #ifdef __cplusplus
diff --git a/src/gromacs/mdlib/nbnxn_search.c b/src/gromacs/mdlib/nbnxn_search.c

index 0826017bab62cad0285c13546270b01a9f8b85a8..620dc0dd745eb84f86b86bb151c4c6e491927357 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_search.c
+++ b/src/gromacs/mdlib/nbnxn_search.c
@@ -56,6 +56,7 @@
  #include "ns.h"
  
  #include "gromacs/pbcutil/ishift.h"
+#include "gromacs/mdlib/nb_verlet.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/utility/smalloc.h"
  
diff --git a/src/gromacs/mdlib/nbnxn_search.h b/src/gromacs/mdlib/nbnxn_search.h

index 6b3ab7c8d2c8744dad189d8c331cfbc02b14c79a..500c7188ac60c3ab0431fa0d12be8dca83a28f76 100644 (file)
--- a/src/gromacs/mdlib/nbnxn_search.h
+++ b/src/gromacs/mdlib/nbnxn_search.h
@@ -37,6 +37,7 @@
  #define _nbnxn_search_h
  
  #include "typedefs.h"
+#include "nbnxn_pairlist.h"
  
  #ifdef __cplusplus
  extern "C" {
diff --git a/src/gromacs/mdlib/sim_util.c b/src/gromacs/mdlib/sim_util.c

index 06bc1a858b1c5ea383be87dd75a2001b550529f9..ada853a849d42955e8d14fda6891f4b11c365162 100644 (file)
--- a/src/gromacs/mdlib/sim_util.c
+++ b/src/gromacs/mdlib/sim_util.c
@@ -79,6 +79,7 @@
  #include "../gmxlib/nonbonded/nb_free_energy.h"
  
  #include "gromacs/legacyheaders/types/commrec.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  #include "gromacs/pbcutil/ishift.h"
  #include "gromacs/pbcutil/mshift.h"
  #include "gromacs/timing/wallcycle.h"
@@ -94,9 +95,10 @@
  
  #include "gmx_omp_nthreads.h"
  
-#include "nbnxn_cuda_data_mgmt.h"
  #include "nbnxn_cuda/nbnxn_cuda.h"
  
+#include "nb_verlet.h"
+
  void print_time(FILE                     *out,
                  gmx_walltime_accounting_t walltime_accounting,
                  gmx_int64_t               step,
@@ -797,6 +799,11 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists,
      wallcycle_sub_stop(wcycle, ewcsNONBONDED);
  }
  
+gmx_bool use_GPU(const nonbonded_verlet_t *nbv)
+{
+    return nbv != NULL && nbv->bUseGPU; /* a NULL nbv is reported as "no GPU in use" */
+}
+
  void do_force_cutsVERLET(FILE *fplog, t_commrec *cr,
                           t_inputrec *inputrec,
                           gmx_int64_t step, t_nrnb *nrnb, gmx_wallcycle_t wcycle,
@@ -2645,7 +2652,7 @@ void finish_run(FILE *fplog, t_commrec *cr,
                  t_inputrec *inputrec,
                  t_nrnb nrnb[], gmx_wallcycle_t wcycle,
                  gmx_walltime_accounting_t walltime_accounting,
-                wallclock_gpu_t *gputimes,
+                nonbonded_verlet_t *nbv,
                  gmx_bool bWriteStat)
  {
      int     i, j;
@@ -2709,6 +2716,8 @@ void finish_run(FILE *fplog, t_commrec *cr,
  
      if (SIMMASTER(cr))
      {
+        wallclock_gpu_t* gputimes = use_GPU(nbv) ?
+            nbnxn_cuda_get_timings(nbv->cu_nbv) : NULL;
          wallcycle_print(fplog, cr->nnodes, cr->npmenodes,
                          elapsed_time_over_all_ranks,
                          wcycle, gputimes);
diff --git a/src/programs/mdrun/md.cpp b/src/programs/mdrun/md.cpp

index 934443bcf3bfba26b9b07125ec3400be18173bd4..011b84a3d81ee5318b8083773115b72d1acf9c8e 100644 (file)
--- a/src/programs/mdrun/md.cpp
+++ b/src/programs/mdrun/md.cpp
@@ -76,7 +76,7 @@
  #include "membed.h"
  #include "types/nlistheuristics.h"
  #include "types/iteratedconstraints.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  
  #include "gromacs/fileio/confio.h"
  #include "gromacs/fileio/mdoutf.h"
@@ -103,7 +103,7 @@ static void reset_all_counters(FILE *fplog, t_commrec *cr,
                                 gmx_int64_t *step_rel, t_inputrec *ir,
                                 gmx_wallcycle_t wcycle, t_nrnb *nrnb,
                                 gmx_walltime_accounting_t walltime_accounting,
-                               nbnxn_cuda_ptr_t cu_nbv)
+                               struct nonbonded_verlet_t *nbv)
  {
      char sbuf[STEPSTRSIZE];
  
@@ -111,10 +111,7 @@ static void reset_all_counters(FILE *fplog, t_commrec *cr,
      md_print_warn(cr, fplog, "step %s: resetting all time and cycle counters\n",
                    gmx_step_str(step, sbuf));
  
-    if (cu_nbv)
-    {
-        nbnxn_cuda_reset_timings(cu_nbv);
-    }
+    nbnxn_cuda_reset_timings(nbv);
  
      wallcycle_stop(wcycle, ewcRUN);
      wallcycle_reset_all(wcycle);
@@ -477,7 +474,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
       */
      if ((Flags & MD_TUNEPME) &&
          EEL_PME(fr->eeltype) &&
-        ( (fr->cutoff_scheme == ecutsVERLET && fr->nbv->bUseGPU) || !(cr->duty & DUTY_PME)) &&
+        ( use_GPU(fr->nbv) || !(cr->duty & DUTY_PME)) &&
          !bRerunMD)
      {
          pme_loadbal_init(&pme_loadbal, ir, state->box, fr->ic, fr->pmedata);
@@ -1919,7 +1916,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
          {
              /* Reset all the counters related to performance over the run */
              reset_all_counters(fplog, cr, step, &step_rel, ir, wcycle, nrnb, walltime_accounting,
-                               fr->nbv != NULL && fr->nbv->bUseGPU ? fr->nbv->cu_nbv : NULL);
+                               use_GPU(fr->nbv) ? fr->nbv : NULL);
              wcycle_set_reset_counters(wcycle, -1);
              if (!(cr->duty & DUTY_PME))
              {
@@ -1974,7 +1971,7 @@ double do_md(FILE *fplog, t_commrec *cr, int nfile, const t_filenm fnm[],
      if (pme_loadbal != NULL)
      {
          pme_loadbal_done(pme_loadbal, cr, fplog,
-                         fr->nbv != NULL && fr->nbv->bUseGPU);
+                         use_GPU(fr->nbv));
      }
  
      if (shellfc && fplog)
diff --git a/src/programs/mdrun/pme_loadbal.c b/src/programs/mdrun/pme_loadbal.c

index 2996bc621098a0fd59571c60ad2c2e73936b8fd4..75c6c789179fb2ceb8ec9132ab16ec6297c40ce2 100644 (file)
--- a/src/programs/mdrun/pme_loadbal.c
+++ b/src/programs/mdrun/pme_loadbal.c
@@ -39,13 +39,14 @@
  #include "calcgrid.h"
  #include "pme.h"
  #include "domdec.h"
-#include "nbnxn_cuda_data_mgmt.h"
+#include "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h"
  #include "force.h"
  #include "macros.h"
  #include "md_logging.h"
  #include "pme_loadbal.h"
  
  #include "gromacs/math/vec.h"
+#include "gromacs/legacyheaders/sim_util.h"
  #include "gromacs/pbcutil/pbc.h"
  #include "gromacs/utility/cstringutil.h"
  #include "gromacs/utility/smalloc.h"
@@ -428,17 +429,17 @@ static void switch_to_stage1(pme_load_balancing_t pme_lb)
      pme_lb->cur = pme_lb->start - 1;
  }
  
-gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
-                          t_commrec           *cr,
-                          FILE                *fp_err,
-                          FILE                *fp_log,
-                          t_inputrec          *ir,
-                          t_state             *state,
-                          double               cycles,
-                          interaction_const_t *ic,
-                          nonbonded_verlet_t  *nbv,
-                          gmx_pme_t           *pmedata,
-                          gmx_int64_t          step)
+gmx_bool pme_load_balance(pme_load_balancing_t        pme_lb,
+                          t_commrec                  *cr,
+                          FILE                       *fp_err,
+                          FILE                       *fp_log,
+                          t_inputrec                 *ir,
+                          t_state                    *state,
+                          double                      cycles,
+                          interaction_const_t        *ic,
+                          struct nonbonded_verlet_t  *nbv,
+                          gmx_pme_t                  *pmedata,
+                          gmx_int64_t                 step)
  {
      gmx_bool     OK;
      pme_setup_t *set;
@@ -690,30 +691,26 @@ gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
      }
  
      bUsesSimpleTables = uses_simple_tables(ir->cutoff_scheme, nbv, 0);
-    if (pme_lb->cutoff_scheme == ecutsVERLET &&
-        nbv->grp[0].kernel_type == nbnxnk8x8x8_CUDA)
-    {
-        nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv, ic);
-
-        /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
-         * also sharing texture references. To keep the code simple, we don't
-         * treat texture references as shared resources, but this means that
-         * the coulomb_tab texture ref will get updated by multiple threads.
-         * Hence, to ensure that the non-bonded kernels don't start before all
-         * texture binding operations are finished, we need to wait for all ranks
-         * to arrive here before continuing.
-         *
-         * Note that we could omit this barrier if GPUs are not shared (or
-         * texture objects are used), but as this is initialization code, there
-         * is not point in complicating things.
-         */
+    nbnxn_cuda_pme_loadbal_update_param(nbv, ic);
+
+    /* With tMPI + GPUs some ranks may be sharing GPU(s) and therefore
+     * also sharing texture references. To keep the code simple, we don't
+     * treat texture references as shared resources, but this means that
+     * the coulomb_tab texture ref will get updated by multiple threads.
+     * Hence, to ensure that the non-bonded kernels don't start before all
+     * texture binding operations are finished, we need to wait for all ranks
+     * to arrive here before continuing.
+     *
+     * Note that we could omit this barrier if GPUs are not shared (or
+     * texture objects are used), but as this is initialization code, there
+     * is no point in complicating things.
+     */
  #ifdef GMX_THREAD_MPI
-        if (PAR(cr))
-        {
-            gmx_barrier(cr);
-        }
-#endif  /* GMX_THREAD_MPI */
+    if (PAR(cr) && use_GPU(nbv))
+    {
+        gmx_barrier(cr);
      }
+#endif  /* GMX_THREAD_MPI */
  
      /* Usually we won't need the simple tables with GPUs.
       * But we do with hybrid acceleration and with free energy.
diff --git a/src/programs/mdrun/pme_loadbal.h b/src/programs/mdrun/pme_loadbal.h

index eddb4d67eb9c035c01cf5aebbc461140fedf6a64..a96881eac3550caea84c56ca50d5e468eb01b628 100644 (file)
--- a/src/programs/mdrun/pme_loadbal.h
+++ b/src/programs/mdrun/pme_loadbal.h
@@ -60,17 +60,17 @@ void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
   * factors as well as DD load balancing.
   * Returns TRUE the load balancing continues, FALSE is the balancing is done.
   */
-gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
-                          t_commrec           *cr,
-                          FILE                *fp_err,
-                          FILE                *fp_log,
-                          t_inputrec          *ir,
-                          t_state             *state,
-                          double               cycles,
-                          interaction_const_t *ic,
-                          nonbonded_verlet_t  *nbv,
-                          gmx_pme_t           *pmedata,
-                          gmx_int64_t          step);
+gmx_bool pme_load_balance(pme_load_balancing_t        pme_lb,
+                          t_commrec                  *cr,
+                          FILE                       *fp_err,
+                          FILE                       *fp_log,
+                          t_inputrec                 *ir,
+                          t_state                    *state,
+                          double                      cycles,
+                          interaction_const_t        *ic,
+                          struct nonbonded_verlet_t  *nbv,
+                          gmx_pme_t                  *pmedata,
+                          gmx_int64_t                 step);
  
  /* Restart the PME load balancing discarding all timings gathered up till now */
  void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n);
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp

index 9a00d35da440ccd1fa8dbcadb636d1465c5866b6..4281e4e03511e062147b8d80ef10578cb4e711b6 100644 (file)
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -97,7 +97,6 @@
  #endif
  
  #include "gpu_utils.h"
-#include "nbnxn_cuda_data_mgmt.h"
  
  typedef struct {
      gmx_integrator_t *func;
@@ -1037,47 +1036,6 @@ static void override_nsteps_cmdline(FILE            *fplog,
      }
  }
  
-/* Frees GPU memory and destroys the CUDA context.
- *
- * Note that this function needs to be called even if GPUs are not used
- * in this run because the PME ranks have no knowledge of whether GPUs
- * are used or not, but all ranks need to enter the barrier below.
- */
-static void free_gpu_resources(const t_forcerec *fr,
-                               const t_commrec  *cr)
-{
-    gmx_bool bIsPPrankUsingGPU;
-    char     gpu_err_str[STRLEN];
-
-    bIsPPrankUsingGPU = (cr->duty & DUTY_PP) && fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU;
-
-    if (bIsPPrankUsingGPU)
-    {
-        /* free nbnxn data in GPU memory */
-        nbnxn_cuda_free(fr->nbv->cu_nbv);
-
-        /* With tMPI we need to wait for all ranks to finish deallocation before
-         * destroying the context in free_gpu() as some ranks may be sharing
-         * GPU and context.
-         * Note: as only PP ranks need to free GPU resources, so it is safe to
-         * not call the barrier on PME ranks.
-         */
-#ifdef GMX_THREAD_MPI
-        if (PAR(cr))
-        {
-            gmx_barrier(cr);
-        }
-#endif  /* GMX_THREAD_MPI */
-
-        /* uninitialize GPU (by destroying the context) */
-        if (!free_gpu(gpu_err_str))
-        {
-            gmx_warning("On rank %d failed to free GPU #%d: %s",
-                        cr->nodeid, get_current_gpu_device_id(), gpu_err_str);
-        }
-    }
-}
-
  int mdrunner(gmx_hw_opt_t *hw_opt,
               FILE *fplog, t_commrec *cr, int nfile,
               const t_filenm fnm[], const output_env_t oenv, gmx_bool bVerbose,
@@ -1793,8 +1751,7 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
       */
      finish_run(fplog, cr,
                 inputrec, nrnb, wcycle, walltime_accounting,
-               fr != NULL && fr->nbv != NULL && fr->nbv->bUseGPU ?
-               nbnxn_cuda_get_timings(fr->nbv->cu_nbv) : NULL,
+               fr ? fr->nbv : NULL,
                 EI_DYNAMICS(inputrec->eI) && !MULTISIM(cr));
author	Roland Schulz <roland@utk.edu>
	Tue, 20 May 2014 04:56:53 +0000 (00:56 -0400)
committer	Gerrit Code Review <gerrit@gerrit.gromacs.org>
	Mon, 11 Aug 2014 22:22:31 +0000 (00:22 +0200)
src/gromacs/legacyheaders/CMakeLists.txt		patch \| blob \| history
src/gromacs/legacyheaders/force.h		patch \| blob \| history
src/gromacs/legacyheaders/sim_util.h		patch \| blob \| history
src/gromacs/legacyheaders/typedefs.h		patch \| blob \| history
src/gromacs/legacyheaders/types/forcerec.h		patch \| blob \| history
src/gromacs/mdlib/domdec.c		patch \| blob \| history
src/gromacs/mdlib/forcerec.c		patch \| blob \| history
src/gromacs/mdlib/nb_verlet.h	[moved from src/gromacs/legacyheaders/types/nb_verlet.h with 97% similarity]	patch \| blob \| history
src/gromacs/mdlib/nbnxn_atomdata.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_atomdata.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_consts.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu		patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.h	[moved from src/gromacs/legacyheaders/nbnxn_cuda_data_mgmt.h with 84% similarity]	patch \| blob \| history
src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_internal.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.h		patch \| blob \| history
src/gromacs/mdlib/nbnxn_pairlist.h	[moved from src/gromacs/legacyheaders/types/nbnxn_pairlist.h with 98% similarity]	patch \| blob \| history
src/gromacs/mdlib/nbnxn_search.c		patch \| blob \| history
src/gromacs/mdlib/nbnxn_search.h		patch \| blob \| history
src/gromacs/mdlib/sim_util.c		patch \| blob \| history
src/programs/mdrun/md.cpp		patch \| blob \| history
src/programs/mdrun/pme_loadbal.c		patch \| blob \| history
src/programs/mdrun/pme_loadbal.h		patch \| blob \| history
src/programs/mdrun/runner.cpp		patch \| blob \| history