Sort all includes in src/gromacs

[alexxy/gromacs.git] / src / gromacs / gmxlib / cuda_tools / cudautils.cu
diff --git a/src/gromacs/gmxlib/cuda_tools/cudautils.cu b/src/gromacs/gmxlib/cuda_tools/cudautils.cu

index 467c3ce0bb3a1143af52daa634d8217e1ec02412..44a18eaec48ca2bf09ef6900aa3b95c44f7a9657 100644 (file)
--- a/src/gromacs/gmxlib/cuda_tools/cudautils.cu
+++ b/src/gromacs/gmxlib/cuda_tools/cudautils.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2012, by the GROMACS development team, led by
+ * Copyright (c) 2012,2014, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -33,26 +33,30 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  
-#include <stdlib.h>
+#include "gmxpre.h"
  
-#include "gmx_fatal.h"
-#include "smalloc.h"
-#include "typedefs.h"
  #include "cudautils.cuh"
  
+#include <stdlib.h>
+
+#include "gromacs/legacyheaders/typedefs.h"
+#include "gromacs/utility/smalloc.h"
+
  /*** Generic CUDA data operation wrappers ***/
  
  /*! Launches synchronous or asynchronous device to host memory copy.
   *
   *  The copy is launched in stream s or if not specified, in stream 0.
   */
-static int cu_copy_D2H_generic(void * h_dest, void * d_src, size_t bytes, 
+static int cu_copy_D2H_generic(void * h_dest, void * d_src, size_t bytes,
                                 bool bAsync = false, cudaStream_t s = 0)
  {
      cudaError_t stat;
-    
+
      if (h_dest == NULL || d_src == NULL || bytes == 0)
+    {
          return -1;
+    }
  
      if (bAsync)
      {
@@ -83,9 +87,11 @@ int cu_copy_D2H_async(void * h_dest, void * d_src, size_t bytes, cudaStream_t s
  }
  
  int cu_copy_D2H_alloc(void ** h_dest, void * d_src, size_t bytes)
-{ 
+{
      if (h_dest == NULL || d_src == NULL || bytes == 0)
+    {
          return -1;
+    }
  
      smalloc(*h_dest, bytes);
  
@@ -96,13 +102,15 @@ int cu_copy_D2H_alloc(void ** h_dest, void * d_src, size_t bytes)
   *
   *  The copy is launched in stream s or if not specified, in stream 0.
   */
-static int cu_copy_H2D_generic(void * d_dest, void * h_src, size_t bytes, 
+static int cu_copy_H2D_generic(void * d_dest, void * h_src, size_t bytes,
                                 bool bAsync = false, cudaStream_t s = 0)
  {
      cudaError_t stat;
  
      if (d_dest == NULL || h_src == NULL || bytes == 0)
+    {
          return -1;
+    }
  
      if (bAsync)
      {
@@ -119,7 +127,7 @@ static int cu_copy_H2D_generic(void * d_dest, void * h_src, size_t bytes,
  }
  
  int cu_copy_H2D(void * d_dest, void * h_src, size_t bytes)
-{   
+{
      return cu_copy_H2D_generic(d_dest, h_src, bytes, false);
  }
  
@@ -127,7 +135,7 @@ int cu_copy_H2D(void * d_dest, void * h_src, size_t bytes)
   *  The copy is launched in stream s or if not specified, in stream 0.
   */
  int cu_copy_H2D_async(void * d_dest, void * h_src, size_t bytes, cudaStream_t s = 0)
-{   
+{
      return cu_copy_H2D_generic(d_dest, h_src, bytes, true, s);
  }
  
@@ -136,7 +144,9 @@ int cu_copy_H2D_alloc(void ** d_dest, void * h_src, size_t bytes)
      cudaError_t stat;
  
      if (d_dest == NULL || h_src == NULL || bytes == 0)
+    {
          return -1;
+    }
  
      stat = cudaMalloc(d_dest, bytes);
      CU_RET_ERR(stat, "cudaMalloc failed in cu_copy_H2D_alloc");
@@ -146,7 +156,7 @@ int cu_copy_H2D_alloc(void ** d_dest, void * h_src, size_t bytes)
  
  float cu_event_elapsed(cudaEvent_t start, cudaEvent_t end)
  {
-    float t = 0.0;
+    float       t = 0.0;
      cudaError_t stat;
  
      stat = cudaEventElapsedTime(&t, start, end);
@@ -165,10 +175,10 @@ int cu_wait_event(cudaEvent_t e)
      return 0;
  }
  
-/*! 
+/*!
   *  If time != NULL it also calculates the time elapsed between start and end and
   *  returns this in milliseconds.
- */ 
+ */
  int cu_wait_event_time(cudaEvent_t end, cudaEvent_t start, float *time)
  {
      cudaError_t s;
@@ -211,11 +221,11 @@ void cu_free_buffered(void *d_ptr, int *n, int *nalloc)
  }
  
  /*!
- *  Reallocation of the memory pointed by d_ptr and copying of the data from 
- *  the location pointed by h_src host-side pointer is done. Allocation is 
- *  buffered and therefore freeing is only needed if the previously allocated 
+ *  Reallocation of the memory pointed by d_ptr and copying of the data from
+ *  the location pointed by h_src host-side pointer is done. Allocation is
+ *  buffered and therefore freeing is only needed if the previously allocated
   *  space is not enough.
- *  The H2D copy is launched in stream s and can be done synchronously or 
+ *  The H2D copy is launched in stream s and can be done synchronously or
   *  asynchronously (the default is the latter).
   */
  void cu_realloc_buffered(void **d_dest, void *h_src,
@@ -232,7 +242,7 @@ void cu_realloc_buffered(void **d_dest, void *h_src,
          return;
      }
  
-    /* reallocate only if the data does not fit = allocation size is smaller 
+    /* reallocate only if the data does not fit = allocation size is smaller
         than the current requested size */
      if (req_size > *curr_alloc_size)
      {