Code beautification with uncrustify
[alexxy/gromacs.git] / src / gromacs / gmxlib / thread_mpi / winthreads.c
index bedfef5b5d2ceb62c4e1b4219652c6eafa89b58f..c944a434444c23ff01f8e2545b5546ba3eb179b6 100644 (file)
@@ -1,39 +1,39 @@
 /*
-This source code file is part of thread_mpi.  
-Written by Sander Pronk, Erik Lindahl, and possibly others. 
+   This source code file is part of thread_mpi.
+   Written by Sander Pronk, Erik Lindahl, and possibly others.
 
-Copyright (c) 2009, Sander Pronk, Erik Lindahl.
-All rights reserved.
+   Copyright (c) 2009, Sander Pronk, Erik Lindahl.
+   All rights reserved.
 
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1) Redistributions of source code must retain the above copyright
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+   1) Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
-2) Redistributions in binary form must reproduce the above copyright
+   2) Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
-3) Neither the name of the copyright holders nor the
+   3) Neither the name of the copyright holders nor the
    names of its contributors may be used to endorse or promote products
    derived from this software without specific prior written permission.
 
-THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-If you want to redistribute modifications, please consider that
-scientific software is very special. Version control is crucial -
-bugs must be traceable. We will be happy to consider code for
-inclusion in the official distribution, but derived work should not
-be called official thread_mpi. Details are found in the README & COPYING
-files.
-*/
+   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
+   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
+   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you want to redistribute modifications, please consider that
+   scientific software is very special. Version control is crucial -
+   bugs must be traceable. We will be happy to consider code for
+   inclusion in the official distribution, but derived work should not
+   be called official thread_mpi. Details are found in the README & COPYING
+   files.
+ */
 
 
 
@@ -69,9 +69,9 @@ files.
 
 #include "winthreads.h"
 
-/*! \brief System mutex for all one-time initialization 
+/*! \brief System mutex for all one-time initialization
  *
- *  This static variable is necessary in order to make the header file 
+ *  This static variable is necessary in order to make the header file
  *  independent of the thread library implementation. Anyway, it
  *  will only be locked a handful of times at the start of program execution.
  */
@@ -83,65 +83,65 @@ static CRITICAL_SECTION barrier_init; /* mutex for initializing barriers */
 
 
 /* spinlock for initializing the above mutexes */
-static tMPI_Spinlock_t init_init=TMPI_SPINLOCK_INITIALIZER;
+static tMPI_Spinlock_t init_init = TMPI_SPINLOCK_INITIALIZER;
 
 /* whether tMPI_Thread_create has initialized these mutexes */
-static tMPI_Atomic_t init_inited={ 0 };
+static tMPI_Atomic_t init_inited = { 0 };
 
 /* whether the main thread affinity has been set */
-static tMPI_Spinlock_t main_thread_aff_lock=TMPI_SPINLOCK_INITIALIZER;
-static tMPI_Atomic_t main_thread_aff_set={ 0 };
+static tMPI_Spinlock_t main_thread_aff_lock = TMPI_SPINLOCK_INITIALIZER;
+static tMPI_Atomic_t   main_thread_aff_set  = { 0 };
 
 /* mutex for managing thread IDs */
 static CRITICAL_SECTION thread_id_list_lock;
-typedef struct 
-{ 
-    DWORD thread_id; /* the thread ID as returned by GetCurrentTreadID() */
-    struct tMPI_Thread* th; /* the associated tMPI thread structure */
+typedef struct
+{
+    DWORD               thread_id; /* the thread ID as returned by GetCurrentThreadId() */
+    struct tMPI_Thread* th;        /* the associated tMPI thread structure */
 } thread_id_list_t;
 /* the size of the thread id list */
-static int Nalloc_thread_id_list = 0;
+static int               Nalloc_thread_id_list = 0;
 /* the number of elements in the thread id list */
-static int N_thread_id_list = 0;
+static int               N_thread_id_list = 0;
 /* the thread ID list */
 static thread_id_list_t *thread_id_list;
 
 
 
 /* data structure to keep track of thread key destructors. */
-typedef struct 
+typedef struct
 {
-    void (*destructor) (void*);
+    void (*destructor)(void*);
     DWORD key;
 } thread_key_destructors;
 
-static thread_key_destructors *destructors=NULL;
+static thread_key_destructors *destructors = NULL;
 
 
 
 /*
     NUMA and Processor Group awareness support.
 
-    NUMA support is implemented to maximize the chance that memory access 
+    NUMA support is implemented to maximize the chance that memory access
     patterns remain Local to the NUMA node.
-    NUMA node processor affinity is utilized to prevent scheduler associated 
+    NUMA node processor affinity is utilized to prevent scheduler associated
     drift across NUMA nodes.
-    Process Group support is implemented to enable > 64 processors to be 
+    Process Group support is implemented to enable > 64 processors to be
     utilized.  This is only supported when building 64bit.
 
     The high level approach is:
-    1. Build a description of CPU topology, including processor numbers, NUMA 
+    1. Build a description of CPU topology, including processor numbers, NUMA
         node numbers, and affinity masks.
-    2. For processor intensive worker threads, create threads such that 
+    2. For processor intensive worker threads, create threads such that
         the processor affinity and thread stack is kept local within a NUMA node.
-    3. Employ simple round-robin affinity and node assignment approach when 
+    3. Employ simple round-robin affinity and node assignment approach when
         creating threads.
-    4. Use GetProcAddress() to obtain function pointers to functions that 
-        are operating system version dependent, to allow maximum binary 
-        compatibility. 
+    4. Use GetProcAddress() to obtain function pointers to functions that
+        are operating system version dependent, to allow maximum binary
+        compatibility.
 
-    Scott Field (sfield@microsoft.com)      Jan-2011    
-*/
+    Scott Field (sfield@microsoft.com)      Jan-2011
+ */
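
The GetProcAddress() technique in point 4 is the key portability trick: the entry points are resolved at run time, so one binary loads on anything from Windows XP to Windows 7. A minimal standalone sketch of the pattern, using the same kernel32 export that the code below resolves (an illustration, not part of this change):

    #include <windows.h>
    #include <stdio.h>

    typedef BOOL (WINAPI *func_GetNumaHighestNodeNumber_t)(PULONG HighestNodeNumber);

    int main(void)
    {
        /* kernel32.dll is already mapped into every Win32 process,
           so GetModuleHandleA() suffices; no LoadLibrary() needed. */
        HMODULE hModKernel32 = GetModuleHandleA("kernel32.dll");
        func_GetNumaHighestNodeNumber_t pfn = NULL;
        ULONG   ulHighest = 0;

        if (hModKernel32 != NULL)
        {
            pfn = (func_GetNumaHighestNodeNumber_t)
                  GetProcAddress(hModKernel32, "GetNumaHighestNodeNumber");
        }
        if (pfn != NULL && pfn(&ulHighest))
        {
            printf("highest NUMA node number: %lu\n", ulHighest);
        }
        else
        {
            /* older Windows: treat the system as non-NUMA */
            printf("NUMA API unavailable; using the non-NUMA path\n");
        }
        return 0;
    }
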
 
 
 typedef struct {
@@ -152,60 +152,60 @@ typedef struct {
 
 
 /* thread/processor index, to allow setting round-robin affinity. */
-volatile ULONG g_ulThreadIndex;                 
+volatile ULONG           g_ulThreadIndex;
 /* a value of zero implies the system is not NUMA */
-ULONG g_ulHighestNumaNodeNumber=0;
+ULONG                    g_ulHighestNumaNodeNumber = 0;
 /* total number of processors in g_MPI_ProcessInfo array */
-ULONG g_ulTotalProcessors;
+ULONG                    g_ulTotalProcessors;
 /* array describing available processors, affinity masks, and NUMA node */
-MPI_NUMA_PROCESSOR_INFO *g_MPI_ProcessorInfo=NULL;   
+MPI_NUMA_PROCESSOR_INFO *g_MPI_ProcessorInfo = NULL;
 
-/* function prototypes and variables to support obtaining function addresses 
+/* function prototypes and variables to support obtaining function addresses
    dynamically -- supports down-level operating systems */
 
-typedef BOOL (WINAPI *func_GetNumaHighestNodeNumber_t)( PULONG 
+typedef BOOL (WINAPI *func_GetNumaHighestNodeNumber_t)( PULONG
                                                         HighestNodeNumber );
 typedef DWORD (WINAPI *func_SetThreadIdealProcessor_t)( HANDLE hThread,
-                                                       DWORD dwIdealProcessor );
-typedef BOOL (WINAPI *func_SetThreadGroupAffinity_t)( HANDLE hThread, 
-                            const GROUP_AFFINITY *GroupAffinity, 
-                            PGROUP_AFFINITY PreviousGroupAffinity );
-typedef BOOL (WINAPI *func_SetThreadIdealProcessorEx_t)( HANDLE hThread, 
-                            PPROCESSOR_NUMBER lpIdealProcessor, 
-                            PPROCESSOR_NUMBER lpPreviousIdealProcessor );
-typedef BOOL (WINAPI *func_GetNumaNodeProcessorMaskEx_t)( USHORT Node, 
-                            PGROUP_AFFINITY ProcessorMask );
-typedef BOOL (WINAPI *func_GetNumaProcessorNodeEx_t)( 
-                            PPROCESSOR_NUMBER Processor, 
-                            PUSHORT NodeNumber );
-typedef VOID (WINAPI *func_GetCurrentProcessorNumberEx_t)( 
-                            PPROCESSOR_NUMBER ProcNumber );
+                                                        DWORD dwIdealProcessor );
+typedef BOOL (WINAPI *func_SetThreadGroupAffinity_t)( HANDLE hThread,
+                                                      const GROUP_AFFINITY *GroupAffinity,
+                                                      PGROUP_AFFINITY PreviousGroupAffinity );
+typedef BOOL (WINAPI *func_SetThreadIdealProcessorEx_t)( HANDLE hThread,
+                                                         PPROCESSOR_NUMBER lpIdealProcessor,
+                                                         PPROCESSOR_NUMBER lpPreviousIdealProcessor );
+typedef BOOL (WINAPI *func_GetNumaNodeProcessorMaskEx_t)( USHORT Node,
+                                                          PGROUP_AFFINITY ProcessorMask );
+typedef BOOL (WINAPI *func_GetNumaProcessorNodeEx_t)(
+        PPROCESSOR_NUMBER Processor,
+        PUSHORT NodeNumber );
+typedef VOID (WINAPI *func_GetCurrentProcessorNumberEx_t)(
+        PPROCESSOR_NUMBER ProcNumber );
 
 typedef HANDLE (WINAPI *func_CreateRemoteThreadEx_t)(
-                            HANDLE hProcess,
-                            LPSECURITY_ATTRIBUTES lpThreadAttributes,
-                            SIZE_T dwStackSize,
-                            LPTHREAD_START_ROUTINE lpStartAddress,
-                            LPVOID lpParameter,
-                            DWORD dwCreationFlags,
-                            LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
-                            LPDWORD lpThreadId);
+        HANDLE hProcess,
+        LPSECURITY_ATTRIBUTES lpThreadAttributes,
+        SIZE_T dwStackSize,
+        LPTHREAD_START_ROUTINE lpStartAddress,
+        LPVOID lpParameter,
+        DWORD dwCreationFlags,
+        LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
+        LPDWORD lpThreadId);
 
 typedef BOOL (WINAPI *func_InitializeProcThreadAttributeList_t)(
-                            LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList, 
-                            DWORD dwAttributeCount, 
-                            DWORD dwFlags, 
-                            PSIZE_T lpSize);
+        LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
+        DWORD dwAttributeCount,
+        DWORD dwFlags,
+        PSIZE_T lpSize);
 typedef BOOL (WINAPI *func_UpdateProcThreadAttribute_t)(
-                            LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
-                            DWORD dwFlags,
-                            DWORD_PTR Attribute,
-                            PVOID lpValue,
-                            SIZE_T cbSize,
-                            PVOID lpPreviousValue,
-                            PSIZE_T lpReturnSize);
+        LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList,
+        DWORD dwFlags,
+        DWORD_PTR Attribute,
+        PVOID lpValue,
+        SIZE_T cbSize,
+        PVOID lpPreviousValue,
+        PSIZE_T lpReturnSize);
 typedef VOID (WINAPI *func_DeleteProcThreadAttributeList_t)(
-                            LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList);
+        LPPROC_THREAD_ATTRIBUTE_LIST lpAttributeList);
 typedef DWORD (WINAPI *func_GetActiveProcessorCount_t)(WORD GroupNumber);
 typedef WORD (WINAPI *func_GetActiveProcessorGroupCount_t)(void);
 
@@ -214,42 +214,42 @@ typedef WORD (WINAPI *func_GetActiveProcessorGroupCount_t)(void);
 func_GetNumaHighestNodeNumber_t             func_GetNumaHighestNodeNumber;
 func_SetThreadIdealProcessor_t              func_SetThreadIdealProcessor;
 /* Windows 7, WinSrv 2008R2 */
-func_SetThreadGroupAffinity_t               func_SetThreadGroupAffinity;                
-func_SetThreadIdealProcessorEx_t            func_SetThreadIdealProcessorEx; 
+func_SetThreadGroupAffinity_t               func_SetThreadGroupAffinity;
+func_SetThreadIdealProcessorEx_t            func_SetThreadIdealProcessorEx;
 func_GetNumaNodeProcessorMaskEx_t           func_GetNumaNodeProcessorMaskEx;
-func_GetNumaProcessorNodeEx_t               func_GetNumaProcessorNodeEx;    
+func_GetNumaProcessorNodeEx_t               func_GetNumaProcessorNodeEx;
 func_GetCurrentProcessorNumberEx_t          func_GetCurrentProcessorNumberEx;
-func_GetActiveProcessorCount_t              func_GetActiveProcessorCount;    
+func_GetActiveProcessorCount_t              func_GetActiveProcessorCount;
 func_GetActiveProcessorGroupCount_t         func_GetActiveProcessorGroupCount;
-func_CreateRemoteThreadEx_t                 func_CreateRemoteThreadEx;        
-/* Windows Vista, WinSrv 2008 */ 
-func_InitializeProcThreadAttributeList_t    func_InitializeProcThreadAttributeList;     
-func_UpdateProcThreadAttribute_t            func_UpdateProcThreadAttribute;             
-func_DeleteProcThreadAttributeList_t        func_DeleteProcThreadAttributeList;         
+func_CreateRemoteThreadEx_t                 func_CreateRemoteThreadEx;
+/* Windows Vista, WinSrv 2008 */
+func_InitializeProcThreadAttributeList_t    func_InitializeProcThreadAttributeList;
+func_UpdateProcThreadAttribute_t            func_UpdateProcThreadAttribute;
+func_DeleteProcThreadAttributeList_t        func_DeleteProcThreadAttributeList;
 
 
 
 /*  returns 0 on success.
-    Success is returned if the system is non-NUMA, OR the system doesn't 
-    support appropriate NUMA APIs, OR the system is NUMA and we successfully 
+    Success is returned if the system is non-NUMA, OR the system doesn't
+    support appropriate NUMA APIs, OR the system is NUMA and we successfully
     initialized support.
-    
+
     returns -1 on error.
-    This can happen if an API returned an error, a memory allocation failed, or 
+    This can happen if an API returned an error, a memory allocation failed, or
     we failed to initialize affinity mapping information.
-*/
+ */
 int tMPI_Init_NUMA(void)
 {
     /* module handle to kernel32.dll -- we already reference it, so it's already loaded */
-    HMODULE hModKernel32 = NULL;                    
+    HMODULE hModKernel32 = NULL;
     /* 0-based NUMA node count -- does not imply all nodes have available (eg: hot-plug) processors */
-    ULONG ulHighestNumaNodeNumber;                  
+    ULONG   ulHighestNumaNodeNumber;
     /* total number of processors available per affinity masks */
-    DWORD dwTotalProcessors = 0;                    
-    ULONG i = 0;
+    DWORD   dwTotalProcessors = 0;
+    ULONG   i                 = 0;
 
     /* calling thread PROCESSOR_NUMBER */
-    PROCESSOR_NUMBER CurrentProcessorNumber;      
+    PROCESSOR_NUMBER CurrentProcessorNumber;
     /* calling thread GROUP_AFFINITY */
     /*GROUP_AFFINITY CurrentThreadGroupAffinity; */
     /* calling thread NUMA node */
@@ -259,72 +259,72 @@ int tMPI_Init_NUMA(void)
     WORD GroupIndex;
 
     /* array of processor information structures */
-    MPI_NUMA_PROCESSOR_INFO *pMPI_ProcessorInfo = NULL; 
+    MPI_NUMA_PROCESSOR_INFO *pMPI_ProcessorInfo = NULL;
 
     /* assume an error condition */
     int iRet = -1;
 
     hModKernel32 = GetModuleHandleA("kernel32.dll");
 
-    if( hModKernel32 == NULL )
+    if (hModKernel32 == NULL)
     {
         return 0;
     }
 
-    /* obtain addresses of relevant NUMA functions, most of which are 
+    /* obtain addresses of relevant NUMA functions, most of which are
        Windows 7 / Windows Server 2008R2 only functions
-       this is done using GetProcAddress to enable the binary to run on older 
+       this is done using GetProcAddress to enable the binary to run on older
        Windows versions.
-    */
+     */
 
     func_GetNumaHighestNodeNumber = (func_GetNumaHighestNodeNumber_t) GetProcAddress( hModKernel32, "GetNumaHighestNodeNumber" );
-    func_SetThreadIdealProcessor = (func_SetThreadIdealProcessor_t) GetProcAddress( hModKernel32, "SetThreadIdealProcessor" );
+    func_SetThreadIdealProcessor  = (func_SetThreadIdealProcessor_t) GetProcAddress( hModKernel32, "SetThreadIdealProcessor" );
 
-    if( func_GetNumaHighestNodeNumber == NULL )
+    if (func_GetNumaHighestNodeNumber == NULL)
     {
         return 0;
     }
 
-    /* determine if we're on a NUMA system and if so, determine the number of 
+    /* determine if we're on a NUMA system and if so, determine the number of
        (potential) nodes */
 
-    if(!func_GetNumaHighestNodeNumber( &ulHighestNumaNodeNumber ))
+    if (!func_GetNumaHighestNodeNumber( &ulHighestNumaNodeNumber ))
     {
         return -1;
     }
 
 
 
-    func_SetThreadGroupAffinity = (func_SetThreadGroupAffinity_t)GetProcAddress( hModKernel32, "SetThreadGroupAffinity" );
-    func_SetThreadIdealProcessorEx = (func_SetThreadIdealProcessorEx_t)GetProcAddress( hModKernel32, "SetThreadIdealProcessorEx" );
-    func_CreateRemoteThreadEx = (func_CreateRemoteThreadEx_t)GetProcAddress( hModKernel32, "CreateRemoteThreadEx" );
-    func_GetNumaNodeProcessorMaskEx = (func_GetNumaNodeProcessorMaskEx_t)GetProcAddress( hModKernel32, "GetNumaNodeProcessorMaskEx" );
-    func_GetNumaProcessorNodeEx = (func_GetNumaProcessorNodeEx_t)GetProcAddress( hModKernel32, "GetNumaProcessorNodeEx" );
-    func_GetCurrentProcessorNumberEx = (func_GetCurrentProcessorNumberEx_t)GetProcAddress( hModKernel32, "GetCurrentProcessorNumberEx" );
-    func_GetActiveProcessorCount = (func_GetActiveProcessorCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorCount" );
-    func_GetActiveProcessorGroupCount = (func_GetActiveProcessorGroupCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorGroupCount" );
+    func_SetThreadGroupAffinity            = (func_SetThreadGroupAffinity_t)GetProcAddress( hModKernel32, "SetThreadGroupAffinity" );
+    func_SetThreadIdealProcessorEx         = (func_SetThreadIdealProcessorEx_t)GetProcAddress( hModKernel32, "SetThreadIdealProcessorEx" );
+    func_CreateRemoteThreadEx              = (func_CreateRemoteThreadEx_t)GetProcAddress( hModKernel32, "CreateRemoteThreadEx" );
+    func_GetNumaNodeProcessorMaskEx        = (func_GetNumaNodeProcessorMaskEx_t)GetProcAddress( hModKernel32, "GetNumaNodeProcessorMaskEx" );
+    func_GetNumaProcessorNodeEx            = (func_GetNumaProcessorNodeEx_t)GetProcAddress( hModKernel32, "GetNumaProcessorNodeEx" );
+    func_GetCurrentProcessorNumberEx       = (func_GetCurrentProcessorNumberEx_t)GetProcAddress( hModKernel32, "GetCurrentProcessorNumberEx" );
+    func_GetActiveProcessorCount           = (func_GetActiveProcessorCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorCount" );
+    func_GetActiveProcessorGroupCount      = (func_GetActiveProcessorGroupCount_t)GetProcAddress( hModKernel32, "GetActiveProcessorGroupCount" );
     func_InitializeProcThreadAttributeList = (func_InitializeProcThreadAttributeList_t)GetProcAddress( hModKernel32, "InitializeProcThreadAttributeList" );
-    func_UpdateProcThreadAttribute = (func_UpdateProcThreadAttribute_t)GetProcAddress( hModKernel32, "UpdateProcThreadAttribute" );
-    func_DeleteProcThreadAttributeList = (func_DeleteProcThreadAttributeList_t)GetProcAddress( hModKernel32, "DeleteProcThreadAttributeList" );
-
-    if( (func_SetThreadGroupAffinity == NULL) ||
-        (func_SetThreadIdealProcessorEx == NULL) ||
-        (func_CreateRemoteThreadEx == NULL) ||
-        (func_GetNumaNodeProcessorMaskEx == NULL) ||
-        (func_GetNumaProcessorNodeEx == NULL) ||
-        (func_GetCurrentProcessorNumberEx == NULL) ||
-        (func_GetActiveProcessorCount == NULL) ||
-        (func_GetActiveProcessorGroupCount == NULL) ||
-        (func_InitializeProcThreadAttributeList == NULL) ||
-        (func_UpdateProcThreadAttribute == NULL) ||
-        (func_DeleteProcThreadAttributeList == NULL) )
+    func_UpdateProcThreadAttribute         = (func_UpdateProcThreadAttribute_t)GetProcAddress( hModKernel32, "UpdateProcThreadAttribute" );
+    func_DeleteProcThreadAttributeList     = (func_DeleteProcThreadAttributeList_t)GetProcAddress( hModKernel32, "DeleteProcThreadAttributeList" );
+
+    if ( (func_SetThreadGroupAffinity == NULL) ||
+         (func_SetThreadIdealProcessorEx == NULL) ||
+         (func_CreateRemoteThreadEx == NULL) ||
+         (func_GetNumaNodeProcessorMaskEx == NULL) ||
+         (func_GetNumaProcessorNodeEx == NULL) ||
+         (func_GetCurrentProcessorNumberEx == NULL) ||
+         (func_GetActiveProcessorCount == NULL) ||
+         (func_GetActiveProcessorGroupCount == NULL) ||
+         (func_InitializeProcThreadAttributeList == NULL) ||
+         (func_UpdateProcThreadAttribute == NULL) ||
+         (func_DeleteProcThreadAttributeList == NULL) )
     {
-        /* if any addresses couldn't be located, assume NUMA functionality 
+        /* if any addresses couldn't be located, assume NUMA functionality
            isn't supported */
         return 0;
     }
 #if 0
-       if( ulHighestNumaNodeNumber == 0 )
+    if (ulHighestNumaNodeNumber == 0)
     {
         /* system is not NUMA */
         return 0;
@@ -336,20 +336,20 @@ int tMPI_Init_NUMA(void)
     func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);
 
     wActiveGroupCount = func_GetActiveProcessorGroupCount();
-    
+
     dwTotalProcessors = func_GetActiveProcessorCount( ALL_PROCESSOR_GROUPS );
 
 #if !((defined WIN64 || defined _WIN64))
-    /* WOW64 doesn't allow setting the affinity correctly beyond 32 
+    /* WOW64 doesn't allow setting the affinity correctly beyond 32
        processors -- the KAFFINITY mask is only 32 bits wide
-       This check is only here for completeness -- large systems should be 
-       running 64bit Gromacs code, where the processor quantity is not 
+       This check is only here for completeness -- large systems should be
+       running 64bit Gromacs code, where the processor quantity is not
        constrained.
-       By failing here, the WOW64 32bit client will use normal CreateThread(), 
+       By failing here, the WOW64 32bit client will use normal CreateThread(),
        which can schedule up to 64 un-affinitized threads
-    */
+     */
 
-    if( dwTotalProcessors > 32 )
+    if (dwTotalProcessors > 32)
     {
         return 0;
     }
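
For reference, a hedged sketch of the processor-group queries made in this function. It links the Windows 7 APIs directly (hence the _WIN32_WINNT define), whereas the code above must resolve them through the function pointers so the binary still runs on older Windows:

    #define _WIN32_WINNT 0x0601    /* Windows 7, for processor-group APIs */
    #include <windows.h>
    #include <stdio.h>

    int main(void)
    {
        WORD  wGroups = GetActiveProcessorGroupCount();
        DWORD dwTotal = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
        WORD  g;

        printf("%u processor group(s), %lu logical processor(s) total\n",
               (unsigned)wGroups, dwTotal);
        for (g = 0; g < wGroups; g++)
        {
            printf("  group %u: %lu processor(s)\n",
                   (unsigned)g, GetActiveProcessorCount(g));
        }
        return 0;
    }
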
@@ -357,55 +357,55 @@ int tMPI_Init_NUMA(void)
 
     /* allocate array of processor info blocks */
 
-    pMPI_ProcessorInfo = tMPI_Malloc( sizeof(MPI_NUMA_PROCESSOR_INFO) * 
+    pMPI_ProcessorInfo = tMPI_Malloc( sizeof(MPI_NUMA_PROCESSOR_INFO) *
                                       dwTotalProcessors );
-    if(pMPI_ProcessorInfo == NULL)
+    if (pMPI_ProcessorInfo == NULL)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"tMPI_Malloc failed for processor information");
+        tMPI_Fatal_error(TMPI_FARGS, "tMPI_Malloc failed for processor information");
         goto cleanup;
     }
 
     /* zero fill to cover reserved must be-zero fields */
     memset(pMPI_ProcessorInfo, 0, sizeof(MPI_NUMA_PROCESSOR_INFO) * dwTotalProcessors);
 
-    /* loop through each processor group, and for each group, capture the 
+    /* loop through each processor group, and for each group, capture the
        processor numbers and NUMA node information. */
 
-    for(GroupIndex = 0 ; GroupIndex < wActiveGroupCount ; GroupIndex++)
+    for (GroupIndex = 0; GroupIndex < wActiveGroupCount; GroupIndex++)
     {
         DWORD dwGroupProcessorCount;
-        BYTE ProcessorIndex;
+        BYTE  ProcessorIndex;
 
         dwGroupProcessorCount = func_GetActiveProcessorCount( GroupIndex );
 
-        for(ProcessorIndex = 0 ; ProcessorIndex < dwGroupProcessorCount ; 
-            ProcessorIndex++)
+        for (ProcessorIndex = 0; ProcessorIndex < dwGroupProcessorCount;
+             ProcessorIndex++)
         {
             PROCESSOR_NUMBER *pProcessorNumber = &(pMPI_ProcessorInfo[i].ProcessorNumber);
-            GROUP_AFFINITY *pGroupAffinity = &(pMPI_ProcessorInfo[i].GroupAffinity);
-            USHORT *pNodeNumber = &(pMPI_ProcessorInfo[i].NumaNodeNumber);
+            GROUP_AFFINITY   *pGroupAffinity   = &(pMPI_ProcessorInfo[i].GroupAffinity);
+            USHORT           *pNodeNumber      = &(pMPI_ProcessorInfo[i].NumaNodeNumber);
 
-            pProcessorNumber->Group = GroupIndex;
+            pProcessorNumber->Group  = GroupIndex;
             pProcessorNumber->Number = ProcessorIndex;
 
             /* save an index to the processor array entry for the current processor
-               this is used to enable subsequent threads to be created in a round 
+               this is used to enable subsequent threads to be created in a round
                robin fashion starting at the next array entry
-            */
+             */
 
-            if( (CurrentProcessorNumber.Group == pProcessorNumber->Group ) &&
-                (CurrentProcessorNumber.Number == pProcessorNumber->Number) )
+            if ( (CurrentProcessorNumber.Group == pProcessorNumber->Group ) &&
+                 (CurrentProcessorNumber.Number == pProcessorNumber->Number) )
             {
                 /* set global: current thread index into processor array */
                 g_ulThreadIndex = i;
             }
 
             /* capture the node number and group affinity associated with processor entry
-               any failures here are assumed to be catastrophic and disable 
+               any failures here are assumed to be catastrophic and disable
                the group & NUMA aware thread support
-            */
+             */
 
-            if(!func_GetNumaProcessorNodeEx(pProcessorNumber, pNodeNumber))
+            if (!func_GetNumaProcessorNodeEx(pProcessorNumber, pNodeNumber))
             {
                 tMPI_Fatal_error(TMPI_FARGS,
                                  "Processor enumeration, GetNumaProcessorNodeEx failed, error code=%d",
@@ -413,7 +413,7 @@ int tMPI_Init_NUMA(void)
                 goto cleanup;
             }
 
-            if(!func_GetNumaNodeProcessorMaskEx(*pNodeNumber, pGroupAffinity))
+            if (!func_GetNumaNodeProcessorMaskEx(*pNodeNumber, pGroupAffinity))
             {
                 tMPI_Fatal_error(TMPI_FARGS,
                                  "Processor enumeration, GetNumaNodeProcessorMaskEx failed, error code=%d",
@@ -421,7 +421,7 @@ int tMPI_Init_NUMA(void)
                 goto cleanup;
             }
 
-            /* future enhancement: construct GroupAffinity (single) processor 
+            /* future enhancement: construct GroupAffinity (single) processor
                mask within NUMA node for this processor entry */
 
             /* increment processor array index */
@@ -429,28 +429,28 @@ int tMPI_Init_NUMA(void)
 
             /* sanity check, should never happen */
 
-            if(i > dwTotalProcessors)
+            if (i > dwTotalProcessors)
             {
-                tMPI_Fatal_error(TMPI_FARGS,"Processor enumeration exceeds allocated memory!");
+                tMPI_Fatal_error(TMPI_FARGS, "Processor enumeration exceeds allocated memory!");
                 goto cleanup;
             }
         }
     }
 
 
-    /* capture number of processors, highest NUMA node number, and processor 
+    /* capture number of processors, highest NUMA node number, and processor
        array */
-    g_ulTotalProcessors = dwTotalProcessors;
+    g_ulTotalProcessors       = dwTotalProcessors;
     g_ulHighestNumaNodeNumber = ulHighestNumaNodeNumber;
-    g_MPI_ProcessorInfo = pMPI_ProcessorInfo;
+    g_MPI_ProcessorInfo       = pMPI_ProcessorInfo;
 
-    iRet = 0 ;
+    iRet = 0;
 
 cleanup:
 
-    if( iRet != 0 )
+    if (iRet != 0)
     {
-        if( pMPI_ProcessorInfo )
+        if (pMPI_ProcessorInfo)
         {
             tMPI_Free( pMPI_ProcessorInfo );
         }
@@ -462,42 +462,42 @@ cleanup:
 static void tMPI_Thread_id_list_init(void)
 {
     EnterCriticalSection( &thread_id_list_lock );
-    
-    N_thread_id_list=0; 
-    Nalloc_thread_id_list=4; /* number of initial allocation*/
-    thread_id_list=(thread_id_list_t*)tMPI_Malloc(
-                            sizeof(thread_id_list_t)*
-                            Nalloc_thread_id_list);
+
+    N_thread_id_list      = 0;
+    Nalloc_thread_id_list = 4; /* initial allocation size */
+    thread_id_list        = (thread_id_list_t*)tMPI_Malloc(
+                sizeof(thread_id_list_t)*
+                Nalloc_thread_id_list);
 
     LeaveCriticalSection( &thread_id_list_lock );
 }
 
 
 /* add an entry to the thread ID list, assuming it's locked */
-static void tMPI_Thread_id_list_add_locked(DWORD thread_id, 
-                                             struct tMPI_Thread *th)
+static void tMPI_Thread_id_list_add_locked(DWORD               thread_id,
+                                           struct tMPI_Thread *th)
 {
     if (Nalloc_thread_id_list < N_thread_id_list + 1)
     {
         thread_id_list_t* new_list;
-        int i;
+        int               i;
 
         /* double the size */
-        Nalloc_thread_id_list*=2; 
-        new_list=(thread_id_list_t*)tMPI_Malloc(
-                            sizeof(thread_id_list_t)*
-                            Nalloc_thread_id_list);
+        Nalloc_thread_id_list *= 2;
+        new_list               = (thread_id_list_t*)tMPI_Malloc(
+                    sizeof(thread_id_list_t)*
+                    Nalloc_thread_id_list);
         /* and copy over all elements */
-        for (i=0;i<N_thread_id_list;i++)
+        for (i = 0; i < N_thread_id_list; i++)
         {
             new_list[i] = thread_id_list[i];
         }
         /* free the old list */
         tMPI_Free(thread_id_list);
-        thread_id_list=new_list;
+        thread_id_list = new_list;
     }
     thread_id_list[ N_thread_id_list ].thread_id = thread_id;
-    thread_id_list[ N_thread_id_list ].th = th;
+    thread_id_list[ N_thread_id_list ].th        = th;
     N_thread_id_list++;
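
tMPI_Thread_id_list_add_locked() grows the array geometrically, which gives amortized O(1) appends. A standalone sketch of that strategy with hypothetical names (entry_t stands in for thread_id_list_t):

    #include <stdlib.h>
    #include <string.h>

    typedef struct
    {
        unsigned long thread_id;    /* stand-in for DWORD */
        void         *th;           /* stand-in for struct tMPI_Thread* */
    } entry_t;

    /* returns 0 on success, -1 if the allocation failed */
    static int entry_list_add(entry_t **list, int *n, int *nalloc, entry_t e)
    {
        if (*nalloc < *n + 1)
        {
            /* double the capacity (starting at 4), as the code above does */
            int      new_alloc = (*nalloc > 0) ? 2*(*nalloc) : 4;
            entry_t *new_list  = (entry_t*)malloc(sizeof(entry_t)*new_alloc);

            if (new_list == NULL)
            {
                return -1;
            }
            if (*n > 0)
            {
                memcpy(new_list, *list, sizeof(entry_t)*(*n));
            }
            free(*list);            /* free(NULL) is a no-op on first growth */
            *list   = new_list;
            *nalloc = new_alloc;
        }
        (*list)[(*n)++] = e;        /* append into the guaranteed free slot */
        return 0;
    }
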
 
 
@@ -515,22 +515,24 @@ static void tMPI_Thread_id_list_add(DWORD thread_id, struct tMPI_Thread *th)
 /* Remove an entry from the thread_id list, assuming it's locked */
 static void tMPI_Thread_id_list_remove_locked(DWORD thread_id)
 {
-    int i;
-    tmpi_bool found=FALSE;
+    int       i;
+    tmpi_bool found = FALSE;
 
     /* move the last thread_id_list item to the one we want to remove */
-    for(i=0;i<N_thread_id_list;i++)
+    for (i = 0; i < N_thread_id_list; i++)
     {
         if (thread_id_list[i].thread_id == thread_id)
         {
             thread_id_list[i] = thread_id_list[N_thread_id_list - 1];
-            found=TRUE;
+            found             = TRUE;
             break;
         }
     }
 
     if (found)
+    {
         N_thread_id_list--;
+    }
 }
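
The removal above is the classic swap-with-last trick: element order is not preserved, but that is irrelevant for an unordered ID table, and deletion is O(1) once the entry is found. A standalone version, reusing entry_t from the previous sketch:

    /* returns 0 if found and removed, -1 if no such thread id */
    static int entry_list_remove(entry_t *list, int *n, unsigned long thread_id)
    {
        int i;

        for (i = 0; i < *n; i++)
        {
            if (list[i].thread_id == thread_id)
            {
                list[i] = list[*n - 1];   /* overwrite victim with the last element */
                (*n)--;                   /* shrink the logical size */
                return 0;
            }
        }
        return -1;
    }
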
 
 
@@ -549,15 +551,15 @@ static void tMPI_Thread_id_list_remove(DWORD thread_id)
    such thread id in the list. Assumes the list is locked.*/
 static struct tMPI_Thread *tMPI_Thread_id_list_find_locked(DWORD thread_id)
 {
-    int i;
-    struct tMPI_Thread *ret=NULL;
+    int                 i;
+    struct tMPI_Thread *ret = NULL;
 
     /* this is a linear search but it's only O(Nthreads). */
-    for(i=0;i<N_thread_id_list;i++)
+    for (i = 0; i < N_thread_id_list; i++)
     {
         if (thread_id_list[i].thread_id == thread_id)
         {
-            ret=thread_id_list[i].th;
+            ret = thread_id_list[i].th;
             break;
         }
     }
@@ -569,10 +571,10 @@ static struct tMPI_Thread *tMPI_Thread_id_list_find_locked(DWORD thread_id)
    such thread id in the list.*/
 static struct tMPI_Thread *tMPI_Thread_id_list_find(DWORD thread_id)
 {
-    struct tMPI_Thread *ret=NULL;
+    struct tMPI_Thread *ret = NULL;
 
     EnterCriticalSection( &thread_id_list_lock );
-    ret=tMPI_Thread_id_list_find_locked(thread_id);
+    ret = tMPI_Thread_id_list_find_locked(thread_id);
 
     LeaveCriticalSection( &thread_id_list_lock );
     return ret;
@@ -582,31 +584,31 @@ static struct tMPI_Thread *tMPI_Thread_id_list_find(DWORD thread_id)
    associated with this thread.*/
 static struct tMPI_Thread *tMPI_Thread_id_list_add_self(void)
 {
-    DWORD thread_id;
-    struct tMPI_Thread *th=NULL;
+    DWORD               thread_id;
+    struct tMPI_Thread *th = NULL;
 
     EnterCriticalSection( &thread_id_list_lock );
 
     thread_id = GetCurrentThreadId();
-    th=tMPI_Thread_id_list_find_locked(thread_id);
+    th        = tMPI_Thread_id_list_find_locked(thread_id);
     if (th == NULL)
     {
         /* if not, create an ID, set it and return it */
-        th=(struct tMPI_Thread*)tMPI_Malloc(sizeof(struct tMPI_Thread)*1);
+        th = (struct tMPI_Thread*)tMPI_Malloc(sizeof(struct tMPI_Thread)*1);
 
         /* to create a handle that can be used outside of the current
            thread, the handle from GetCurrentThread() must first
            be duplicated.. */
-        DuplicateHandle(GetCurrentProcess(), 
-                        GetCurrentThread(), 
+        DuplicateHandle(GetCurrentProcess(),
+                        GetCurrentThread(),
                         GetCurrentProcess(),
-                        &th->th, 
+                        &th->th,
                         0,
                         FALSE,
                         DUPLICATE_SAME_ACCESS);
 
-        /* This causes a small memory leak that is hard to fix. */ 
-        th->started_by_tmpi=0;
+        /* This causes a small memory leak that is hard to fix. */
+        th->started_by_tmpi = 0;
         tMPI_Thread_id_list_add_locked(thread_id, th);
     }
     LeaveCriticalSection( &thread_id_list_lock );
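
The DuplicateHandle() call above matters because GetCurrentThread() returns a pseudo-handle, a constant meaning "the calling thread", which is useless to any other thread. A minimal demonstration of the conversion:

    #include <windows.h>
    #include <stdio.h>

    int main(void)
    {
        HANDLE hReal = NULL;

        /* turn the pseudo-handle into a real, process-wide handle that
           other threads can wait on or change the priority of */
        if (DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                            GetCurrentProcess(), &hReal,
                            0, FALSE, DUPLICATE_SAME_ACCESS))
        {
            printf("pseudo-handle: %p, real handle: %p\n",
                   (void*)GetCurrentThread(), (void*)hReal);
            CloseHandle(hReal);   /* real handles must be closed */
        }
        return 0;
    }
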
@@ -624,7 +626,7 @@ static void tMPI_Init_initers(void)
         /* this can be a spinlock because the chances of collision are low. */
         tMPI_Spinlock_lock( &init_init );
 
-        state=tMPI_Atomic_get(&init_inited);
+        state = tMPI_Atomic_get(&init_inited);
         tMPI_Atomic_memory_barrier_acq();
         if (state == 0)
         {
@@ -634,7 +636,7 @@ static void tMPI_Init_initers(void)
             InitializeCriticalSection(&barrier_init);
             InitializeCriticalSection(&thread_id_list_lock);
 
-            /* fatal errors are handled by the routine by calling 
+            /* fatal errors are handled by the routine by calling
                tMPI_Fatal_error() */
             tMPI_Init_NUMA();
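
tMPI_Init_initers() is a double-checked initialization: a cheap atomic read first, then the spinlock and a second check, so the critical sections above are created exactly once no matter how many threads race here. A hedged sketch of the same pattern, with plain Win32 interlocked operations standing in for the tMPI spinlock and atomic types:

    #include <windows.h>

    static volatile LONG g_lock   = 0;  /* 0 = free, 1 = held (spinlock) */
    static volatile LONG g_inited = 0;  /* 0 = not yet initialized */

    static void init_once(void (*body)(void))
    {
        /* fast path: an atomic read (full barrier), no lock taken */
        if (InterlockedCompareExchange(&g_inited, 0, 0) != 0)
        {
            return;
        }
        /* slow path: spin for the lock, then re-check under it */
        while (InterlockedExchange(&g_lock, 1) != 0)
        {
            /* spin; contention only happens at program start */
        }
        if (g_inited == 0)
        {
            body();                            /* the one-time work */
            InterlockedExchange(&g_inited, 1); /* publish with a full barrier */
        }
        InterlockedExchange(&g_lock, 0);       /* release the spinlock */
    }
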
 
@@ -660,7 +662,7 @@ void tMPI_Fatal_error(const char *file, int line, const char *message, ...)
     va_start(ap, message);
     vfprintf(stderr, message, ap);
     va_end(ap);
-    fprintf(stderr,"\n");
+    fprintf(stderr, "\n");
     abort();
 }
 
@@ -673,15 +675,15 @@ enum tMPI_Thread_support tMPI_Thread_support(void)
 
 struct tMPI_Thread_starter_param
 {
-    void *(*start_routine)(void*); /* the function */
-    void *param; /* its parameter */
+    void               *(*start_routine)(void*); /* the function */
+    void               *param;                   /* its parameter */
     struct tMPI_Thread *thread;
 };
 
-static DWORD WINAPI tMPI_Win32_thread_starter( LPVOID lpParam ) 
+static DWORD WINAPI tMPI_Win32_thread_starter( LPVOID lpParam )
 {
-    struct tMPI_Thread_starter_param *prm=
-              (struct tMPI_Thread_starter_param*)lpParam;
+    struct tMPI_Thread_starter_param *prm =
+        (struct tMPI_Thread_starter_param*)lpParam;
 
     (prm->start_routine)(prm->param);
     return 0;
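
tMPI_Win32_thread_starter() above is a trampoline: CreateThread() wants a DWORD WINAPI fn(LPVOID), so a heap-allocated block carries the pthread-style start routine and its argument into the new thread. A self-contained sketch; unlike the code above, which deliberately leaks the block (see tMPI_Thread_create() below), this version copies it and frees it in the new thread:

    #include <windows.h>
    #include <stdlib.h>
    #include <stdio.h>

    struct starter
    {
        void *(*start_routine)(void*);   /* pthread-style entry point */
        void   *param;
    };

    static DWORD WINAPI trampoline(LPVOID lpParam)
    {
        struct starter s = *(struct starter*)lpParam;

        free(lpParam);               /* safe: the block was copied first */
        s.start_routine(s.param);
        return 0;
    }

    static void *hello(void *arg)
    {
        printf("hello, %s\n", (const char*)arg);
        return NULL;
    }

    int main(void)
    {
        struct starter *prm = (struct starter*)malloc(sizeof(*prm));
        HANDLE          th;

        prm->start_routine = hello;
        prm->param         = (void*)"world";
        th = CreateThread(NULL, 0, trampoline, prm, 0, NULL);
        WaitForSingleObject(th, INFINITE);
        CloseHandle(th);
        return 0;
    }
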
@@ -690,12 +692,12 @@ static DWORD WINAPI tMPI_Win32_thread_starter( LPVOID lpParam )
 
 int tMPI_Thread_get_hw_number(void)
 {
-    int ret;
+    int         ret;
 
     SYSTEM_INFO sysinfo;
     GetSystemInfo( &sysinfo );
 
-    ret=sysinfo.dwNumberOfProcessors;
+    ret = sysinfo.dwNumberOfProcessors;
     return ret;
 }
 
@@ -710,37 +712,37 @@ int tMPI_Thread_create(tMPI_Thread_t *thread,
 
     tMPI_Init_initers();
 
-    /* a small memory leak to be sure that it doesn't get deallocated 
+    /* a small memory leak to be sure that it doesn't get deallocated
        once this function ends, before the newly created thread uses it. */
-    prm=(struct tMPI_Thread_starter_param*)
-              tMPI_Malloc(sizeof(struct tMPI_Thread_starter_param));
-    prm->start_routine= start_routine;
-    prm->param=arg;
+    prm = (struct tMPI_Thread_starter_param*)
+        tMPI_Malloc(sizeof(struct tMPI_Thread_starter_param));
+    prm->start_routine = start_routine;
+    prm->param         = arg;
 
-    *thread=(struct tMPI_Thread*)tMPI_Malloc(sizeof(struct tMPI_Thread)*1);
+    *thread = (struct tMPI_Thread*)tMPI_Malloc(sizeof(struct tMPI_Thread)*1);
 
-    if(thread==NULL)
+    if (thread == NULL)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Invalid thread pointer.");
+        tMPI_Fatal_error(TMPI_FARGS, "Invalid thread pointer.");
         return EINVAL;
     }
-    /* this must be locked before the thread is created to prevent a race 
+    /* this must be locked before the thread is created to prevent a race
        condition if the thread immediately wants to create its own entry */
     EnterCriticalSection( &thread_id_list_lock );
     /* just create a plain thread. */
-    (*thread)->started_by_tmpi=1;
-    (*thread)->th = CreateThread(NULL,
-                                 0,
-                                 tMPI_Win32_thread_starter,
-                                 prm,
-                                 0, 
-                                 &thread_id);
-    (*thread)->id=thread_id;
-
-    if((*thread)->th==NULL)
+    (*thread)->started_by_tmpi = 1;
+    (*thread)->th              = CreateThread(NULL,
+                                              0,
+                                              tMPI_Win32_thread_starter,
+                                              prm,
+                                              0,
+                                              &thread_id);
+    (*thread)->id = thread_id;
+
+    if ((*thread)->th == NULL)
     {
         tMPI_Free(thread);
-        tMPI_Fatal_error(TMPI_FARGS,"Failed to create thread, error code=%d",
+        tMPI_Fatal_error(TMPI_FARGS, "Failed to create thread, error code=%d",
                          GetLastError());
         return -1;
     }
@@ -748,8 +750,8 @@ int tMPI_Thread_create(tMPI_Thread_t *thread,
     LeaveCriticalSection( &thread_id_list_lock );
 
     /* inherit the thread priority from the parent thread. */
-    /* TODO: is there value in setting this, vs. just allowing it to default 
-       from the process?  currently, this limits the effectivenes of changing 
+    /* TODO: is there value in setting this, vs. just allowing it to default
+       from the process?  currently, this limits the effectivenes of changing
        the priority in eg: TaskManager. */
     SetThreadPriority(((*thread)->th), GetThreadPriority(GetCurrentThread()));
 
@@ -764,13 +766,13 @@ int tMPI_Thread_create(tMPI_Thread_t *thread,
 
 int tMPI_Thread_join(tMPI_Thread_t thread, void **value_ptr)
 {
-    DWORD ret,retval;
+    DWORD ret, retval;
 
     ret = WaitForSingleObject(thread->th, INFINITE);
 
     if (ret != 0)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Failed to join thread. error code=%d",
+        tMPI_Fatal_error(TMPI_FARGS, "Failed to join thread. error code=%d",
                          GetLastError());
         return -1;
     }
@@ -808,7 +810,7 @@ int tMPI_Thread_cancel(tMPI_Thread_t thread)
 {
     if (!TerminateThread( thread, -1) )
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Failed thread_cancel, error code=%d",
+        tMPI_Fatal_error(TMPI_FARGS, "Failed thread_cancel, error code=%d",
                          GetLastError());
         return -1;
     }
@@ -822,7 +824,7 @@ tMPI_Thread_t tMPI_Thread_self(void)
     tMPI_Thread_t th;
     tMPI_Init_initers();
 
-    th=tMPI_Thread_id_list_add_self();
+    th = tMPI_Thread_id_list_add_self();
 
     return th;
 }
@@ -842,52 +844,52 @@ enum tMPI_Thread_setaffinity_support tMPI_Thread_setaffinity_support(void)
 
 int tMPI_Thread_setaffinity_single(tMPI_Thread_t thread, unsigned int nr)
 {
-    GROUP_AFFINITY GroupAffinity;
+    GROUP_AFFINITY   GroupAffinity;
     PROCESSOR_NUMBER IdealProcessorNumber;
     /* thread NUMA node */
-    USHORT NumaNodeNumber;
+    USHORT           NumaNodeNumber;
 
-    /* check for a processor info array. This exists if NUMA 
+    /* check for a processor info array. This exists if NUMA
       style calls have been successfully initialized. */
-    if( g_MPI_ProcessorInfo != NULL )
+    if (g_MPI_ProcessorInfo != NULL)
     {
 
         /*func_GetCurrentProcessorNumberEx(&CurrentProcessorNumber);*/
         /* group, mask. */
-        memcpy(&GroupAffinity, 
-               &(g_MPI_ProcessorInfo[nr].GroupAffinity), 
+        memcpy(&GroupAffinity,
+               &(g_MPI_ProcessorInfo[nr].GroupAffinity),
                sizeof(GROUP_AFFINITY));
 
         /* group, processor number */
 
-        memcpy(&IdealProcessorNumber, 
-               &(g_MPI_ProcessorInfo[nr].ProcessorNumber), 
-               sizeof(PROCESSOR_NUMBER)); 
+        memcpy(&IdealProcessorNumber,
+               &(g_MPI_ProcessorInfo[nr].ProcessorNumber),
+               sizeof(PROCESSOR_NUMBER));
 
 
         /* set the NUMA node affinity for the current thread
-           failures to set the current thread affinity are ignored, 
-           as a fringe case can arise on >32 processor systems with a 32bit 
+           failures to set the current thread affinity are ignored,
+           as a fringe case can arise on >32 processor systems with a 32bit
            build/code.
-           */
+         */
         func_SetThreadIdealProcessorEx(thread->th,
                                        &IdealProcessorNumber,
                                        NULL);
 
-        if(func_GetNumaProcessorNodeEx(&IdealProcessorNumber,
-                                       &NumaNodeNumber))
+        if (func_GetNumaProcessorNodeEx(&IdealProcessorNumber,
+                                        &NumaNodeNumber))
         {
-            /* for the NUMA node number associated with the current processor 
+            /* for the NUMA node number associated with the current processor
                number, get the group affinity mask */
-            if(func_GetNumaNodeProcessorMaskEx(NumaNodeNumber,
-                                               &GroupAffinity))
+            if (func_GetNumaNodeProcessorMaskEx(NumaNodeNumber,
+                                                &GroupAffinity))
             {
                 /* set the current thread affinity to prevent it from running
                    on other NUMA nodes */
                 func_SetThreadGroupAffinity(thread->th,
                                             &GroupAffinity,
                                             NULL);
-                               return 0;
+                return 0;
             }
         }
         return 1;
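
For reference, a hedged sketch of the affinity sequence implemented above: ideal-processor hint, processor to NUMA node, node to group affinity mask, then hard group affinity. It links the Windows 7 APIs directly, whereas the real code must go through the function pointers resolved in tMPI_Init_NUMA():

    #define _WIN32_WINNT 0x0601   /* Windows 7+ APIs linked directly */
    #include <windows.h>

    /* returns 0 on success, 1 if the NUMA queries failed */
    static int pin_thread_to_its_numa_node(HANDLE            hThread,
                                           PROCESSOR_NUMBER *pProcNumber)
    {
        USHORT         NodeNumber;
        GROUP_AFFINITY GroupAffinity;

        /* soft placement hint; failure is ignored, as in the code above */
        SetThreadIdealProcessorEx(hThread, pProcNumber, NULL);

        if (!GetNumaProcessorNodeEx(pProcNumber, &NodeNumber))
        {
            return 1;
        }
        if (!GetNumaNodeProcessorMaskEx(NodeNumber, &GroupAffinity))
        {
            return 1;
        }
        /* hard affinity: keep the thread on its node's processors to
           prevent scheduler drift across NUMA nodes */
        SetThreadGroupAffinity(hThread, &GroupAffinity, NULL);
        return 0;
    }
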
@@ -905,23 +907,23 @@ int tMPI_Thread_setaffinity_single(tMPI_Thread_t thread, unsigned int nr)
 
 
 
-int tMPI_Thread_mutex_init(tMPI_Thread_mutex_t *mtx) 
+int tMPI_Thread_mutex_init(tMPI_Thread_mutex_t *mtx)
 {
-    if(mtx==NULL)
+    if (mtx == NULL)
     {
         return EINVAL;
     }
 
-    mtx->mutex=(struct tMPI_Mutex*)tMPI_Malloc(sizeof(struct tMPI_Mutex)*1);
+    mtx->mutex = (struct tMPI_Mutex*)tMPI_Malloc(sizeof(struct tMPI_Mutex)*1);
     InitializeCriticalSection(&(mtx->mutex->cs));
 
     return 0;
 }
 
 
-int tMPI_Thread_mutex_destroy(tMPI_Thread_mutex_t *mtx) 
+int tMPI_Thread_mutex_destroy(tMPI_Thread_mutex_t *mtx)
 {
-    if(mtx == NULL)
+    if (mtx == NULL)
     {
         return EINVAL;
     }
@@ -937,14 +939,14 @@ int tMPI_Thread_mutex_destroy(tMPI_Thread_mutex_t *mtx)
 
 static int tMPI_Thread_mutex_init_once(tMPI_Thread_mutex_t *mtx)
 {
-    int ret=0;
+    int ret = 0;
 
     /* This is essentially a copy of the code from the one-time
      * initialization, but with a call to the mutex init routine instead.
      * It might seem like overkill, but it will only be executed the first
-     * time you call a static mutex, and it is important to get all the 
+     * time you call a static mutex, and it is important to get all the
      * memory barriers right. Trust me, you don't want a deadlock here...
-     */ 
+     */
 
     /* initialize the initializers */
     tMPI_Init_initers();
@@ -957,7 +959,7 @@ static int tMPI_Thread_mutex_init_once(tMPI_Thread_mutex_t *mtx)
         /* No need to keep the lock during execution -
          * Only one thread can do it anyway.
          */
-        ret=tMPI_Thread_mutex_init(mtx);
+        ret = tMPI_Thread_mutex_init(mtx);
     }
     LeaveCriticalSection( &mutex_init );
 
@@ -994,7 +996,7 @@ int tMPI_Thread_mutex_trylock(tMPI_Thread_mutex_t *mtx)
     }
 
     /* The mutex is now guaranteed to be valid. */
-    ret=TryEnterCriticalSection( &(mtx->mutex->cs) );
+    ret = TryEnterCriticalSection( &(mtx->mutex->cs) );
 
     return (ret != 0);
 }
@@ -1013,20 +1015,20 @@ int tMPI_Thread_mutex_unlock(tMPI_Thread_mutex_t *mtx)
 
 int tMPI_Thread_key_create(tMPI_Thread_key_t *key, void (*destructor)(void *))
 {
-    if(key==NULL)
+    if (key == NULL)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Invalid key pointer.");
+        tMPI_Fatal_error(TMPI_FARGS, "Invalid key pointer.");
         return EINVAL;
     }
 
 
     /* TODO: make list of destructors for thread-local storage */
-    key->key=(struct tMPI_Thread_key*)tMPI_Malloc(sizeof(struct 
-                                                         tMPI_Thread_key)*1);
-    (key)->key->wkey=TlsAlloc();
+    key->key = (struct tMPI_Thread_key*)tMPI_Malloc(sizeof(struct
+                                                           tMPI_Thread_key)*1);
 
-    if ( (key)->key->wkey == TLS_OUT_OF_INDEXES ) 
+    (key)->key->wkey = TlsAlloc();
+
+    if ( (key)->key->wkey == TLS_OUT_OF_INDEXES)
     {
         tMPI_Fatal_error(TMPI_FARGS,
                          "Failed to create thread key, error code=%d.",
@@ -1052,7 +1054,7 @@ void * tMPI_Thread_getspecific(tMPI_Thread_key_t key)
 {
     void *p = NULL;
 
-    p=TlsGetValue(key.key->wkey);
+    p = TlsGetValue(key.key->wkey);
 
     return p;
 }
@@ -1064,16 +1066,16 @@ int tMPI_Thread_setspecific(tMPI_Thread_key_t key, void *value)
 
     ret = TlsSetValue(key.key->wkey, value);
 
-    return ret==0;
+    return ret == 0;
 }
 
 #if 0
 /* use once Vista is minimum required version */
 static BOOL CALLBACK InitHandleWrapperFunction(PINIT_ONCE InitOnce,
-                                               PVOID Parameter,
-                                               PVOID *lpContext)
+                                               PVOID      Parameter,
+                                               PVOID     *lpContext)
 {
-    void (*fn)(void)=(void (*)(void))Parameter;
+    void (*fn)(void) = (void (*)(void))Parameter;
 
     fn();
 
@@ -1081,22 +1083,22 @@ static BOOL CALLBACK InitHandleWrapperFunction(PINIT_ONCE InitOnce,
 }
 
 CRITICAL_SECTION tMPI_Once_cs;
-tMPI_Spinlock_t tMPI_Once_cs_lock=TMPI_SPINLOCK_INITIALIZER;
-volatile int tMPI_Once_init=0;
+tMPI_Spinlock_t  tMPI_Once_cs_lock = TMPI_SPINLOCK_INITIALIZER;
+volatile int     tMPI_Once_init    = 0;
 #endif
 
-int tMPI_Thread_once(tMPI_Thread_once_t *once_control, 
-                     void (*init_routine)(void))
+int tMPI_Thread_once(tMPI_Thread_once_t *once_control,
+                     void                (*init_routine)(void))
 {
 #if 0
     /* use once Vista is minimum required version */
     BOOL bStatus;
-    bStatus = InitOnceExecuteOnce(once_control, InitHandleWrapperFunction, 
+    bStatus = InitOnceExecuteOnce(once_control, InitHandleWrapperFunction,
                                   init_routine, NULL);
 
     if (!bStatus)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Failed to run thread_once routine");
+        tMPI_Fatal_error(TMPI_FARGS, "Failed to run thread_once routine");
         return -1;
     }
 #else
@@ -1117,31 +1119,31 @@ int tMPI_Thread_once(tMPI_Thread_once_t *once_control,
 
 
 
-int tMPI_Thread_cond_init(tMPI_Thread_cond_t *cond) 
+int tMPI_Thread_cond_init(tMPI_Thread_cond_t *cond)
 {
-    if(cond==NULL)
+    if (cond == NULL)
     {
         return EINVAL;
     }
 
-    cond->condp=(struct tMPI_Thread_cond*)
-              tMPI_Malloc(sizeof(struct tMPI_Thread_cond)*1);
+    cond->condp = (struct tMPI_Thread_cond*)
+        tMPI_Malloc(sizeof(struct tMPI_Thread_cond)*1);
 #if 0
     /* use this code once Vista is the minimum version required */
     InitializeConditionVariable( &(cond->cv) );
 #else
-    cond->condp->Nwaiters=0;
+    cond->condp->Nwaiters = 0;
     InitializeCriticalSection(&(cond->condp->wtr_lock));
-    cond->condp->Nrelease=0;
-    cond->condp->cycle=0;
+    cond->condp->Nrelease = 0;
+    cond->condp->cycle    = 0;
     /* a manual reset, unsignalled event */
-    cond->condp->ev = CreateEvent(NULL, TRUE, FALSE, NULL); 
+    cond->condp->ev = CreateEvent(NULL, TRUE, FALSE, NULL);
 #endif
     return 0;
 }
 
 
-int tMPI_Thread_cond_destroy(tMPI_Thread_cond_t *cond) 
+int tMPI_Thread_cond_destroy(tMPI_Thread_cond_t *cond)
 {
 #if 0
     /* use this code once Vista is the minimum version required */
@@ -1155,27 +1157,27 @@ int tMPI_Thread_cond_destroy(tMPI_Thread_cond_t *cond)
 
 
 
-/*! \brief Static init routine for pthread barrier 
+/*! \brief Static init routine for condition variable
  *
  * \internal
  *
  * This is only used as a wrapper to enable static initialization
 * of posix thread types together with our abstraction layer for tMPI_Thread.h
- * 
+ *
  * \param cond  Condition variable, must be statically initialized
- *  
+ *
  * \return status - 0 on success, or a standard error code.
  */
 static int tMPI_Thread_cond_init_once(tMPI_Thread_cond_t *cond)
 {
-    int ret=0;
+    int ret = 0;
 
     /* This is essentially a copy of the code from the one-time
      * initialization, but with a call to the cond init routine instead.
      * It might seem like overkill, but it will only be executed the first
-     * time you call a static condition variable, and it is important to get 
+     * time you call a static condition variable, and it is important to get
      * the memory barriers right. Trust me, you don't want a deadlock here...
-     */ 
+     */
 
     /* initialize the initializers */
     tMPI_Init_initers();
@@ -1187,7 +1189,7 @@ static int tMPI_Thread_cond_init_once(tMPI_Thread_cond_t *cond)
     {
         /* No need to keep the lock during execution -
          * Only one thread can do it anyway.  */
-        ret=tMPI_Thread_cond_init(cond);
+        ret = tMPI_Thread_cond_init(cond);
     }
     LeaveCriticalSection( &cond_init );
 
@@ -1199,9 +1201,9 @@ static int tMPI_Thread_cond_init_once(tMPI_Thread_cond_t *cond)
 
 int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
 {
-    BOOL wait_done=FALSE;
-    BOOL last_waiter=FALSE;
-    int my_cycle;
+    BOOL wait_done   = FALSE;
+    BOOL last_waiter = FALSE;
+    int  my_cycle;
 
     /* check whether the condition is initialized */
     if (tMPI_Atomic_get( &(cond->initialized)  ) == 0)
@@ -1212,11 +1214,11 @@ int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
 
 #if 0
     /* use this code once Vista is the minimum version required */
-    ret=SleepConditionVariableCS (&(cond->cv), &(mtx->cs), INFINITE);
+    ret = SleepConditionVariableCS (&(cond->cv), &(mtx->cs), INFINITE);
 
     if (!ret)
     {
-        tMPI_Fatal_error(TMPI_FARGS,"Failed wait for condition, error code=%d",
+        tMPI_Fatal_error(TMPI_FARGS, "Failed wait for condition, error code=%d",
                          GetLastError());
         return -1;
     }
@@ -1236,20 +1238,20 @@ int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
     do
     {
         /* do the actual waiting */
-        if (WaitForSingleObject( cond->condp->ev, INFINITE )== WAIT_FAILED)
+        if (WaitForSingleObject( cond->condp->ev, INFINITE ) == WAIT_FAILED)
         {
-            tMPI_Fatal_error(TMPI_FARGS,"Failed event reset, error code=%d",
+            tMPI_Fatal_error(TMPI_FARGS, "Failed event reset, error code=%d",
                              GetLastError());
             return -1;
         }
 
         /* serially check whether we got the right event.  */
         EnterCriticalSection(&(cond->condp->wtr_lock));
-        wait_done = (cond->condp->Nrelease > 0) && 
-                    (cond->condp->cycle!=my_cycle);
+        wait_done = (cond->condp->Nrelease > 0) &&
+            (cond->condp->cycle != my_cycle);
         LeaveCriticalSection(&(cond->condp->wtr_lock));
     }
-    while(!wait_done);
+    while (!wait_done);
 
     /* We obtain the mutex from the function call */
     EnterCriticalSection(&(mtx->mutex->cs));
@@ -1258,7 +1260,7 @@ int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
     EnterCriticalSection(&(cond->condp->wtr_lock));
     cond->condp->Nwaiters--;
     cond->condp->Nrelease--;
-    last_waiter=(cond->condp->Nrelease==0);
+    last_waiter = (cond->condp->Nrelease == 0);
     LeaveCriticalSection(&(cond->condp->wtr_lock));
 
     /* manually release the event if everybody's done with it */
@@ -1266,7 +1268,7 @@ int tMPI_Thread_cond_wait(tMPI_Thread_cond_t *cond, tMPI_Thread_mutex_t *mtx)
     {
         if (!ResetEvent( cond->condp->ev ))
         {
-            tMPI_Fatal_error(TMPI_FARGS,"Failed event reset, error code=%d",
+            tMPI_Fatal_error(TMPI_FARGS, "Failed event reset, error code=%d",
                              GetLastError());
             return -1;
         }
@@ -1297,10 +1299,10 @@ int tMPI_Thread_cond_signal(tMPI_Thread_cond_t *cond)
     {
         cond->condp->Nrelease++;
         cond->condp->cycle++;
-        if (!SetEvent(cond->condp->ev)) /* actually release the 
+        if (!SetEvent(cond->condp->ev)) /* actually release the
                                            waiting threads */
         {
-            tMPI_Fatal_error(TMPI_FARGS,"Failed SetEvent, error code=%d",
+            tMPI_Fatal_error(TMPI_FARGS, "Failed SetEvent, error code=%d",
                              GetLastError());
             return -1;
         }
@@ -1329,12 +1331,12 @@ int tMPI_Thread_cond_broadcast(tMPI_Thread_cond_t *cond)
     /* check whether there are any waiters */
     if (cond->condp->Nwaiters > 0)
     {
-        cond->condp->Nrelease=cond->condp->Nwaiters;
+        cond->condp->Nrelease = cond->condp->Nwaiters;
         cond->condp->cycle++;
-        if (!SetEvent(cond->condp->ev)) /* actually release the 
+        if (!SetEvent(cond->condp->ev)) /* actually release the
                                            waiting threads */
         {
-            tMPI_Fatal_error(TMPI_FARGS,"Failed SetEvent, error code=%d",
+            tMPI_Fatal_error(TMPI_FARGS, "Failed SetEvent, error code=%d",
                              GetLastError());
             return -1;
         }
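
A hedged usage sketch of this condition-variable API: a one-slot mailbox (the thread_mpi include path is assumed, and the mutex and condition variable are presumed initialized with tMPI_Thread_mutex_init()/tMPI_Thread_cond_init() first). The while loop around the wait is essential, because a waiter can wake without the predicate holding:

    #include "thread_mpi/threads.h"   /* include path assumed */

    static tMPI_Thread_mutex_t mtx;
    static tMPI_Thread_cond_t  cv;
    static int                 have_item = 0;
    static int                 item;

    static void put(int v)
    {
        tMPI_Thread_mutex_lock(&mtx);
        item      = v;
        have_item = 1;
        tMPI_Thread_cond_signal(&cv);   /* wake one waiter */
        tMPI_Thread_mutex_unlock(&mtx);
    }

    static int get(void)
    {
        int v;

        tMPI_Thread_mutex_lock(&mtx);
        while (!have_item)              /* always re-test the predicate */
        {
            tMPI_Thread_cond_wait(&cv, &mtx);
        }
        v         = item;
        have_item = 0;
        tMPI_Thread_mutex_unlock(&mtx);
        return v;
    }
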
@@ -1349,16 +1351,16 @@ int tMPI_Thread_cond_broadcast(tMPI_Thread_cond_t *cond)
 
 int tMPI_Thread_barrier_init(tMPI_Thread_barrier_t *barrier, int n)
 {
-    if(barrier==NULL)
+    if (barrier == NULL)
     {
         return EINVAL;
     }
 
-    barrier->barrierp=(struct tMPI_Thread_barrier*)
-              tMPI_Malloc(sizeof(struct tMPI_Thread_barrier)*1);
+    barrier->barrierp = (struct tMPI_Thread_barrier*)
+        tMPI_Malloc(sizeof(struct tMPI_Thread_barrier)*1);
 
 #if 0
- /* use this once Vista is the oldest supported windows version: */
   /* use this once Vista is the oldest supported windows version: */
     InitializeCriticalSection(&(barrier->barrierp->cs));
     InitializeConditionVariable(&(barrier->barrierp->cv));
 #else
@@ -1376,8 +1378,8 @@ int tMPI_Thread_barrier_init(tMPI_Thread_barrier_t *barrier, int n)
 
 
 int tMPI_Thread_barrier_destroy(tMPI_Thread_barrier_t *barrier)
-{   
-    if(barrier==NULL)
+{
+    if (barrier == NULL)
     {
         return EINVAL;
     }
@@ -1397,7 +1399,7 @@ int tMPI_Thread_barrier_destroy(tMPI_Thread_barrier_t *barrier)
 
 
 
-/*! \brief Static init routine for pthread barrier 
+/*! \brief Static init routine for thread barrier
  *
  * \internal
  *
@@ -1406,7 +1408,7 @@ int tMPI_Thread_barrier_destroy(tMPI_Thread_barrier_t *barrier)
  *
  * \param barrier Statically initialized barrier type
  * \param n       Number of members in barrier
- * 
+ *
  * \return status - 0 on success, or a standard error code.
  */
 static int tMPI_Thread_barrier_init_once(tMPI_Thread_barrier_t *barrier, int n)
@@ -1416,9 +1418,9 @@ static int tMPI_Thread_barrier_init_once(tMPI_Thread_barrier_t *barrier, int n)
     /* This is essentially a copy of the code from the one-time
      * initialization, but with a call to the barrier init routine instead.
      * It might seem like overkill, but it will only be executed the first
-     * time you call a static condition variable, and it is important to get 
+     * time you call a static barrier, and it is important to get
      * the memory barriers right. Trust me, you don't want a deadlock here...
-     */ 
+     */
 
 
     /* initialize the initializers */
@@ -1432,7 +1434,7 @@ static int tMPI_Thread_barrier_init_once(tMPI_Thread_barrier_t *barrier, int n)
     {
         /* No need to keep the lock during execution -
          * Only one thread can do it anyway.  */
-        ret=tMPI_Thread_barrier_init(barrier, n);
+        ret = tMPI_Thread_barrier_init(barrier, n);
     }
     LeaveCriticalSection( &barrier_init );
 
@@ -1443,15 +1445,15 @@ static int tMPI_Thread_barrier_init_once(tMPI_Thread_barrier_t *barrier, int n)
 
 int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *barrier)
 {
-    int    cycle;
-    BOOL    rc=FALSE;
-    int     ret=0;
+    int     cycle;
+    BOOL    rc  = FALSE;
+    int     ret = 0;
     /*tMPI_Thread_pthread_barrier_t *p;*/
 
     /* check whether the barrier is initialized */
     if (tMPI_Atomic_get( &(barrier->initialized)  ) == 0)
     {
-        tMPI_Thread_barrier_init_once(barrier,barrier->threshold);        
+        tMPI_Thread_barrier_init_once(barrier, barrier->threshold);
     }
 
 #if 0
@@ -1467,8 +1469,8 @@ int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *barrier)
     /* Decrement the count atomically and check if it is zero.
      * This will only be true for the last thread calling us.
      */
-    if( --(barrier->count) <= 0 )
-    { 
+    if (--(barrier->count) <= 0)
+    {
         barrier->cycle = !barrier->cycle;
         barrier->count = barrier->threshold;
 #if 0
@@ -1479,21 +1481,24 @@ int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *barrier)
     }
     else
     {
-        while(cycle == barrier->cycle)
+        while (cycle == barrier->cycle)
         {
 #if 0
-            rc=SleepConditionVariableCS (&(barrier->barrierp->cv), 
-                                         &(barrier->barrierp->cs), 
-                                         INFINITE);
-            if(!rc) 
+            rc = SleepConditionVariableCS (&(barrier->barrierp->cv),
+                                           &(barrier->barrierp->cs),
+                                           INFINITE);
+            if (!rc)
             {
-                ret=-1;
+                ret = -1;
                 break;
             }
 #else
             rc = tMPI_Thread_cond_wait(&barrier->barrierp->cv,
                                        &barrier->barrierp->cs);
-            if(rc != 0) break;
+            if (rc != 0)
+            {
+                break;
+            }
 #endif
         }
     }
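
Finally, a hedged usage sketch of the barrier (include path assumed, barrier presumed initialized for NTHREADS members): every thread must arrive before any proceeds, and the cycle flip above is what keeps a fast thread that has already entered the next iteration from slipping through the current barrier again:

    #include "thread_mpi/threads.h"   /* include path assumed */

    #define NSTEPS 10

    static tMPI_Thread_barrier_t barrier;  /* tMPI_Thread_barrier_init(&barrier, NTHREADS) first */

    static void *worker(void *arg)
    {
        int step;

        (void)arg;
        for (step = 0; step < NSTEPS; step++)
        {
            /* ... do this step's share of the work ... */

            tMPI_Thread_barrier_wait(&barrier);   /* all threads sync here */
        }
        return NULL;
    }
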
@@ -1508,7 +1513,6 @@ int tMPI_Thread_barrier_wait(tMPI_Thread_barrier_t *barrier)
 #else
 
 /* just to have some symbols */
-int tMPI_Thread_winthreads=0;
+int tMPI_Thread_winthreads = 0;
 
 #endif /* THREAD_WINDOWS  */
-