Code beautification with uncrustify

[alexxy/gromacs.git] / src / gromacs / mdlib / gmx_wallcycle.c
diff --git a/src/gromacs/mdlib/gmx_wallcycle.c b/src/gromacs/mdlib/gmx_wallcycle.c
index cebefc09c5e1eeb69a408f52250957fe00e987ad..eb60d3fe0565402b1207eb1c231f804ca32ca7a6 100644
--- a/src/gromacs/mdlib/gmx_wallcycle.c
+++ b/src/gromacs/mdlib/gmx_wallcycle.c
@@ -1,34 +1,34 @@
  /*  -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   *
- * 
+ *
   *                This source code is part of
- * 
+ *
   *                 G   R   O   M   A   C   S
- * 
+ *
   *          GROningen MAchine for Chemical Simulations
- * 
+ *
   * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
   * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   * Copyright (c) 2001-2008, The GROMACS development team,
   * check out http://www.gromacs.org for more information.
- 
+
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of the GNU General Public License
   * as published by the Free Software Foundation; either version 2
   * of the License, or (at your option) any later version.
- * 
+ *
   * If you want to redistribute modifications, please consider that
   * scientific software is very special. Version control is crucial -
   * bugs must be traceable. We will be happy to consider code for
   * inclusion in the official distribution, but derived work must not
   * be called official GROMACS. Details are found in the README & COPYING
   * files - if they are missing, get the official version at www.gromacs.org.
- * 
+ *
   * To help us fund GROMACS development, we humbly ask that you cite
   * the papers on the package - you can find them in the top README file.
- * 
+ *
   * For more info, check our website at http://www.gromacs.org
- * 
+ *
   * And Hey:
   * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
   */
@@ -69,65 +69,68 @@ typedef struct
  
  typedef struct gmx_wallcycle
  {
-    wallcc_t     *wcc;
+    wallcc_t        *wcc;
      /* variables for testing/debugging */
      gmx_bool         wc_barrier;
-    wallcc_t     *wcc_all;
-    int          wc_depth;
+    wallcc_t        *wcc_all;
+    int              wc_depth;
  #ifdef DEBUG_WCYCLE
  #define DEPTH_MAX 6
-    int          counterlist[DEPTH_MAX];
-    int          count_depth;
+    int               counterlist[DEPTH_MAX];
+    int               count_depth;
  #endif
-    int          ewc_prev;
-    gmx_cycles_t cycle_prev;
+    int               ewc_prev;
+    gmx_cycles_t      cycle_prev;
      gmx_large_int_t   reset_counters;
  #ifdef GMX_MPI
-    MPI_Comm     mpi_comm_mygroup;
+    MPI_Comm          mpi_comm_mygroup;
  #endif
-    int          nthreads_pp;
-    int          nthreads_pme;
+    int               nthreads_pp;
+    int               nthreads_pme;
  #ifdef GMX_CYCLE_SUBCOUNTERS
-    wallcc_t     *wcsc;
+    wallcc_t         *wcsc;
  #endif
-    double       *cycles_sum;
+    double           *cycles_sum;
  } gmx_wallcycle_t_t;
  
  /* Each name should not exceed 19 characters */
  static const char *wcn[ewcNR] =
-{ "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
-  "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
-  "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
-  "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve",
-  "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
-  "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
-  "Enforced rotation", "Add rot. forces", "Test" };
+{
+    "Run", "Step", "PP during PME", "Domain decomp.", "DD comm. load",
+    "DD comm. bounds", "Vsite constr.", "Send X to PME", "Neighbor search", "Launch GPU ops.",
+    "Comm. coord.", "Born radii", "Force", "Wait + Comm. F", "PME mesh",
+    "PME redist. X/F", "PME spread/gather", "PME 3D-FFT", "PME 3D-FFT Comm.", "PME solve",
+    "PME wait for PP", "Wait + Recv. PME F", "Wait GPU nonlocal", "Wait GPU local", "NB X/F buffer ops.",
+    "Vsite spread", "Write traj.", "Update", "Constraints", "Comm. energies",
+    "Enforced rotation", "Add rot. forces", "Test"
+};
  
  static const char *wcsn[ewcsNR] =
-{ "DD redist.", "DD NS grid + sort", "DD setup comm.",
-  "DD make top.", "DD make constr.", "DD top. other",
-  "NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
-  "Bonded F", "Nonbonded F", "Ewald F correction",
-  "NB X buffer ops.", "NB F buffer ops."
+{
+    "DD redist.", "DD NS grid + sort", "DD setup comm.",
+    "DD make top.", "DD make constr.", "DD top. other",
+    "NS grid local", "NS grid non-loc.", "NS search local", "NS search non-loc.",
+    "Bonded F", "Nonbonded F", "Ewald F correction",
+    "NB X buffer ops.", "NB F buffer ops."
  };
  
  gmx_bool wallcycle_have_counter(void)
  {
-  return gmx_cycles_have_counter();
+    return gmx_cycles_have_counter();
  }
  
-gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr, 
+gmx_wallcycle_t wallcycle_init(FILE *fplog, int resetstep, t_commrec *cr,
                                 int nthreads_pp, int nthreads_pme)
  {
      gmx_wallcycle_t wc;
-    
-    
+
+
      if (!wallcycle_have_counter())
      {
          return NULL;
      }
  
-    snew(wc,1);
+    snew(wc, 1);
  
      wc->wc_barrier          = FALSE;
      wc->wcc_all             = NULL;
@@ -141,27 +144,27 @@ gmx_wallcycle_t wallcycle_init(FILE *fplog,int resetstep,t_commrec *cr,
  #ifdef GMX_MPI
      if (PAR(cr) && getenv("GMX_CYCLE_BARRIER") != NULL)
      {
-        if (fplog) 
+        if (fplog)
          {
-            fprintf(fplog,"\nWill call MPI_Barrier before each cycle start/stop call\n\n");
+            fprintf(fplog, "\nWill call MPI_Barrier before each cycle start/stop call\n\n");
          }
-        wc->wc_barrier = TRUE;
+        wc->wc_barrier       = TRUE;
          wc->mpi_comm_mygroup = cr->mpi_comm_mygroup;
      }
  #endif
  
-    snew(wc->wcc,ewcNR);
+    snew(wc->wcc, ewcNR);
      if (getenv("GMX_CYCLE_ALL") != NULL)
      {
-        if (fplog) 
+        if (fplog)
          {
-            fprintf(fplog,"\nWill time all the code during the run\n\n");
+            fprintf(fplog, "\nWill time all the code during the run\n\n");
          }
-        snew(wc->wcc_all,ewcNR*ewcNR);
+        snew(wc->wcc_all, ewcNR*ewcNR);
      }
  
  #ifdef GMX_CYCLE_SUBCOUNTERS
-    snew(wc->wcsc,ewcsNR);
+    snew(wc->wcsc, ewcsNR);
  #endif
  
  #ifdef DEBUG_WCYCLE
@@ -177,7 +180,7 @@ void wallcycle_destroy(gmx_wallcycle_t wc)
      {
          return;
      }
-    
+
      if (wc->wcc != NULL)
      {
          sfree(wc->wcc);
@@ -195,13 +198,13 @@ void wallcycle_destroy(gmx_wallcycle_t wc)
      sfree(wc);
  }
  
-static void wallcycle_all_start(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
+static void wallcycle_all_start(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
  {
-    wc->ewc_prev = ewc;
+    wc->ewc_prev   = ewc;
      wc->cycle_prev = cycle;
  }
  
-static void wallcycle_all_stop(gmx_wallcycle_t wc,int ewc,gmx_cycles_t cycle)
+static void wallcycle_all_stop(gmx_wallcycle_t wc, int ewc, gmx_cycles_t cycle)
  {
      wc->wcc_all[wc->ewc_prev*ewcNR+ewc].n += 1;
      wc->wcc_all[wc->ewc_prev*ewcNR+ewc].c += cycle - wc->cycle_prev;
@@ -215,7 +218,7 @@ static void debug_start_check(gmx_wallcycle_t wc, int ewc)
  
      if (wc->count_depth < 0 || wc->count_depth >= DEPTH_MAX)
      {
-        gmx_fatal(FARGS,"wallcycle counter depth out of range: %d",
+        gmx_fatal(FARGS, "wallcycle counter depth out of range: %d",
                    wc->count_depth);
      }
      wc->counterlist[wc->count_depth] = ewc;
@@ -230,12 +233,12 @@ static void debug_stop_check(gmx_wallcycle_t wc, int ewc)
  
      if (wc->count_depth < 0)
      {
-        gmx_fatal(FARGS,"wallcycle counter depth out of range when stopping %s: %d",wcn[ewc],wc->count_depth);
+        gmx_fatal(FARGS, "wallcycle counter depth out of range when stopping %s: %d", wcn[ewc], wc->count_depth);
      }
      if (wc->counterlist[wc->count_depth] != ewc)
      {
-        gmx_fatal(FARGS,"wallcycle mismatch at stop, start %s, stop %s",
-                  wcn[wc->counterlist[wc->count_depth]],wcn[ewc]);
+        gmx_fatal(FARGS, "wallcycle mismatch at stop, start %s, stop %s",
+                  wcn[wc->counterlist[wc->count_depth]], wcn[ewc]);
      }
  }
  #endif
@@ -257,21 +260,21 @@ void wallcycle_start(gmx_wallcycle_t wc, int ewc)
  #endif
  
  #ifdef DEBUG_WCYCLE
-    debug_start_check(wc,ewc);
+    debug_start_check(wc, ewc);
  #endif
  
-    cycle = gmx_cycles_read();
+    cycle              = gmx_cycles_read();
      wc->wcc[ewc].start = cycle;
      if (wc->wcc_all != NULL)
      {
          wc->wc_depth++;
          if (ewc == ewcRUN)
          {
-            wallcycle_all_start(wc,ewc,cycle);
+            wallcycle_all_start(wc, ewc, cycle);
          }
          else if (wc->wc_depth == 3)
          {
-            wallcycle_all_stop(wc,ewc,cycle);
+            wallcycle_all_stop(wc, ewc, cycle);
          }
      }
  }
@@ -289,13 +292,13 @@ void wallcycle_start_nocount(gmx_wallcycle_t wc, int ewc)
  
  double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
  {
-    gmx_cycles_t cycle,last;
-    
+    gmx_cycles_t cycle, last;
+
      if (wc == NULL)
      {
          return 0;
      }
-    
+
  #ifdef GMX_MPI
      if (wc->wc_barrier)
      {
@@ -304,11 +307,11 @@ double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
  #endif
  
  #ifdef DEBUG_WCYCLE
-    debug_stop_check(wc,ewc);
+    debug_stop_check(wc, ewc);
  #endif
-    
-    cycle = gmx_cycles_read();
-    last = cycle - wc->wcc[ewc].start;
+
+    cycle           = gmx_cycles_read();
+    last            = cycle - wc->wcc[ewc].start;
      wc->wcc[ewc].c += last;
      wc->wcc[ewc].n++;
      if (wc->wcc_all)
@@ -316,11 +319,11 @@ double wallcycle_stop(gmx_wallcycle_t wc, int ewc)
          wc->wc_depth--;
          if (ewc == ewcRUN)
          {
-            wallcycle_all_stop(wc,ewc,cycle);
+            wallcycle_all_stop(wc, ewc, cycle);
          }
          else if (wc->wc_depth == 2)
          {
-            wallcycle_all_start(wc,ewc,cycle);
+            wallcycle_all_start(wc, ewc, cycle);
          }
      }
  
@@ -336,21 +339,21 @@ void wallcycle_reset_all(gmx_wallcycle_t wc)
          return;
      }
  
-    for(i=0; i<ewcNR; i++)
+    for (i = 0; i < ewcNR; i++)
      {
          wc->wcc[i].n = 0;
          wc->wcc[i].c = 0;
      }
      if (wc->wcc_all)
      {
-        for(i=0; i<ewcNR*ewcNR; i++)
+        for (i = 0; i < ewcNR*ewcNR; i++)
          {
              wc->wcc_all[i].n = 0;
              wc->wcc_all[i].c = 0;
          }
      }
  #ifdef GMX_CYCLE_SUBCOUNTERS
-    for (i=0; i<ewcsNR; i++)
+    for (i = 0; i < ewcsNR; i++)
      {
          wc->wcsc[i].n = 0;
          wc->wcsc[i].c = 0;
@@ -371,30 +374,30 @@ static gmx_bool is_pme_subcounter(int ewc)
  void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc)
  {
      wallcc_t *wcc;
-    double *cycles;
-    double cycles_n[ewcNR+ewcsNR],buf[ewcNR+ewcsNR],*cyc_all,*buf_all;
-    int    i,j;
-    int    nsum;
+    double   *cycles;
+    double    cycles_n[ewcNR+ewcsNR], buf[ewcNR+ewcsNR], *cyc_all, *buf_all;
+    int       i, j;
+    int       nsum;
  
      if (wc == NULL)
      {
          return;
      }
  
-    snew(wc->cycles_sum,ewcNR+ewcsNR);
+    snew(wc->cycles_sum, ewcNR+ewcsNR);
      cycles = wc->cycles_sum;
  
      wcc = wc->wcc;
  
-    for(i=0; i<ewcNR; i++)
+    for (i = 0; i < ewcNR; i++)
      {
-        if (is_pme_counter(i) || (i==ewcRUN && cr->duty == DUTY_PME))
+        if (is_pme_counter(i) || (i == ewcRUN && cr->duty == DUTY_PME))
          {
              wcc[i].c *= wc->nthreads_pme;
  
              if (wc->wcc_all)
              {
-                for(j=0; j<ewcNR; j++)
+                for (j = 0; j < ewcNR; j++)
                  {
                      wc->wcc_all[i*ewcNR+j].c *= wc->nthreads_pme;
                  }
@@ -406,7 +409,7 @@ void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc)
  
              if (wc->wcc_all)
              {
-                for(j=0; j<ewcNR; j++)
+                for (j = 0; j < ewcNR; j++)
                  {
                      wc->wcc_all[i*ewcNR+j].c *= wc->nthreads_pp;
                  }
@@ -444,58 +447,58 @@ void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc)
              wcc[ewcPMEWAITCOMM].c = wcc[ewcRUN].c - wcc[ewcPMEMESH].c;
          }
      }
-    
+
      /* Store the cycles in a double buffer for summing */
-    for(i=0; i<ewcNR; i++)
+    for (i = 0; i < ewcNR; i++)
      {
          cycles_n[i] = (double)wcc[i].n;
          cycles[i]   = (double)wcc[i].c;
      }
      nsum = ewcNR;
  #ifdef GMX_CYCLE_SUBCOUNTERS
-    for(i=0; i<ewcsNR; i++)
+    for (i = 0; i < ewcsNR; i++)
      {
-        wc->wcsc[i].c *= wc->nthreads_pp;
+        wc->wcsc[i].c    *= wc->nthreads_pp;
          cycles_n[ewcNR+i] = (double)wc->wcsc[i].n;
          cycles[ewcNR+i]   = (double)wc->wcsc[i].c;
      }
      nsum += ewcsNR;
-#endif   
-    
+#endif
+
  #ifdef GMX_MPI
      if (cr->nnodes > 1)
      {
-        MPI_Allreduce(cycles_n,buf,nsum,MPI_DOUBLE,MPI_MAX,
+        MPI_Allreduce(cycles_n, buf, nsum, MPI_DOUBLE, MPI_MAX,
                        cr->mpi_comm_mysim);
-        for(i=0; i<ewcNR; i++)
+        for (i = 0; i < ewcNR; i++)
          {
              wcc[i].n = (int)(buf[i] + 0.5);
          }
  #ifdef GMX_CYCLE_SUBCOUNTERS
-        for(i=0; i<ewcsNR; i++)
+        for (i = 0; i < ewcsNR; i++)
          {
              wc->wcsc[i].n = (int)(buf[ewcNR+i] + 0.5);
          }
-#endif   
+#endif
  
-        MPI_Allreduce(cycles,buf,nsum,MPI_DOUBLE,MPI_SUM,
+        MPI_Allreduce(cycles, buf, nsum, MPI_DOUBLE, MPI_SUM,
                        cr->mpi_comm_mysim);
-        for(i=0; i<nsum; i++)
+        for (i = 0; i < nsum; i++)
          {
              cycles[i] = buf[i];
          }
  
          if (wc->wcc_all != NULL)
          {
-            snew(cyc_all,ewcNR*ewcNR);
-            snew(buf_all,ewcNR*ewcNR);
-            for(i=0; i<ewcNR*ewcNR; i++)
+            snew(cyc_all, ewcNR*ewcNR);
+            snew(buf_all, ewcNR*ewcNR);
+            for (i = 0; i < ewcNR*ewcNR; i++)
              {
                  cyc_all[i] = wc->wcc_all[i].c;
              }
-            MPI_Allreduce(cyc_all,buf_all,ewcNR*ewcNR,MPI_DOUBLE,MPI_SUM,
+            MPI_Allreduce(cyc_all, buf_all, ewcNR*ewcNR, MPI_DOUBLE, MPI_SUM,
                            cr->mpi_comm_mysim);
-            for(i=0; i<ewcNR*ewcNR; i++)
+            for (i = 0; i < ewcNR*ewcNR; i++)
              {
                  wc->wcc_all[i].c = buf_all[i];
              }
@@ -506,36 +509,40 @@ void wallcycle_sum(t_commrec *cr, gmx_wallcycle_t wc)
  #endif
  }
  
-static void print_cycles(FILE *fplog, double c2t, const char *name, 
-                         int nnodes_tot,int nnodes, int nthreads,
+static void print_cycles(FILE *fplog, double c2t, const char *name,
+                         int nnodes_tot, int nnodes, int nthreads,
                           int n, double c, double tot)
  {
-    char num[11];
-    char thstr[6];
+    char   num[11];
+    char   thstr[6];
      double wallt;
-  
+
      if (c > 0)
      {
          if (n > 0)
          {
-            snprintf(num,sizeof(num),"%10d",n);
+            snprintf(num, sizeof(num), "%10d", n);
              if (nthreads < 0)
+            {
                  snprintf(thstr, sizeof(thstr), "N/A");
+            }
              else
+            {
                  snprintf(thstr, sizeof(thstr), "%4d", nthreads);
+            }
          }
          else
          {
-            sprintf(num,"          ");
+            sprintf(num, "          ");
              sprintf(thstr, "    ");
          }
          wallt = c*c2t*nnodes_tot/(double)nnodes;
-        fprintf(fplog," %-19s %4d %4s %10s  %10.3f %12.3f   %5.1f\n",
-                name,nnodes,thstr,num,wallt,c*1e-9,100*c/tot);
+        fprintf(fplog, " %-19s %4d %4s %10s  %10.3f %12.3f   %5.1f\n",
+                name, nnodes, thstr, num, wallt, c*1e-9, 100*c/tot);
      }
  }
  
-static void print_gputimes(FILE *fplog, const char *name, 
+static void print_gputimes(FILE *fplog, const char *name,
                             int n, double t, double tot_t)
  {
      char num[11];
@@ -548,30 +555,30 @@ static void print_gputimes(FILE *fplog, const char *name,
      }
      else
      {
-      sprintf(num,"          ");
-      sprintf(avg_perf,"          ");
+        sprintf(num, "          ");
+        sprintf(avg_perf, "          ");
      }
      if (t != tot_t)
      {
          fprintf(fplog, " %-29s %10s%12.3f   %s   %5.1f\n",
-                name, num, t/1000, avg_perf, 100 * t/tot_t); 
+                name, num, t/1000, avg_perf, 100 * t/tot_t);
      }
      else
      {
-         fprintf(fplog, " %-29s %10s%12.3f   %s   %5.1f\n",
-               name, "", t/1000, avg_perf, 100.0); 
+        fprintf(fplog, " %-29s %10s%12.3f   %s   %5.1f\n",
+                name, "", t/1000, avg_perf, 100.0);
      }
  }
  
  void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                       gmx_wallcycle_t wc, wallclock_gpu_t *gpu_t)
  {
-    double *cycles;
-    double c2t,tot,tot_gpu,tot_cpu_overlap,gpu_cpu_ratio,sum,tot_k;
-    int    i,j,npp,nth_pp,nth_pme;
-    char   buf[STRLEN];
+    double     *cycles;
+    double      c2t, tot, tot_gpu, tot_cpu_overlap, gpu_cpu_ratio, sum, tot_k;
+    int         i, j, npp, nth_pp, nth_pme;
+    char        buf[STRLEN];
      const char *hline = "-----------------------------------------------------------------------------";
-    
+
      if (wc == NULL)
      {
          return;
@@ -603,33 +610,33 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
          c2t = 0;
      }
  
-    fprintf(fplog,"\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");
+    fprintf(fplog, "\n     R E A L   C Y C L E   A N D   T I M E   A C C O U N T I N G\n\n");
  
-    fprintf(fplog," Computing:         Nodes   Th.     Count  Wall t (s)     G-Cycles       %c\n",'%');
-    fprintf(fplog,"%s\n",hline);
+    fprintf(fplog, " Computing:         Nodes   Th.     Count  Wall t (s)     G-Cycles       %c\n", '%');
+    fprintf(fplog, "%s\n", hline);
      sum = 0;
-    for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
+    for (i = ewcPPDURINGPME+1; i < ewcNR; i++)
      {
          if (!is_pme_subcounter(i))
          {
-            print_cycles(fplog,c2t,wcn[i],nnodes,
+            print_cycles(fplog, c2t, wcn[i], nnodes,
                           is_pme_counter(i) ? npme : npp,
-                         is_pme_counter(i) ? nth_pme : nth_pp, 
-                         wc->wcc[i].n,cycles[i],tot);
+                         is_pme_counter(i) ? nth_pme : nth_pp,
+                         wc->wcc[i].n, cycles[i], tot);
              sum += cycles[i];
          }
      }
      if (wc->wcc_all != NULL)
      {
-        for(i=0; i<ewcNR; i++)
+        for (i = 0; i < ewcNR; i++)
          {
-            for(j=0; j<ewcNR; j++)
+            for (j = 0; j < ewcNR; j++)
              {
-                snprintf(buf,9,"%-9s",wcn[i]);
+                snprintf(buf, 9, "%-9s", wcn[i]);
                  buf[9] = ' ';
-                snprintf(buf+10,9,"%-9s",wcn[j]);
+                snprintf(buf+10, 9, "%-9s", wcn[j]);
                  buf[19] = '\0';
-                print_cycles(fplog,c2t,buf,nnodes,
+                print_cycles(fplog, c2t, buf, nnodes,
                               is_pme_counter(i) ? npme : npp,
                               is_pme_counter(i) ? nth_pme : nth_pp,
                               wc->wcc_all[i*ewcNR+j].n,
@@ -638,43 +645,44 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
              }
          }
      }
-    print_cycles(fplog,c2t,"Rest",npp,npp,-1,0,tot-sum,tot);
-    fprintf(fplog,"%s\n",hline);
-    print_cycles(fplog,c2t,"Total",nnodes,nnodes,-1,0,tot,tot);
-    fprintf(fplog,"%s\n",hline);
-    
+    print_cycles(fplog, c2t, "Rest", npp, npp, -1, 0, tot-sum, tot);
+    fprintf(fplog, "%s\n", hline);
+    print_cycles(fplog, c2t, "Total", nnodes, nnodes, -1, 0, tot, tot);
+    fprintf(fplog, "%s\n", hline);
+
      if (wc->wcc[ewcPMEMESH].n > 0)
      {
-        fprintf(fplog,"%s\n",hline);
-        for(i=ewcPPDURINGPME+1; i<ewcNR; i++)
+        fprintf(fplog, "%s\n", hline);
+        for (i = ewcPPDURINGPME+1; i < ewcNR; i++)
          {
              if (is_pme_subcounter(i))
              {
-                print_cycles(fplog,c2t,wcn[i],nnodes,
+                print_cycles(fplog, c2t, wcn[i], nnodes,
                               is_pme_counter(i) ? npme : npp,
                               is_pme_counter(i) ? nth_pme : nth_pp,
-                             wc->wcc[i].n,cycles[i],tot);
+                             wc->wcc[i].n, cycles[i], tot);
              }
          }
-        fprintf(fplog,"%s\n",hline);
+        fprintf(fplog, "%s\n", hline);
      }
  
  #ifdef GMX_CYCLE_SUBCOUNTERS
-    fprintf(fplog,"%s\n",hline);
-    for(i=0; i<ewcsNR; i++)
+    fprintf(fplog, "%s\n", hline);
+    for (i = 0; i < ewcsNR; i++)
      {
-        print_cycles(fplog,c2t,wcsn[i],nnodes,npp,nth_pp,
-                     wc->wcsc[i].n,cycles[ewcNR+i],tot);
+        print_cycles(fplog, c2t, wcsn[i], nnodes, npp, nth_pp,
+                     wc->wcsc[i].n, cycles[ewcNR+i], tot);
      }
-    fprintf(fplog,"%s\n",hline);
+    fprintf(fplog, "%s\n", hline);
  #endif
  
      /* print GPU timing summary */
      if (gpu_t)
      {
          const char *k_log_str[2][2] = {
-                {"Nonbonded F kernel", "Nonbonded F+ene k."},
-                {"Nonbonded F+prune k.", "Nonbonded F+ene+prune k."}};
+            {"Nonbonded F kernel", "Nonbonded F+ene k."},
+            {"Nonbonded F+prune k.", "Nonbonded F+ene+prune k."}
+        };
  
          tot_gpu = gpu_t->pl_h2d_t + gpu_t->nb_h2d_t + gpu_t->nb_d2h_t;
  
@@ -682,13 +690,13 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
          tot_k = 0.0;
          for (i = 0; i < 2; i++)
          {
-            for(j = 0; j < 2; j++)
+            for (j = 0; j < 2; j++)
              {
                  tot_k += gpu_t->ktime[i][j].t;
              }
          }
          tot_gpu += tot_k;
-    
+
          tot_cpu_overlap = wc->wcc[ewcFORCE].c;
          if (wc->wcc[ewcPMEMESH].n > 0)
          {
@@ -697,24 +705,24 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
          tot_cpu_overlap *= c2t * 1000; /* convert s to ms */
  
          fprintf(fplog, "\n GPU timings\n%s\n", hline);
-        fprintf(fplog," Computing:                         Count  Wall t (s)      ms/step       %c\n",'%');
+        fprintf(fplog, " Computing:                         Count  Wall t (s)      ms/step       %c\n", '%');
          fprintf(fplog, "%s\n", hline);
          print_gputimes(fplog, "Pair list H2D",
-                gpu_t->pl_h2d_c, gpu_t->pl_h2d_t, tot_gpu);
-         print_gputimes(fplog, "X / q H2D", 
-                gpu_t->nb_c, gpu_t->nb_h2d_t, tot_gpu);
+                       gpu_t->pl_h2d_c, gpu_t->pl_h2d_t, tot_gpu);
+        print_gputimes(fplog, "X / q H2D",
+                       gpu_t->nb_c, gpu_t->nb_h2d_t, tot_gpu);
  
          for (i = 0; i < 2; i++)
          {
-            for(j = 0; j < 2; j++)
+            for (j = 0; j < 2; j++)
              {
                  if (gpu_t->ktime[i][j].c)
                  {
                      print_gputimes(fplog, k_log_str[i][j],
-                            gpu_t->ktime[i][j].c, gpu_t->ktime[i][j].t, tot_gpu);
+                                   gpu_t->ktime[i][j].c, gpu_t->ktime[i][j].t, tot_gpu);
                  }
              }
-        }        
+        }
  
          print_gputimes(fplog, "F D2H",  gpu_t->nb_c, gpu_t->nb_d2h_t, tot_gpu);
          fprintf(fplog, "%s\n", hline);
@@ -743,7 +751,7 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                          /* The user could have used -notunepme,
                           * but we currently can't check that here.
                           */
-                        md_print_warn(NULL,fplog,
+                        md_print_warn(NULL, fplog,
                                        "\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
                                        "      performance loss. Maybe the domain decomposition limits the PME tuning.\n"
                                        "      In that case, try setting the DD grid manually (-dd) or lowering -dds.");
@@ -753,14 +761,14 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
                          /* We should not end up here, unless the box is
                           * too small for increasing the cut-off for PME tuning.
                           */
-                        md_print_warn(NULL,fplog,
+                        md_print_warn(NULL, fplog,
                                        "\nNOTE: The GPU has >25%% less load than the CPU. This imbalance causes\n"
                                        "      performance loss.");
                      }
                  }
                  if (gpu_cpu_ratio > 1.2)
                  {
-                    md_print_warn(NULL,fplog,
+                    md_print_warn(NULL, fplog,
                                    "\nNOTE: The GPU has >20%% more load than the CPU. This imbalance causes\n"
                                    "      performance loss, consider using a shorter cut-off and a finer PME grid.");
                  }
@@ -775,14 +783,14 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
          /* Only the sim master calls this function, so always print to stderr */
          if (wc->wcc[ewcDOMDEC].n == 0)
          {
-            md_print_warn(NULL,fplog,
+            md_print_warn(NULL, fplog,
                            "NOTE: %d %% of the run time was spent in pair search,\n"
                            "      you might want to increase nstlist (this has no effect on accuracy)\n",
                            (int)(100*cycles[ewcNS]/tot+0.5));
          }
          else
          {
-            md_print_warn(NULL,fplog,
+            md_print_warn(NULL, fplog,
                            "NOTE: %d %% of the run time was spent in domain decomposition,\n"
                            "      %d %% of the run time was spent in pair search,\n"
                            "      you might want to increase nstlist (this has no effect on accuracy)\n",
@@ -794,7 +802,7 @@ void wallcycle_print(FILE *fplog, int nnodes, int npme, double realtime,
      if (cycles[ewcMoveE] > tot*0.05)
      {
          /* Only the sim master calls this function, so always print to stderr */
-        md_print_warn(NULL,fplog,
+        md_print_warn(NULL, fplog,
                        "NOTE: %d %% of the run time was spent communicating energies,\n"
                        "      you might want to use the -gcom option of mdrun\n",
                        (int)(100*cycles[ewcMoveE]/tot+0.5));
@@ -807,14 +815,16 @@ extern gmx_large_int_t wcycle_get_reset_counters(gmx_wallcycle_t wc)
      {
          return -1;
      }
-    
+
      return wc->reset_counters;
  }
  
  extern void wcycle_set_reset_counters(gmx_wallcycle_t wc, gmx_large_int_t reset_counters)
  {
      if (wc == NULL)
+    {
          return;
+    }
  
      wc->reset_counters = reset_counters;
  }