src/programs/mdrun/pme_loadbal.c

   1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
   2  *
   3  *
   4  *                This source code is part of
   5  *
   6  *                 G   R   O   M   A   C   S
   7  *
   8  *          GROningen MAchine for Chemical Simulations
   9  *
  10  *                        VERSION 4.6.0
  11  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  12  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  13  * Copyright (c) 2001-2011, The GROMACS development team,
  14  * check out http://www.gromacs.org for more information.
  15
  16  * This program is free software; you can redistribute it and/or
  17  * modify it under the terms of the GNU General Public License
  18  * as published by the Free Software Foundation; either version 2
  19  * of the License, or (at your option) any later version.
  20  *
  21  * If you want to redistribute modifications, please consider that
  22  * scientific software is very special. Version control is crucial -
  23  * bugs must be traceable. We will be happy to consider code for
  24  * inclusion in the official distribution, but derived work must not
  25  * be called official GROMACS. Details are found in the README & COPYING
  26  * files - if they are missing, get the official version at www.gromacs.org.
  27  *
  28  * To help us fund GROMACS development, we humbly ask that you cite
  29  * the papers on the package - you can find them in the top README file.
  30  *
  31  * For more info, check our website at http://www.gromacs.org
  32  *
  33  * And Hey:
  34  * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
  35  */
  36 #ifdef HAVE_CONFIG_H
  37 #include <config.h>
  38 #endif
  39
  40 #include "smalloc.h"
  41 #include "network.h"
  42 #include "calcgrid.h"
  43 #include "pme.h"
  44 #include "vec.h"
  45 #include "domdec.h"
  46 #include "nbnxn_cuda_data_mgmt.h"
  47 #include "force.h"
  48 #include "macros.h"
  49 #include "pme_loadbal.h"
  50
  51 /* Parameters and setting for one PP-PME setup */
  52 typedef struct {
  53     real rcut;            /* Coulomb cut-off                              */
  54     real rlist;           /* pair-list cut-off                            */
  55     real spacing;         /* (largest) PME grid spacing                   */
  56     ivec grid;            /* the PME grid dimensions                      */
  57     real grid_efficiency; /* ineffiency factor for non-uniform grids <= 1 */
  58     real ewaldcoeff;      /* the Ewald coefficient                        */
  59     gmx_pme_t pmedata;    /* the data structure used in the PME code      */
  60
  61     int  count;           /* number of times this setup has been timed    */
  62     double cycles;        /* the fastest time for this setup in cycles    */
  63 } pme_setup_t;
  64
  65 /* In the initial scan, step by grids that are at least a factor 0.8 coarser */
  66 #define PME_LB_GRID_SCALE_FAC  0.8
  67 /* In the initial scan, try to skip grids with uneven x/y/z spacing,
  68  * checking if the "efficiency" is more than 5% worse than the previous grid.
  69  */
  70 #define PME_LB_GRID_EFFICIENCY_REL_FAC  1.05
  71 /* Rerun up till 12% slower setups than the fastest up till now */
  72 #define PME_LB_SLOW_FAC  1.12
  73 /* If setups get more than 2% faster, do another round to avoid
  74  * choosing a slower setup due to acceleration or fluctuations.
  75  */
  76 #define PME_LB_ACCEL_TOL 1.02
  77
  78 enum { epmelblimNO, epmelblimBOX, epmelblimDD, epmelblimNR };
  79
  80 const char *pmelblim_str[epmelblimNR] =
  81 { "no", "box size", "domain decompostion" };
  82
  83 struct pme_load_balancing {
  84     int  nstage;        /* the current maximum number of stages */
  85
  86     real cut_spacing;   /* the minimum cutoff / PME grid spacing ratio */
  87     real rbuf;          /* the pairlist buffer size */
  88     matrix box_start;   /* the initial simulation box */
  89     int n;              /* the count of setup as well as the allocation size */
  90     pme_setup_t *setup; /* the PME+cutoff setups */
  91     int cur;            /* the current setup */
  92     int fastest;        /* fastest setup up till now */
  93     int start;          /* start of setup range to consider in stage>0 */
  94     int end;            /* end   of setup range to consider in stage>0 */
  95     int elimited;       /* was the balancing limited, uses enum above */
  96
  97     int stage;          /* the current stage */
  98 };
  99
 100 void pme_loadbal_init(pme_load_balancing_t *pme_lb_p,
 101                       const t_inputrec *ir,matrix box,
 102                       const interaction_const_t *ic,
 103                       gmx_pme_t pmedata)
 104 {
 105     pme_load_balancing_t pme_lb;
 106     real spm,sp;
 107     int  d;
 108
 109     snew(pme_lb,1);
 110
 111     /* Any number of stages >= 2 is supported */
 112     pme_lb->nstage   = 2;
 113
 114     pme_lb->rbuf = ic->rlist - ic->rcoulomb;
 115
 116     copy_mat(box,pme_lb->box_start);
 117     if (ir->ePBC==epbcXY && ir->nwall==2)
 118     {
 119         svmul(ir->wall_ewald_zfac,pme_lb->box_start[ZZ],pme_lb->box_start[ZZ]);
 120     }
 121
 122     pme_lb->n = 1;
 123     snew(pme_lb->setup,pme_lb->n);
 124
 125     pme_lb->cur = 0;
 126     pme_lb->setup[0].rcut       = ic->rcoulomb;
 127     pme_lb->setup[0].rlist      = ic->rlist;
 128     pme_lb->setup[0].grid[XX]   = ir->nkx;
 129     pme_lb->setup[0].grid[YY]   = ir->nky;
 130     pme_lb->setup[0].grid[ZZ]   = ir->nkz;
 131     pme_lb->setup[0].ewaldcoeff = ic->ewaldcoeff;
 132
 133     pme_lb->setup[0].pmedata  = pmedata;
 134
 135     spm = 0;
 136     for(d=0; d<DIM; d++)
 137     {
 138         sp = norm(pme_lb->box_start[d])/pme_lb->setup[0].grid[d];
 139         if (sp > spm)
 140         {
 141             spm = sp;
 142         }
 143     }
 144     pme_lb->setup[0].spacing = spm;
 145
 146     if (ir->fourier_spacing > 0)
 147     {
 148         pme_lb->cut_spacing = ir->rcoulomb/ir->fourier_spacing;
 149     }
 150     else
 151     {
 152         pme_lb->cut_spacing = ir->rcoulomb/pme_lb->setup[0].spacing;
 153     }
 154
 155     pme_lb->stage = 0;
 156
 157     pme_lb->fastest  = 0;
 158     pme_lb->start    = 0;
 159     pme_lb->end      = 0;
 160     pme_lb->elimited = epmelblimNO;
 161
 162     *pme_lb_p = pme_lb;
 163 }
 164
 165 static gmx_bool pme_loadbal_increase_cutoff(pme_load_balancing_t pme_lb,
 166                                             int pme_order)
 167 {
 168     pme_setup_t *set;
 169     real fac,sp;
 170     int d;
 171
 172     /* Try to add a new setup with next larger cut-off to the list */
 173     pme_lb->n++;
 174     srenew(pme_lb->setup,pme_lb->n);
 175     set = &pme_lb->setup[pme_lb->n-1];
 176     set->pmedata = NULL;
 177
 178     fac = 1;
 179     do
 180     {
 181         fac *= 1.01;
 182         clear_ivec(set->grid);
 183         sp = calc_grid(NULL,pme_lb->box_start,
 184                        fac*pme_lb->setup[pme_lb->cur].spacing,
 185                        &set->grid[XX],
 186                        &set->grid[YY],
 187                        &set->grid[ZZ]);
 188
 189         /* In parallel we can't have grids smaller than 2*pme_order,
 190          * and we would anyhow not gain much speed at these grid sizes.
 191          */
 192         for(d=0; d<DIM; d++)
 193         {
 194             if (set->grid[d] <= 2*pme_order)
 195             {
 196                 pme_lb->n--;
 197
 198                 return FALSE;
 199             }
 200         }
 201     }
 202     while (sp <= 1.001*pme_lb->setup[pme_lb->cur].spacing);
 203
 204     set->rcut    = pme_lb->cut_spacing*sp;
 205     set->rlist   = set->rcut + pme_lb->rbuf;
 206     set->spacing = sp;
 207     /* The grid efficiency is the size wrt a grid with uniform x/y/z spacing */
 208     set->grid_efficiency = 1;
 209     for(d=0; d<DIM; d++)
 210     {
 211         set->grid_efficiency *= (set->grid[d]*sp)/norm(pme_lb->box_start[d]);
 212     }
 213     /* The Ewald coefficient is inversly proportional to the cut-off */
 214     set->ewaldcoeff =
 215         pme_lb->setup[0].ewaldcoeff*pme_lb->setup[0].rcut/set->rcut;
 216
 217     set->count   = 0;
 218     set->cycles  = 0;
 219
 220     if (debug)
 221     {
 222         fprintf(debug,"PME loadbal: grid %d %d %d, cutoff %f\n",
 223                 set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut);
 224     }
 225
 226     return TRUE;
 227 }
 228
 229 static void print_grid(FILE *fp_err,FILE *fp_log,
 230                        const char *pre,
 231                        const char *desc,
 232                        const pme_setup_t *set,
 233                        double cycles)
 234 {
 235     char buf[STRLEN],buft[STRLEN];
 236
 237     if (cycles >= 0)
 238     {
 239         sprintf(buft,": %.1f M-cycles",cycles*1e-6);
 240     }
 241     else
 242     {
 243         buft[0] = '\0';
 244     }
 245     sprintf(buf,"%-11s%10s pme grid %d %d %d, cutoff %.3f%s",
 246             pre,
 247             desc,set->grid[XX],set->grid[YY],set->grid[ZZ],set->rcut,
 248             buft);
 249     if (fp_err != NULL)
 250     {
 251         fprintf(fp_err,"\r%s\n",buf);
 252     }
 253     if (fp_log != NULL)
 254     {
 255         fprintf(fp_log,"%s\n",buf);
 256     }
 257 }
 258
 259 static int pme_loadbal_end(pme_load_balancing_t pme_lb)
 260 {
 261     /* In the initial stage only n is set; end is not set yet */
 262     if (pme_lb->end > 0)
 263     {
 264         return pme_lb->end;
 265     }
 266     else
 267     {
 268         return pme_lb->n;
 269     }
 270 }
 271
 272 static void print_loadbal_limited(FILE *fp_err,FILE *fp_log,
 273                                   gmx_large_int_t step,
 274                                   pme_load_balancing_t pme_lb)
 275 {
 276     char buf[STRLEN],sbuf[22];
 277
 278     sprintf(buf,"step %4s: the %s limited the PME load balancing to a cut-off of %.3f",
 279             gmx_step_str(step,sbuf),
 280             pmelblim_str[pme_lb->elimited],
 281             pme_lb->setup[pme_loadbal_end(pme_lb)-1].rcut);
 282     if (fp_err != NULL)
 283     {
 284         fprintf(fp_err,"\r%s\n",buf);
 285     }
 286     if (fp_log != NULL)
 287     {
 288         fprintf(fp_log,"%s\n",buf);
 289     }
 290 }
 291
 292 static void switch_to_stage1(pme_load_balancing_t pme_lb)
 293 {
 294     pme_lb->start = 0;
 295     while (pme_lb->start+1 < pme_lb->n &&
 296            (pme_lb->setup[pme_lb->start].count == 0 ||
 297             pme_lb->setup[pme_lb->start].cycles >
 298             pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC))
 299     {
 300         pme_lb->start++;
 301     }
 302     while (pme_lb->start > 0 && pme_lb->setup[pme_lb->start-1].cycles == 0)
 303     {
 304         pme_lb->start--;
 305     }
 306
 307     pme_lb->end = pme_lb->n;
 308     if (pme_lb->setup[pme_lb->end-1].count > 0 &&
 309         pme_lb->setup[pme_lb->end-1].cycles >
 310         pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 311     {
 312         pme_lb->end--;
 313     }
 314
 315     pme_lb->stage = 1;
 316
 317     /* Next we want to choose setup pme_lb->start, but as we will increase
 318      * pme_ln->cur by one right after returning, we subtract 1 here.
 319      */
 320     pme_lb->cur = pme_lb->start - 1;
 321 }
 322
 323 gmx_bool pme_load_balance(pme_load_balancing_t pme_lb,
 324                           t_commrec *cr,
 325                           FILE *fp_err,
 326                           FILE *fp_log,
 327                           t_inputrec *ir,
 328                           t_state *state,
 329                           double cycles,
 330                           interaction_const_t *ic,
 331                           nonbonded_verlet_t *nbv,
 332                           gmx_pme_t *pmedata,
 333                           gmx_large_int_t step)
 334 {
 335     gmx_bool OK;
 336     pme_setup_t *set;
 337     double cycles_fast;
 338     char buf[STRLEN],sbuf[22];
 339
 340     if (pme_lb->stage == pme_lb->nstage)
 341     {
 342         return FALSE;
 343     }
 344
 345     if (PAR(cr))
 346     {
 347         gmx_sumd(1,&cycles,cr);
 348         cycles /= cr->nnodes;
 349     }
 350
 351     set = &pme_lb->setup[pme_lb->cur];
 352
 353     set->count++;
 354     if (set->count % 2 == 1)
 355     {
 356         /* Skip the first cycle, because the first step after a switch
 357          * is much slower due to allocation and/or caching effects.
 358          */
 359         return TRUE;
 360     }
 361
 362     sprintf(buf, "step %4s: ", gmx_step_str(step,sbuf));
 363     print_grid(fp_err,fp_log,buf,"timed with",set,cycles);
 364
 365     if (set->count <= 2)
 366     {
 367         set->cycles = cycles;
 368     }
 369     else
 370     {
 371         if (cycles*PME_LB_ACCEL_TOL < set->cycles &&
 372             pme_lb->stage == pme_lb->nstage - 1)
 373         {
 374             /* The performance went up a lot (due to e.g. DD load balancing).
 375              * Add a stage, keep the minima, but rescan all setups.
 376              */
 377             pme_lb->nstage++;
 378
 379             if (debug)
 380             {
 381                 fprintf(debug,"The performance for grid %d %d %d went from %.3f to %.1f M-cycles, this is more than %f\n"
 382                         "Increased the number stages to %d"
 383                         " and ignoring the previous performance\n",
 384                         set->grid[XX],set->grid[YY],set->grid[ZZ],
 385                         cycles*1e-6,set->cycles*1e-6,PME_LB_ACCEL_TOL,
 386                         pme_lb->nstage);
 387             }
 388         }
 389         set->cycles = min(set->cycles,cycles);
 390     }
 391
 392     if (set->cycles < pme_lb->setup[pme_lb->fastest].cycles)
 393     {
 394         pme_lb->fastest = pme_lb->cur;
 395     }
 396     cycles_fast = pme_lb->setup[pme_lb->fastest].cycles;
 397
 398     /* Check in stage 0 if we should stop scanning grids.
 399      * Stop when the time is more than SLOW_FAC longer than the fastest.
 400      */
 401     if (pme_lb->stage == 0 && pme_lb->cur > 0 &&
 402         cycles > pme_lb->setup[pme_lb->fastest].cycles*PME_LB_SLOW_FAC)
 403     {
 404         pme_lb->n = pme_lb->cur + 1;
 405         /* Done with scanning, go to stage 1 */
 406         switch_to_stage1(pme_lb);
 407     }
 408
 409     if (pme_lb->stage == 0)
 410     {
 411         int gridsize_start;
 412
 413         gridsize_start = set->grid[XX]*set->grid[YY]*set->grid[ZZ];
 414
 415         do
 416         {
 417             if (pme_lb->cur+1 < pme_lb->n)
 418             {
 419                 /* We had already generated the next setup */
 420                 OK = TRUE;
 421             }
 422             else
 423             {
 424                 /* Find the next setup */
 425                 OK = pme_loadbal_increase_cutoff(pme_lb,ir->pme_order);
 426             }
 427
 428             if (OK && ir->ePBC != epbcNONE)
 429             {
 430                 OK = (sqr(pme_lb->setup[pme_lb->cur+1].rlist)
 431                       <= max_cutoff2(ir->ePBC,state->box));
 432                 if (!OK)
 433                 {
 434                     pme_lb->elimited = epmelblimBOX;
 435                 }
 436             }
 437
 438             if (OK)
 439             {
 440                 pme_lb->cur++;
 441
 442                 if (DOMAINDECOMP(cr))
 443                 {
 444                     OK = change_dd_cutoff(cr,state,ir,
 445                                           pme_lb->setup[pme_lb->cur].rlist);
 446                     if (!OK)
 447                     {
 448                         /* Failed: do not use this setup */
 449                         pme_lb->cur--;
 450                         pme_lb->elimited = epmelblimDD;
 451                     }
 452                 }
 453             }
 454             if (!OK)
 455             {
 456                 /* We hit the upper limit for the cut-off,
 457                  * the setup should not go further than cur.
 458                  */
 459                 pme_lb->n = pme_lb->cur + 1;
 460                 print_loadbal_limited(fp_err,fp_log,step,pme_lb);
 461                 /* Switch to the next stage */
 462                 switch_to_stage1(pme_lb);
 463             }
 464         }
 465         while (OK &&
 466                !(pme_lb->setup[pme_lb->cur].grid[XX]*
 467                  pme_lb->setup[pme_lb->cur].grid[YY]*
 468                  pme_lb->setup[pme_lb->cur].grid[ZZ] <
 469                  gridsize_start*PME_LB_GRID_SCALE_FAC
 470                  &&
 471                  pme_lb->setup[pme_lb->cur].grid_efficiency <
 472                  pme_lb->setup[pme_lb->cur-1].grid_efficiency*PME_LB_GRID_EFFICIENCY_REL_FAC));
 473     }
 474
 475     if (pme_lb->stage > 0 && pme_lb->end == 1)
 476     {
 477         pme_lb->cur = 0;
 478         pme_lb->stage = pme_lb->nstage;
 479     }
 480     else if (pme_lb->stage > 0 && pme_lb->end > 1)
 481     {
 482         /* If stage = nstage-1:
 483          *   scan over all setups, rerunning only those setups
 484          *   which are not much slower than the fastest
 485          * else:
 486          *   use the next setup
 487          */
 488         do
 489         {
 490             pme_lb->cur++;
 491             if (pme_lb->cur == pme_lb->end)
 492             {
 493                 pme_lb->stage++;
 494                 pme_lb->cur = pme_lb->start;
 495             }
 496         }
 497         while (pme_lb->stage == pme_lb->nstage - 1 &&
 498                pme_lb->setup[pme_lb->cur].count > 0 &&
 499                pme_lb->setup[pme_lb->cur].cycles > cycles_fast*PME_LB_SLOW_FAC);
 500
 501         if (pme_lb->stage == pme_lb->nstage)
 502         {
 503             /* We are done optimizing, use the fastest setup we found */
 504             pme_lb->cur = pme_lb->fastest;
 505         }
 506     }
 507
 508     if (DOMAINDECOMP(cr) && pme_lb->stage > 0)
 509     {
 510         OK = change_dd_cutoff(cr,state,ir,pme_lb->setup[pme_lb->cur].rlist);
 511         if (!OK)
 512         {
 513             /* Failsafe solution */
 514             if (pme_lb->cur > 1 && pme_lb->stage == pme_lb->nstage)
 515             {
 516                 pme_lb->stage--;
 517             }
 518             pme_lb->fastest  = 0;
 519             pme_lb->start    = 0;
 520             pme_lb->end      = pme_lb->cur;
 521             pme_lb->cur      = pme_lb->start;
 522             pme_lb->elimited = epmelblimDD;
 523             print_loadbal_limited(fp_err,fp_log,step,pme_lb);
 524         }
 525     }
 526
 527     /* Change the Coulomb cut-off and the PME grid */
 528
 529     set = &pme_lb->setup[pme_lb->cur];
 530
 531     ic->rcoulomb   = set->rcut;
 532     ic->rlist      = set->rlist;
 533     ic->ewaldcoeff = set->ewaldcoeff;
 534
 535     if (nbv->grp[0].kernel_type == nbk8x8x8_CUDA)
 536     {
 537         nbnxn_cuda_pme_loadbal_update_param(nbv->cu_nbv,ic);
 538     }
 539     else
 540     {
 541         init_interaction_const_tables(NULL,ic,nbv->grp[0].kernel_type);
 542     }
 543
 544     if (nbv->ngrp > 1)
 545     {
 546         init_interaction_const_tables(NULL,ic,nbv->grp[1].kernel_type);
 547     }
 548
 549     if (cr->duty & DUTY_PME)
 550     {
 551         if (pme_lb->setup[pme_lb->cur].pmedata == NULL)
 552         {
 553             /* Generate a new PME data structure,
 554              * copying part of the old pointers.
 555              */
 556             gmx_pme_reinit(&set->pmedata,
 557                            cr,pme_lb->setup[0].pmedata,ir,
 558                            set->grid);
 559         }
 560         *pmedata = set->pmedata;
 561     }
 562     else
 563     {
 564         /* Tell our PME-only node to switch grid */
 565         gmx_pme_send_switch(cr, set->grid, set->ewaldcoeff);
 566     }
 567
 568     if (debug)
 569     {
 570         print_grid(NULL,debug,"","switched to",set,-1);
 571     }
 572
 573     if (pme_lb->stage == pme_lb->nstage)
 574     {
 575         print_grid(fp_err,fp_log,"","optimal",set,-1);
 576     }
 577
 578     return TRUE;
 579 }
 580
 581 void restart_pme_loadbal(pme_load_balancing_t pme_lb, int n)
 582 {
 583     pme_lb->nstage += n;
 584 }
 585
 586 static int pme_grid_points(const pme_setup_t *setup)
 587 {
 588     return setup->grid[XX]*setup->grid[YY]*setup->grid[ZZ];
 589 }
 590
 591 static void print_pme_loadbal_setting(FILE *fplog,
 592                                      char *name,
 593                                      const pme_setup_t *setup)
 594 {
 595     fprintf(fplog,
 596             "   %-7s %6.3f nm %6.3f nm     %3d %3d %3d   %5.3f nm  %5.3f nm\n",
 597             name,
 598             setup->rcut,setup->rlist,
 599             setup->grid[XX],setup->grid[YY],setup->grid[ZZ],
 600             setup->spacing,1/setup->ewaldcoeff);
 601 }
 602
 603 static void print_pme_loadbal_settings(pme_load_balancing_t pme_lb,
 604                                        FILE *fplog)
 605 {
 606     double pp_ratio,grid_ratio;
 607
 608     pp_ratio   = pow(pme_lb->setup[pme_lb->cur].rlist/pme_lb->setup[0].rlist,3.0);
 609     grid_ratio = pme_grid_points(&pme_lb->setup[pme_lb->cur])/
 610         (double)pme_grid_points(&pme_lb->setup[0]);
 611
 612     fprintf(fplog,"\n");
 613     fprintf(fplog,"       P P   -   P M E   L O A D   B A L A N C I N G\n");
 614     fprintf(fplog,"\n");
 615     /* Here we only warn when the optimal setting is the last one */
 616     if (pme_lb->elimited != epmelblimNO &&
 617         pme_lb->cur == pme_loadbal_end(pme_lb)-1)
 618     {
 619         fprintf(fplog," NOTE: The PP/PME load balancing was limited by the %s,\n",
 620                 pmelblim_str[pme_lb->elimited]);
 621         fprintf(fplog,"       you might not have reached a good load balance.\n");
 622         if (pme_lb->elimited == epmelblimDD)
 623         {
 624             fprintf(fplog,"       Try different mdrun -dd settings or lower the -dds value.\n");
 625         }
 626         fprintf(fplog,"\n");
 627     }
 628     fprintf(fplog," PP/PME load balancing changed the cut-off and PME settings:\n");
 629     fprintf(fplog,"           particle-particle                    PME\n");
 630     fprintf(fplog,"            rcoulomb  rlist            grid      spacing   1/beta\n");
 631     print_pme_loadbal_setting(fplog,"initial",&pme_lb->setup[0]);
 632     print_pme_loadbal_setting(fplog,"final"  ,&pme_lb->setup[pme_lb->cur]);
 633     fprintf(fplog," cost-ratio           %4.2f             %4.2f\n",
 634             pp_ratio,grid_ratio);
 635     fprintf(fplog," (note that these numbers concern only part of the total PP and PME load)\n");
 636     fprintf(fplog,"\n");
 637 }
 638
 639 void pme_loadbal_done(pme_load_balancing_t pme_lb, FILE *fplog)
 640 {
 641     if (fplog != NULL && (pme_lb->cur > 0 || pme_lb->elimited != epmelblimNO))
 642     {
 643         print_pme_loadbal_settings(pme_lb,fplog);
 644     }
 645
 646     /* TODO: Here we should free all pointers in pme_lb,
 647      * but as it contains pme data structures,
 648      * we need to first make pme.c free all data.
 649      */
 650 }