/* This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009, Sander Pronk, Erik Lindahl.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
*/
38 #ifdef HAVE_TMPI_CONFIG_H
39 #include "tmpi_config.h"
/* helper function for tMPI_Comm_split. Splits N entities with color and key
   out so that the output contains Ngroups groups each with elements
   of the same color. The group array contains the entities in each group. */
/* NOTE(review): this declaration was truncated in the garbled listing
   (fused line numbers, final parameter elided); the trailing 'int *group'
   parameter is restored to match the definition below — verify against VCS. */
static void tMPI_Split_colors(int N, const int *color, const int *key,
                              int *Ngroups, int *grp_N, int *grp_color,
                              int *group);
71 /* communicator query&manipulation functions */
72 int tMPI_Comm_N(tMPI_Comm comm)
75 tMPI_Trace_print("tMPI_Comm_N(%p)", comm);
84 int tMPI_Comm_size(tMPI_Comm comm, int *size)
87 tMPI_Trace_print("tMPI_Comm_size(%p, %p)", comm, size);
89 return tMPI_Group_size(&(comm->grp), size);
92 int tMPI_Comm_rank(tMPI_Comm comm, int *rank)
95 tMPI_Trace_print("tMPI_Comm_rank(%p, %p)", comm, rank);
97 return tMPI_Group_rank(&(comm->grp), rank);
101 int tMPI_Comm_compare(tMPI_Comm comm1, tMPI_Comm comm2, int *result)
105 tMPI_Trace_print("tMPI_Comm_compare(%p, %p, %p)", comm1, comm2, result);
109 *result = TMPI_IDENT;
113 if ( (!comm1) || (!comm2) )
115 *result = TMPI_UNEQUAL;
119 if (comm1->grp.N != comm2->grp.N)
121 *result = TMPI_UNEQUAL;
125 *result = TMPI_CONGRUENT;
126 /* we assume that there are two identical comm members within a comm */
127 for (i = 0; i < comm1->grp.N; i++)
129 if (comm1->grp.peers[i] != comm2->grp.peers[i])
131 tmpi_bool found = FALSE;
133 *result = TMPI_SIMILAR;
134 for (j = 0; j < comm2->grp.N; j++)
136 if (comm1->grp.peers[i] == comm2->grp.peers[j])
144 *result = TMPI_UNEQUAL;
/*
 * tMPI_Comm_alloc: allocate and initialize the internal state of a new
 * communicator for N threads — the peer list, comm-creation mutex and
 * condition variables, main and reduce barriers, reduce buffers,
 * coll_env/coll_sync objects — inheriting the error handler from
 * 'parent' (when one is given, presumably), and linking the new comm
 * into the global circular list under tmpi_global->comm_link_lock.
 *
 * NOTE(review): this listing is garbled — every line carries a fused
 * original line number, and gaps in those numbers show that many lines
 * (local declarations of 'i'/'j'/'ret', braces, and the 'if' checks that
 * presumably guard the visible bare 'return' statements) were elided by
 * extraction. Restore from version control before building; comments
 * below describe only what is visible.
 */
153 int tMPI_Comm_alloc(tMPI_Comm *newcomm, tMPI_Comm parent, int N)
155     struct tmpi_comm_ *retc;
/* allocate the comm structure itself; the elided check presumably
   guards the NO_MEM return below — confirm against VCS */
159     retc = (struct tmpi_comm_*)tMPI_Malloc(sizeof(struct tmpi_comm_));
162         return TMPI_ERR_NO_MEM;
/* peer list sized to Nthreads (global thread count, defined elsewhere) */
165     retc->grp.peers = (struct tmpi_thread**)tMPI_Malloc(
166         sizeof(struct tmpi_thread*)*Nthreads);
167     if (retc->grp.peers == NULL)
169         return TMPI_ERR_NO_MEM;
/* synchronization primitives used by tMPI_Comm_split's rendezvous;
   each visible ERR_IO return is presumably guarded by an elided
   'if (ret != 0)' — confirm */
173     ret = tMPI_Thread_mutex_init( &(retc->comm_create_lock) );
176         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
178     ret = tMPI_Thread_cond_init( &(retc->comm_create_prep) );
181         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
183     ret = tMPI_Thread_cond_init( &(retc->comm_create_finish) );
186         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
190     retc->new_comm = NULL;
191 /* we have no topology to start out with */
193 /*retc->graph=NULL;*/
195 /* we start counting at 0 */
196 tMPI_Atomic_set( &(retc->destroy_counter), 0);
198 /* initialize the main barrier */
199 tMPI_Barrier_init(&(retc->barrier), N);
201 /* the reduce barriers */
203 /* First calculate the number of reduce barriers */
204 int Niter = 0; /* the iteration number */
205 int Nred = N; /* the number of reduce barriers for this iteration */
/* (elided loop: halves Nred each iteration, counting Niter, i.e. the
   depth of a pairwise reduction tree over N threads — confirm) */
208 /* Nred is now Nred/2 + a rest term because solitary
209 process at the end of the list must still be accounted for */
210 Nred = Nred/2 + Nred%2;
214     retc->N_reduce_iter = Niter;
215 /* allocate the list */
216 retc->reduce_barrier = (tMPI_Barrier_t**)
217 tMPI_Malloc(sizeof(tMPI_Barrier_t*)*(Niter+1));
218 if (retc->reduce_barrier == NULL)
220         return TMPI_ERR_NO_MEM;
222     retc->N_reduce = (int*)tMPI_Malloc(sizeof(int)*(Niter+1));
223     if (retc->N_reduce == NULL)
225         return TMPI_ERR_NO_MEM;
228 /* we re-set Nred to N */
/* per-iteration sub-lists of 2-thread barriers for the reduction tree */
230     for (i = 0; i < Niter; i++)
234 Nred = Nred/2 + Nred%2;
235 retc->N_reduce[i] = Nred;
236 /* allocate the sub-list */
237 retc->reduce_barrier[i] = (tMPI_Barrier_t*)
238 tMPI_Malloc(sizeof(tMPI_Barrier_t)*(Nred));
239 if (retc->reduce_barrier[i] == NULL)
241             return TMPI_ERR_NO_MEM;
243         for (j = 0; j < Nred; j++)
245             tMPI_Barrier_init(&(retc->reduce_barrier[i][j]), 2);
250 /* the reduce buffers */
251 retc->reduce_sendbuf = (tMPI_Atomic_ptr_t*)
252 tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*Nthreads);
253 if (retc->reduce_sendbuf == NULL)
255         return TMPI_ERR_NO_MEM;
257     retc->reduce_recvbuf = (tMPI_Atomic_ptr_t*)
258 tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*Nthreads);
259 if (retc->reduce_recvbuf == NULL)
261         return TMPI_ERR_NO_MEM;
/* error handler: inherit from parent vs. default to fatal; the elided
   'if (parent)'/'else' presumably selects between these two — confirm */
266         retc->erh = parent->erh;
270         retc->erh = TMPI_ERRORS_ARE_FATAL;
273 /* coll_env objects */
274 retc->cev = (struct coll_env*)tMPI_Malloc(sizeof(struct coll_env)*
276     if (retc->cev == NULL)
278         return TMPI_ERR_NO_MEM;
281     for (i = 0; i < N_COLL_ENV; i++)
283         ret = tMPI_Coll_env_init( &(retc->cev[i]), N);
284         if (ret != TMPI_SUCCESS)
289 /* multi_sync objects */
290 retc->csync = (struct coll_sync*)tMPI_Malloc(sizeof(struct coll_sync)*N);
291 if (retc->csync == NULL)
293         return TMPI_ERR_NO_MEM;
296     for (i = 0; i < N; i++)
298         ret = tMPI_Coll_sync_init( &(retc->csync[i]), N);
299         if (ret != TMPI_SUCCESS)
/* link the new comm into the global doubly-linked circular comm list,
   protected by comm_link_lock */
305     ret = tMPI_Thread_mutex_lock( &(tmpi_global->comm_link_lock) );
308         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
310 /* we insert ourselves in the circular list, after TMPI_COMM_WORLD */
313 retc->next = TMPI_COMM_WORLD;
314 retc->prev = TMPI_COMM_WORLD->prev;
316 TMPI_COMM_WORLD->prev->next = retc;
317 TMPI_COMM_WORLD->prev = retc;
/* no TMPI_COMM_WORLD yet: the comm links only to itself (the branch
   condition was elided — confirm) */
321 retc->prev = retc->next = retc;
323 ret = tMPI_Thread_mutex_unlock( &(tmpi_global->comm_link_lock) );
326         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
/*
 * tMPI_Comm_destroy: free all resources owned by 'comm' — peer list,
 * reduce barrier tree and counters, coll_env/coll_sync objects, the
 * comm-creation mutex/condvars, reduce buffers, and the Cartesian
 * topology — then unlink the comm from the global circular list under
 * tmpi_global->comm_link_lock (presumably only when do_link_lock is
 * set; the guarding branch is not visible — confirm).
 *
 * NOTE(review): garbled numbered listing. Gaps in the fused line numbers
 * show elided lines: declarations of 'i'/'ret', braces, the 'if' guards
 * around the visible ERR_IO returns, a NULL/validity check on comm->cart,
 * and the final free of the comm itself. Restore from version control.
 */
332 int tMPI_Comm_destroy(tMPI_Comm comm, tmpi_bool do_link_lock)
337     free(comm->grp.peers);
/* free the per-iteration reduce barrier sub-lists, then the list itself */
338     for (i = 0; i < comm->N_reduce_iter; i++)
340         free(comm->reduce_barrier[i]);
342     free(comm->reduce_barrier);
343     free(comm->N_reduce);
345     for (i = 0; i < N_COLL_ENV; i++)
347         tMPI_Coll_env_destroy( &(comm->cev[i]) );
349     for (i = 0; i < comm->grp.N; i++)
351         tMPI_Coll_sync_destroy( &(comm->csync[i]) );
/* tear down the comm-creation synchronization primitives; each visible
   ERR_IO return is presumably guarded by an elided 'if (ret != 0)' */
356     ret = tMPI_Thread_mutex_destroy( &(comm->comm_create_lock) );
359         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
361     ret = tMPI_Thread_cond_destroy( &(comm->comm_create_prep) );
364         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
366     ret = tMPI_Thread_cond_destroy( &(comm->comm_create_finish) );
369         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
372     free((void*)comm->reduce_sendbuf);
373     free((void*)comm->reduce_recvbuf);
/* an elided check presumably skips this when no Cartesian topology
   was attached — confirm */
377         tMPI_Cart_destroy( comm->cart );
381 /* remove ourselves from the circular list */
384         ret = tMPI_Thread_mutex_lock( &(tmpi_global->comm_link_lock) );
387             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
/* splice this comm out of the doubly-linked circular list; the elided
   conditions presumably guard against self-links — confirm */
392         comm->next->prev = comm->prev;
396         comm->prev->next = comm->next;
401         ret = tMPI_Thread_mutex_unlock( &(tmpi_global->comm_link_lock) );
404             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
/*
 * tMPI_Comm_free: release the calling thread's reference to *comm.
 * For a multi-thread comm, the caller removes itself from the peer list
 * under comm_create_lock; destruction of the shared structure happens
 * once (the visible destroy_counter fetch-add suggests the last caller
 * does it — the deciding condition is elided, confirm).
 *
 * NOTE(review): garbled numbered listing. Elided lines include the
 * declarations of 'ret'/'myrank'/'size'/'sum', the myrank computation,
 * the decrement of grp.N, the branch structure, and the 'if' guards
 * around the visible ERR_IO returns. Restore from version control.
 */
410 int tMPI_Comm_free(tMPI_Comm *comm)
416     tMPI_Trace_print("tMPI_Comm_free(%p)", comm);
424     if ((*comm)->grp.N > 1)
426 /* we remove ourselves from the comm. */
427 ret = tMPI_Thread_mutex_lock(&((*comm)->comm_create_lock));
430             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
/* overwrite our own peer slot with the last peer (swap-remove;
   'myrank' is computed in an elided line — confirm) */
432         (*comm)->grp.peers[myrank] = (*comm)->grp.peers[(*comm)->grp.N-1];
434         ret = tMPI_Thread_mutex_unlock(&((*comm)->comm_create_lock));
437             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
442 /* we're the last one so we can safely destroy it */
443 ret = tMPI_Comm_destroy(*comm, TRUE);
450 /* This is correct if programs actually treat Comm_free as a collective
457 size = (*comm)->grp.N;
459 /* we add 1 to the destroy counter and actually deallocate if the counter
461 sum = tMPI_Atomic_fetch_add( &((*comm)->destroy_counter), 1) + 1;
462 /* this is a collective call on a shared data structure, so only
463 one process (the last one in this case) should do anything */
/* presumably executed only when sum == size (condition elided) */
466         ret = tMPI_Comm_destroy(*comm, TRUE);
476 int tMPI_Comm_dup(tMPI_Comm comm, tMPI_Comm *newcomm)
479 tMPI_Trace_print("tMPI_Comm_dup(%p, %p)", comm, newcomm);
481 /* we just call Comm_split because it already contains all the
482 neccesary synchronization constructs. */
483 return tMPI_Comm_split(comm, 0, tMPI_Comm_seek_rank(comm,
484 tMPI_Get_current()), newcomm);
488 int tMPI_Comm_create(tMPI_Comm comm, tMPI_Group group, tMPI_Comm *newcomm)
490 int color = TMPI_UNDEFINED;
491 int key = tMPI_Comm_seek_rank(comm, tMPI_Get_current());
494 tMPI_Trace_print("tMPI_Comm_create(%p, %p, %p)", comm, group, newcomm);
496 if (tMPI_In_group(group))
500 /* the MPI specs specifically say that this is equivalent */
501 return tMPI_Comm_split(comm, color, key, newcomm);
504 static void tMPI_Split_colors(int N, const int *color, const int *key,
505 int *Ngroups, int *grp_N, int *grp_color,
512 for (i = 0; i < N; i++)
516 for (i = 0; i < N; i++)
518 if (color[i] != TMPI_UNDEFINED)
521 for (j = 0; j < (*Ngroups); j++)
523 if (grp_color[j] == color[i])
525 /* we insert where we need to, by counting back */
528 while (k > 0 && ( key[group[N*j + k-1]] > key[i]) )
531 group[N*j + k] = group[N*j + k-1];
541 /* not found. just add a new color */
542 grp_N[(*Ngroups)] = 1;
543 grp_color[(*Ngroups)] = color[i];
544 group[N*(*Ngroups) + 0] = i;
/*
 * tMPI_Comm_split: the central collective communicator-creation routine.
 * All N threads of 'comm' call in; under comm_create_lock the first
 * arriver allocates a shared 'split' structure and a new_comm list,
 * every thread deposits its (color, key), the last arriver groups the
 * threads by color (via tMPI_Split_colors), allocates one communicator
 * per color, fills the per-thread newcomm_list, and broadcasts
 * comm_create_finish; each thread then picks up its own entry.
 *
 * NOTE(review): garbled numbered listing. Gaps in the fused line numbers
 * show elided lines: declarations of 'i'/'j'/'ret'/'Ncomms', braces,
 * the branch selecting the first arriver, the 'if' guards around the
 * visible error returns, Ncol_init/Ncol_destroy decrements, and the
 * debug-print guards. Restore from version control before building;
 * comments below describe only what is visible.
 */
551 /* this is the main comm creation function. All other functions that create
553 int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm)
556     int N = tMPI_Comm_N(comm);
557     volatile tMPI_Comm *newcomm_list;
558     volatile int colors[MAX_PREALLOC_THREADS]; /* array with the colors
560 volatile int keys[MAX_PREALLOC_THREADS]; /* same for keys (only one of
561 the threads actually supplies
562 these arrays to the comm
564 tmpi_bool i_am_first = FALSE;
565 int myrank = tMPI_Comm_seek_rank(comm, tMPI_Get_current());
566 struct tmpi_split *spl;
570 tMPI_Trace_print("tMPI_Comm_split(%p, %d, %d, %p)", comm, color, key,
/* presumably guarded by an elided NULL-comm check — confirm */
576         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
/* everything below runs under the comm-creation mutex */
579     ret = tMPI_Thread_mutex_lock(&(comm->comm_create_lock));
582         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
584 /* first get the colors */
587 /* i am apparently first */
588 comm->split = (struct tmpi_split*)tMPI_Malloc(sizeof(struct tmpi_split));
589 comm->new_comm = (tMPI_Comm*)tMPI_Malloc(N*sizeof(tMPI_Comm));
/* small-N fast path: use the first thread's stack arrays instead of
   heap-allocating the color/key arrays */
590     if (N <= MAX_PREALLOC_THREADS)
592         comm->split->colors = colors;
593         comm->split->keys = keys;
597         comm->split->colors = (int*)tMPI_Malloc(N*sizeof(int));
598         comm->split->keys = (int*)tMPI_Malloc(N*sizeof(int));
600     comm->split->Ncol_init = tMPI_Comm_N(comm);
601     comm->split->can_finish = FALSE;
603 /* the main communicator contains a list the size of grp.N */
605 newcomm_list = comm->new_comm; /* we copy it to the local stacks because
606 we can later erase comm->new_comm safely */
607 spl = comm->split; /* we do the same for spl */
/* every thread deposits its own color/key (Ncol_init is presumably
   decremented in an elided line — confirm) */
608     spl->colors[myrank] = color;
609     spl->keys[myrank] = key;
612     if (spl->Ncol_init == 0)
/* all colors are in: wake the creator thread waiting on comm_create_prep */
614         ret = tMPI_Thread_cond_signal(&(comm->comm_create_prep));
617             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
623 /* all other threads can just wait until the creator thread is
625 while (!spl->can_finish)
627         ret = tMPI_Thread_cond_wait(&(comm->comm_create_finish),
628 &(comm->comm_create_lock) );
631             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
/* ---- creator-thread path (branch selection elided) ---- */
638         int comm_color_[MAX_PREALLOC_THREADS];
639         int comm_N_[MAX_PREALLOC_THREADS];
640         int *comm_color = comm_color_; /* there can't be more comms than N*/
641         int *comm_N = comm_N_; /* the number of procs in a group */
643         int *comm_groups; /* the groups */
644         tMPI_Comm *comms; /* the communicators */
646 /* wait for the colors to be done */
648 while (spl->Ncol_init > 0)
650             ret = tMPI_Thread_cond_wait(&(comm->comm_create_prep),
651 &(comm->comm_create_lock));
654                 return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
658 /* reset the state so that a new comm creating function can run */
659 spl->Ncol_destroy = N;
663         comm_groups = (int*)tMPI_Malloc(N*N*sizeof(int));
664         if (N > MAX_PREALLOC_THREADS)
666             comm_color = (int*)tMPI_Malloc(N*sizeof(int));
667             comm_N = (int*)tMPI_Malloc(N*sizeof(int));
670 /* count colors, allocate and split up communicators */
671 tMPI_Split_colors(N, (int*)spl->colors,
674 comm_N, comm_color, comm_groups);
677 /* allocate a bunch of communicators */
678 comms = (tMPI_Comm*)tMPI_Malloc(Ncomms*sizeof(tMPI_Comm));
679 for (i = 0; i < Ncomms; i++)
681             ret = tMPI_Comm_alloc(&(comms[i]), comm, comm_N[i]);
682             if (ret != TMPI_SUCCESS)
688 /* now distribute the comms */
689 for (i = 0; i < Ncomms; i++)
691             comms[i]->grp.N = comm_N[i];
692             for (j = 0; j < comm_N[i]; j++)
694                 comms[i]->grp.peers[j] =
695 comm->grp.peers[comm_groups[i*comm->grp.N + j]];
698 /* and put them into the newcomm_list */
699 for (i = 0; i < N; i++)
701             newcomm_list[i] = TMPI_COMM_NULL;
702             for (j = 0; j < Ncomms; j++)
704                 if (spl->colors[i] == comm_color[j])
706                     newcomm_list[i] = comms[j];
/* ---- debug dump of the computed groups (guard elided, presumably
   a TMPI_DEBUG-style #ifdef — confirm) ---- */
714         for (i = 0; i < Ncomms; i++)
716             printf("Group %d (color %d) has %d members: ",
717 i, comm_color[i], comm_N[i]);
718             for (j = 0; j < comm_N[i]; j++)
720                 printf(" %d ", comm_groups[comm->grp.N*i + j]);
724             for (j = 0; j < comm_N[i]; j++)
726                 printf(" %d ", spl->keys[comm_groups[N*i + j]]);
729             for (j = 0; j < comm_N[i]; j++)
731                 printf(" %d ", spl->colors[comm_groups[N*i + j]]);
/* heap color/key arrays only exist when N exceeded the preallocation */
736         if (N > MAX_PREALLOC_THREADS)
738             free((int*)spl->colors);
739             free((int*)spl->keys);
745         spl->can_finish = TRUE;
747 /* tell the waiting threads that there's a comm ready */
748 ret = tMPI_Thread_cond_broadcast(&(comm->comm_create_finish));
751             return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
754 /* here the individual threads get their comm object */
755 *newcomm = newcomm_list[myrank];
757 /* free when we have assigned them all, so we can reuse the object*/
/* Ncol_destroy is presumably decremented in an elided line — confirm */
759     if (spl->Ncol_destroy == 0)
761         free((void*)newcomm_list);
765     ret = tMPI_Thread_mutex_unlock(&(comm->comm_create_lock));
768         return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_IO);
774 int tMPI_Comm_seek_rank(tMPI_Comm comm, struct tmpi_thread *th)
782 for (i = 0; i < comm->grp.N; i++)
784 if (comm->grp.peers[i] == th)