/*
   This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009, Sander Pronk, Erik Lindahl.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
   names of its contributors may be used to endorse or promote products
   derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
   files.
 */
/** \file

    \brief Partial implementation of MPI using only threads.

    See the MPI specification at
    http://www.mpi-forum.org/docs/docs.html
    for an explanation of what these functions do.

    Because this is a thread-based library, be very careful with global
    variables and static variables in functions: they will be shared across
    all threads and lead to conflicts if not properly mutex-ed or barrier-ed
    out.

    \sa http://www.mpi-forum.org/docs/docs.html for MPI documentation.
 */
/* for size_t, include stddef.h - which is in C89. This is done
   regardless of whether we're compiling C++ or C code because the base
   library for this is in C. */
#include <stddef.h>

#include "visibility.h"
70 } /* Avoids screwing up auto-indentation */
77 Use this to check for thread_mpi with the preprocessor. */
/** tMPI initialization thread affinity strategy.

    Used in the tMPI_Init_affinity() and tMPI_Init_fn_affinity() functions,
    to control how affinity is set. The default tMPI_Init() and tMPI_Init_fn()
    functions use the TMPI_AFFINITY_ALL_CORES strategy.

    These strategies are fairly basic. For more flexibility, use the
    tMPI_Set_affinity() function.*/
typedef enum
{
    TMPI_AFFINITY_NONE = 0,  /**< Do not set any thread affinity */
    TMPI_AFFINITY_ALL_CORES  /**< Only set affinity if the number of threads
                                  is equal to the number of hardware threads
                                  (cores + hyperthreads). This is the only
                                  safe way to set thread affinity,
                                  without clashes between multiple
                                  instances of the same program. */
} tMPI_Affinity_strategy;
/** tMPI Communicator

    Holds the group of processes to communicate
    with, and defines the scope for global operations such as broadcast. */
typedef struct tmpi_comm_ *tMPI_Comm;

/** tMPI Group

    The group structure. Contains a list of threads. */
typedef struct tmpi_group_ *tMPI_Group;

/** tMPI Request

    Request structure for holding data about non-blocking transfers. */
typedef struct tmpi_req_ *tMPI_Request;

/** tMPI datatype

    tMPI data type structure. Holds info about datatypes. */
typedef struct tmpi_datatype_ *tMPI_Datatype;
125 /*! \name tMPI Data types
126 These are MPI data types as specified by the MPI standard.
127 Note that not all are available. */
130 extern const tMPI_Datatype TMPI_CHAR; /**< char */
132 extern const tMPI_Datatype TMPI_SHORT; /**< short */
134 extern const tMPI_Datatype TMPI_INT; /**< int */
136 extern const tMPI_Datatype TMPI_LONG; /**< long */
137 #ifdef SIZEOF_LONG_LONG_INT
139 extern const tMPI_Datatype TMPI_LONG_LONG; /**< long long */
141 extern const tMPI_Datatype TMPI_LONG_LONG_INT; /**< long long int */
144 extern const tMPI_Datatype TMPI_SIGNED_CHAR; /**< signed char */
146 extern const tMPI_Datatype TMPI_UNSIGNED_CHAR; /**< unsigned char */
148 extern const tMPI_Datatype TMPI_UNSIGNED_SHORT; /**< unsigned short */
150 extern const tMPI_Datatype TMPI_UNSIGNED; /**< unsigned int */
152 extern const tMPI_Datatype TMPI_UNSIGNED_LONG; /**< unsigned long */
153 #ifdef SIZEOF_LONG_LONG_INT
155 extern const tMPI_Datatype TMPI_UNSIGNED_LONG_LONG; /**< unsigned long long */
158 extern const tMPI_Datatype TMPI_FLOAT; /**< float */
160 extern const tMPI_Datatype TMPI_DOUBLE; /**< double */
162 extern const tMPI_Datatype TMPI_LONG_DOUBLE; /**< long double */
163 /*extern tMPI_Datatype tMPI_UNSIGNED_WCHAR */
165 extern const tMPI_Datatype TMPI_BYTE; /**< byte (for binary
168 extern const tMPI_Datatype TMPI_POINTER; /**< pointer (thread_mpi
172 extern const tMPI_Datatype TMPI_INT64_T; /**< int64_t */
/** tMPI error codes

    Returned by tMPI functions; TMPI_SUCCESS signals no error. */
enum
{
    TMPI_SUCCESS = 0,            /*!< No error */
    TMPI_ERR_NO_MEM,             /*!< Out of memory */
    TMPI_ERR_IO,                 /*!< I/O Error (used for system errors) */
    TMPI_ERR_INIT,               /*!< Initialization error */
    TMPI_ERR_FINALIZE,           /*!< Finalize error */
    TMPI_ERR_GROUP,              /*!< Group error */
    TMPI_ERR_COMM,               /*!< Comm error */
    TMPI_ERR_STATUS,             /*!< Status error */
    TMPI_ERR_GROUP_RANK,         /*!< Group rank error */
    TMPI_ERR_DIMS,               /*!< Invalid topology dimensions */
    TMPI_ERR_COORDS,             /*!< Invalid topology coordinates */
    TMPI_ERR_CART_CREATE_NPROCS, /*!< Not enough processes for topology*/
    TMPI_ERR_XFER_COUNTERPART,   /*!< Invalid counterpart for xfer */
    TMPI_ERR_XFER_BUFSIZE,       /*!< buffer size too small*/
    TMPI_ERR_XFER_BUF_OVERLAP,   /*!< buffer overlaps (thread error?)*/
    TMPI_ERR_SEND_DEST,          /*!< Faulty send destination */
    TMPI_ERR_RECV_SRC,           /*!< Faulty receive source */
    TMPI_ERR_BUF,                /*!< Invalid buffer */
    TMPI_ERR_MULTI_MISMATCH,     /*!< Comm not the same in collective call*/
    TMPI_ERR_OP_FN,              /*!< Invalid reduce operator*/
    TMPI_ERR_ENVELOPES,          /*!< out of envelopes (tMPI internal) */
    TMPI_ERR_REQUESTS,           /*!< out of requests (tMPI internal) */
    TMPI_ERR_COPY_NBUFFERS,      /*!< out of copy buffers (tMPI internal)*/
    TMPI_ERR_COPY_BUFFER_SIZE,   /*!< copy buffer size err (tMPI internal)*/
    TMPI_ERR_IN_STATUS,          /*!< error code in tMPI_Status */
    TMPI_ERR_PROCNR,             /*!< Hardware processor number (such as for
                                      thread affinity) error */
    TMPI_FAILURE,                /*!< Transmission failure */
    TMPI_ERR_UNKNOWN,            /*!< Unknown error */
    N_TMPI_ERR                   /* this must be the last one */
};
212 /** Maximum length of error string for tMPI_Error_string() */
213 #define TMPI_MAX_ERROR_STRING 256
215 /** default code for undefined value,
217 For example for undefined color in tMPI_Split(). */
218 #define TMPI_UNDEFINED -1
220 /** error handler function */
221 typedef void (*tMPI_Errhandler_fn)(tMPI_Comm*, int*);
222 /** error handler object */
223 typedef struct tmpi_errhandler_ *tMPI_Errhandler;
225 /** pre-defined error handler that abort()s on every error */
226 extern tMPI_Errhandler TMPI_ERRORS_ARE_FATAL;
227 /** pre-defined error handler that tries to continue on every error */
228 extern tMPI_Errhandler TMPI_ERRORS_RETURN;
/*! \name tMPI_Comm_compare() return codes */
/** Identical comms*/
#define TMPI_IDENT 0
/** Comms with the same members in the same order*/
#define TMPI_CONGRUENT 1
/** Comms with the same members in the different order*/
#define TMPI_SIMILAR 2
/** Comms with the different members */
#define TMPI_UNEQUAL 3
/** Source number wildcard so tMPI_Recv(), etc. can receive from
    any source. */
#define TMPI_ANY_SOURCE -1
/** Tag number wildcard so tMPI_Recv(), etc. can receive messages with
    any tag. */
#define TMPI_ANY_TAG -1
/** Return code for Cartesian topology with tMPI_Topo_test(). */
#define TMPI_CART 1
/** Return code for graph topology with tMPI_Topo_test(). */
#define TMPI_GRAPH 2
256 /** Pre-initialized communicator with all available threads. */
258 extern tMPI_Comm TMPI_COMM_WORLD;
261 /** A pre-defined NULL communicator to compare against, to check comm
263 #define TMPI_COMM_NULL NULL
264 /** A pre-defined NULL group to compare against, to check group
266 #define TMPI_GROUP_NULL NULL
268 /** the empty group */
269 extern tMPI_Group TMPI_GROUP_EMPTY;
272 /** The maximum processor name returned using tMPI_Get_processor_name(). */
273 #define TMPI_MAX_PROCESSOR_NAME 128
276 /** Used as NULL status for tMPI_Recv(), etc. */
277 #define TMPI_STATUS_IGNORE NULL
278 /** Used as NULL status list for tMPI_Waitall(), etc. */
279 #define TMPI_STATUSES_IGNORE NULL
/** tMPI Status.

    Holds status info (tag, sender, amount of data transmitted) for receives.
    The status object is user-maintained. */
typedef struct tmpi_status_
{
    int    TMPI_SOURCE;  /**< Message source rank. */
    int    TMPI_TAG;     /**< Message source tag. */
    int    TMPI_ERROR;   /**< Message error. */
    size_t transferred;  /**< Number of transferred bytes */
    int    cancelled;    /**< Whether the transmission was canceled */
} tMPI_Status;
/*typedef struct tmpi_status_ tMPI_Status;*/

/** A NULL request, to compare tMPI_Request handles against. */
#define TMPI_REQUEST_NULL NULL
/** collective communication special to signify that the send
    buffer is to function as receive buffer.

    Used, for example in tMPI_Reduce. */
#define TMPI_IN_PLACE NULL
/** tMPI_Reduce operators.

    These all work (except obviously bad combinations like bitwise
    and/or/xor on floats, etc): */
typedef enum
{
    TMPI_MAX,  /**< calculate maximum value */
    TMPI_MIN,  /**< calculate minimum value */
    TMPI_SUM,  /**< calculate sum */
    TMPI_PROD, /**< calculate product */
    TMPI_LAND, /**< calculate logical and */
    TMPI_BAND, /**< calculate binary and */
    TMPI_LOR,  /**< calculate logical or */
    TMPI_BOR,  /**< calculate binary or */
    TMPI_LXOR, /**< calculate logical xor */
    TMPI_BXOR  /**< calculate binary xor */
} tMPI_Op;
324 /* function to obtain tMPI_COMM_SELF */
325 tMPI_Comm tMPI_Get_comm_self(void);
327 /** The thread-specific comm containing only the thread itself.
330 \return the self comm object associated with the thread. */
331 #define TMPI_COMM_SELF (tMPI_Get_comm_self())
/*! \name Initialization and exit functions
 \{ */
343 /** Traditional MPI initializer; spawns threads that start at the given
346 Seeks the argument '-nt n', where n is the number of
347 threads that will be created. If n==0, the number of threads will
348 be the recommended number of threads for this platform as obtained
349 from tMPI_Get_recommended_ntreads().
351 The new threads then run the function start_function, with the original
352 argc and argv. This function could be main(), or any other function;
353 calling this function again - whether from the started threads or from
354 the main thread - has no effect.
356 On platforms that support thread affinity setting, this function will
357 use the 'all-cores' affinity strategy: it will only set thread affinity
358 if the number of threads is equal to the number of hardware threads
359 (cores + hyperthreads).
361 \param[in] argc argc of original main() invocation, or NULL
362 \param[in] argv argv of original main() invocation, or NULL.
363 \param[in] start_function Starting function of type
364 int start_function(int argc, char *argv[]);
366 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
368 int tMPI_Init(int *argc, char ***argv,
369 int (*start_function)(int, char**));
372 /** Generic init function thread MPI intializer and thread spawner.
374 Creates N threads (including main thread)
375 that run the function start_function, which takes a void* argument,
376 given by arg. The function start_function also gets called by the main
377 thread. When the function start_function returns it, will behave
378 as if tMPI_Finalize is called, and if it's a sub-thread it will
381 If N==0, the number of threads will be the recommended number of
382 threads for this platform as obtained from tMPI_Get_recommended_ntreads().
384 Note that thread affinity strategy only has an effect when this is
385 supported by the underlying platform. As of yet (2012), this is not the
386 case for Mac OS X, for example.
388 \param[in] main_thread_returns whether the control in the main thread
389 should return immediately (if true), or
390 the start_function() should be called
391 from the main thread, too (if false).
392 \param[in] N The number of threads to start (or 0 to
393 automatically determine this).
394 \param[in] aff_strategy The thread affinity strategy to use.
395 \param[in] start_function The function to start threads at
396 (including main thread if
397 main_thread_returns).
398 \param[in] arg An optional argument for start_function().
400 \return TMPI_FAILURE on failure, TMPI_SUCCESS on succes (after all
401 threads have finished if main_thread_returns=true). */
403 int tMPI_Init_fn(int main_thread_returns, int N,
404 tMPI_Affinity_strategy aff_strategy,
405 void (*start_function)(void*), void *arg);
411 /** get the number of threads from the command line
413 can be called before tMPI_Init()
415 \param[in] argc argc from main()
416 \param[in] argv argv from main()
417 \param[in] optname name of the argument specifying the
418 number of threads to run. If this is
419 NULL, this function will read the first
420 argument and interpret it as the number
422 \param[out] nthreads the number of threads
424 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
426 int tMPI_Get_N(int *argc, char ***argv, const char *optname, int *nthreads);
430 /** Waits for all other threads to finish and cleans up
432 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
434 int tMPI_Finalize(void);
437 /** Just kills all threads.
439 Not really neccesary because exit() would do that for us anyway.
441 \param[in] comm Comm to kill threads for
442 \param[in] errorcode Error code to exit with
444 \return Never returns. */
446 int tMPI_Abort(tMPI_Comm comm, int errorcode);
448 /** whether tMPI_Init, but not yet tMPI_Finalize, has been run
450 \param[out] flag Set to TRUE if tMPI_Init() has been called,
453 \return always returns TMPI_SUCCESS. */
455 int tMPI_Initialized(int *flag);
457 /** Determine whether tMPI_Finalize has been run.
459 \param[out] flag Set to TRUE if tMPI_Finalize() has been
460 called, FALSE if not.
462 \return always returns TMPI_SUCCESS. */
464 int tMPI_Finalized(int *flag);
/*! \name Error handling functions
 \{ */
477 /** Create an error handler object from a function.
479 \param[in] function The function to make an error handler of.
480 \param[out] errhandler The error handler.
482 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
484 int tMPI_Create_errhandler(tMPI_Errhandler_fn *function,
485 tMPI_Errhandler *errhandler);
488 /** Free the error handler object.
490 \param[in] errhandler The error handler.
491 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
493 int tMPI_Errhandler_free(tMPI_Errhandler *errhandler);
495 /** Set the error handler.
497 \param[in] comm the communicator to set the error handler for.
498 \param[in] errhandler the error handler.
500 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
502 int tMPI_Comm_set_errhandler(tMPI_Comm comm, tMPI_Errhandler errhandler);
504 /** get the error handler.
506 Gets the error handler associated with a comm
508 \param[in] comm the communicator to get the error handler for.
509 \param[out] errhandler the error handler.
511 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
513 int tMPI_Comm_get_errhandler(tMPI_Comm comm, tMPI_Errhandler *errhandler);
515 /** get the error string associated with an error code.
517 The length of the error string will never exceed TMPI_MAX_ERROR_STRING.
519 \param[in] errorcode The error code.
520 \param[out] string The pre-allocated char pointer to output to.
521 \param[out] resultlen The length of the error string. Will
522 never be longer than TMPI_MAX_ERROR_STRING.
524 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
526 int tMPI_Error_string(int errorcode, char *string, size_t *resultlen);
/*! \name Environment query functions
 \{ */
538 /** returns string with thread number.
540 \param[out] name Pre-allocated string to output name to (will not
541 be longer than TMPI_MAX_PROCESSOR_NAME).
542 \param[out] resultlen The length of the output. Note that this is an
543 int instead of a size_t because the MPI standard
544 for some reason defines all sizes as int
546 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
548 int tMPI_Get_processor_name(char *name, int *resultlen);
/** get a time value as a double, in seconds.

    \return time value. */
double tMPI_Wtime(void);

/** get the resolution of tMPI_Wtime as a double, in seconds

    \return time resolution. */
double tMPI_Wtick(void);
563 #define tMPI_This_threadnr() (int)(tMPI_Get_current() - threads)
565 /** Get the thread number of this thread.
566 Mostly for debugging.
568 \return the global thread number. */
569 int tMPI_This_Threadnr(void);
/*! \name tMPI_Group functions
 \{ */
584 /** Get the size (number of members) of a group.
586 \param[in] group The group.
587 \param[out] size Size.
588 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
590 int tMPI_Group_size(tMPI_Group group, int *size);
592 /** Get the rank of a thread in a group
594 \param[in] group The group.
595 \param[out] rank Variable for the rank, or TMPI_UNDEFINED
597 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
599 int tMPI_Group_rank(tMPI_Group group, int *rank);
601 /** Create a new group as a the collection of threads with given ranks.
603 \param[in] group The group from which the ranks are taken.
604 \param[in] n The number of new group members.
605 \param[in] ranks The ranks of the threads to add to the new group.
606 \param[out] newgroup The new group.
608 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
610 int tMPI_Group_incl(tMPI_Group group, int n, int *ranks, tMPI_Group *newgroup);
612 /** Get a pointer to the group in the comm.
614 \param[in] comm The comm from which to take the group.
615 \param[out] group The comm's group.
617 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
619 int tMPI_Comm_group(tMPI_Comm comm, tMPI_Group *group);
621 /** De-allocate a group
623 \param[in] group The group to de-allocate.
624 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
626 int tMPI_Group_free(tMPI_Group *group);
/*! \name tMPI_Comm functions
 \{ */
637 /** Get the comm size (nr. of threads).
639 \param[in] comm The comm to query.
640 \param[out] size The comm size.
641 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
643 int tMPI_Comm_size(tMPI_Comm comm, int *size);
645 /** get the rank in comm of the current process
647 \param[in] comm The comm to query.
648 \param[out] rank Thread rank in comm.
649 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
651 int tMPI_Comm_rank(tMPI_Comm comm, int *rank);
653 /** Compare two comms. Returns TMPI_IDENT if the two comms point to
654 the same underlying comm structure, TMPI_CONGRUENT if all
655 members appear in the both comms in the same order, TMPI_SIMILAR
656 if both comms have the smae members but not in the same order, or
657 TMPI_UNEQUAL if the comms have different members.
659 \param[in] comm1 The first comm to compare.
660 \param[in] comm2 The second comm to compare.
661 \param[out] result The output result, one of the values
663 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
665 int tMPI_Comm_compare(tMPI_Comm comm1, tMPI_Comm comm2, int *result);
668 /** De-allocate a comm
672 \param[in] comm The comm to free.
673 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
675 int tMPI_Comm_free(tMPI_Comm *comm);
677 /** Create a comm based on group membership.
679 Collective function that creates a new comm containing only proceses
680 that are members of the given group.
682 \param[in] comm The originating comm.
683 \param[in] group The group of threads to create a comm from.
684 \param[out] newcomm The new comm.
686 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
688 int tMPI_Comm_create(tMPI_Comm comm, tMPI_Group group, tMPI_Comm *newcomm);
690 /** Split up a group into same-colored sub-groups ordered by key.
692 This is the main comm creation function: it's a collective call that takes
693 a color and a key from each process, and arranges all threads that
694 call tMPI_Split() withe the same color together into a comm.
696 The rank in the new group will be based on the value given in key.
698 Passing TMPI_UNDEFINED as a color will result in the thread not being
699 part of any group, and getting TMPI_COMM_NULL back in newcomm.
701 \param[in] comm The originating comm.
702 \param[in] color This thread's color (determines which comm it will
703 be in). Giving TMPI_UNDEFINED will result in
704 this thread not being in any group.
705 \param[in] key This thread's key (determines rank).
706 \param[out] newcomm The new comm.
707 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
709 int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm);
711 /** Make a duplicate of a comm.
715 \param[in] comm The originating comm.
716 \param[in] newcomm The new comm.
717 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
719 int tMPI_Comm_dup(tMPI_Comm comm, tMPI_Comm *newcomm);
/*! \name Topology functions
 \{ */
731 /* topology functions */
732 /** Check what type of topology the comm has.
734 \param[in] comm The comm to query
735 \param[out] status The type of topology.
737 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
739 int tMPI_Topo_test(tMPI_Comm comm, int *status);
741 /** Get the dimensionality of a comm with a topology.
743 \param[in] comm The comm to query.
744 \param[out] ndims The number of dimensions.
746 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
749 int tMPI_Cartdim_get(tMPI_Comm comm, int *ndims);
750 /** Get the size and pbc a of a comm with a Cartesian topology has.
752 \param[in] comm The comm to query.
753 \param[in] maxdims The maximum number of dimensions in the periods
754 and coords parameter.
755 \param[out] dims The number of dimensions.
756 \param[out] periods The periodicity in each dimension.
757 \param[out] coords The number of coordinates in each dimension.
759 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
762 int tMPI_Cart_get(tMPI_Comm comm, int maxdims, int *dims, int *periods,
766 /** Get rank that a specific set of process coordinates has in
767 a Cartesian topology.
769 \param[in] comm The comm to query.
770 \param[in] coords The coordinates in each dimension.
771 \param[out] rank The rank associated with the coordinates.
773 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
775 int tMPI_Cart_rank(tMPI_Comm comm, int *coords, int *rank);
777 /** Get coordinates of a process rank in a Cartesian topology.
779 \param[in] comm The comm to query.
780 \param[in] rank The rank associated with the coordinates.
781 \param[in] maxdims The maximum number of dimensions in the coords
783 \param[out] coords The coordinates in each dimension.
785 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
787 int tMPI_Cart_coords(tMPI_Comm comm, int rank, int maxdims, int *coords);
789 /** Get optimal rank this process would have in a Cartesian topology.
791 \param[in] comm The comm to query.
792 \param[in] ndims The number of dimensions.
793 \param[in] dims The size in each dimension.
794 \param[in] periods The periodicity in each dimension.
796 \param[out] newrank The rank the thread would have given the topology.
798 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
800 int tMPI_Cart_map(tMPI_Comm comm, int ndims, int *dims, int *periods,
803 /** Create a comm with a Cartesian topology.
805 \param[in] comm_old The originating comm.
806 \param[in] ndims The number of dimensions.
807 \param[in] dims The size in each dimension.
808 \param[in] periods The periodicity in each dimension.
809 \param[in] reorder Whether to allow reordering of the threads
810 according to tMPI_Cart_map().
811 \param[out] comm_cart The new comm with Cartesian topology.
813 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
815 int tMPI_Cart_create(tMPI_Comm comm_old, int ndims, int *dims, int *periods,
816 int reorder, tMPI_Comm *comm_cart);
818 /** Create a comms that are sub-spaces of the Cartesian topology communicator.
819 Works like a MPI_Comm_split() for the Cartesian dimensions specified
820 as false in remain_dims.
822 \param[in] comm The originating comm with Cartesian topology.
823 \param[in] remain_dims An Boolean array that decides whether a specific
824 dimensionality should remain in newcomm (if true),
825 or should be split up (if false).
826 \param[out] newcomm The new split communicator
828 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
830 int tMPI_Cart_sub(tMPI_Comm comm, int *remain_dims, tMPI_Comm *newcomm);
/*! \name Data type manipulation functions
 \{ */
843 /** Create a contiguous data type (the only type possible right now).
845 Creates a datatype that is a vector of oldtype.
847 \param[in] count The number of oldtype types in the new type.
848 \param[in] oldtype The old data type.
849 \param[out] newtype The new data type (still needs to be committed).
850 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
852 int tMPI_Type_contiguous(int count, tMPI_Datatype oldtype,
853 tMPI_Datatype *newtype);
856 /** Make a data type ready for use.
858 \param[in,out] datatype The new datatype.
859 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
861 int tMPI_Type_commit(tMPI_Datatype *datatype);
/*! \name Point-to-point communication functions
 \{ */
/* blocking transfers. The actual transfer (copy) is done on the receiving end
   (so that the receiver's cache already contains the data that it presumably
   needs). */
877 /** Send message; blocks until buf is reusable.
879 \param[in] buf The buffer with data to send.
880 \param[in] count The number of items to send.
881 \param[in] datatype The data type of the items in buf.
882 \param[in] dest The rank of the destination thread.
883 \param[in] tag The message tag.
884 \param[in] comm The shared communicator.
885 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
887 int tMPI_Send(void* buf, int count, tMPI_Datatype datatype, int dest,
888 int tag, tMPI_Comm comm);
890 /** Receive message; blocks until buf is filled.
892 \param[out] buf The buffer for data to receive.
893 \param[in] count The maximum number of items to receive.
894 \param[in] datatype The data type of the items in buf.
895 \param[in] source The rank of the source thread (or TMPI_ANY_SOURCE).
896 \param[in] tag The message tag (or TMPI_ANY_TAG).
897 \param[in] comm The shared communicator.
898 \param[out] status The message status.
899 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
901 int tMPI_Recv(void* buf, int count, tMPI_Datatype datatype, int source,
902 int tag, tMPI_Comm comm, tMPI_Status *status);
904 /** Send & receive message at the same time.
906 Blocks until recvbuf is filled, and sendbuf is ready for reuse.
908 \param[in] sendbuf The buffer with data to send.
909 \param[in] sendcount The number of items to send.
910 \param[in] sendtype The data type of the items in send buf.
911 \param[in] dest The rank of the destination thread.
912 \param[in] sendtag The send message tag.
913 \param[out] recvbuf The buffer for data to receive.
914 \param[in] recvcount The maximum number of items to receive.
915 \param[in] recvtype The data type of the items in recvbuf.
916 \param[in] source The rank of the source thread (or TMPI_ANY_SOURCE).
917 \param[in] recvtag The recveive message tag (or TMPI_ANY_TAG).
918 \param[in] comm The shared communicator.
919 \param[out] status The received message status.
920 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
922 int tMPI_Sendrecv(void *sendbuf, int sendcount, tMPI_Datatype sendtype,
923 int dest, int sendtag, void *recvbuf, int recvcount,
924 tMPI_Datatype recvtype, int source, int recvtag,
925 tMPI_Comm comm, tMPI_Status *status);
927 /* async send/recv. The actual transfer is done on the receiving
928 end, during tMPI_Wait, tMPI_Waitall or tMPI_Test. For tMPI_Waitall,
929 the incoming messages are processed in the order they come in. */
931 /** Initiate sending a message, non-blocking.
933 This makes the buffer available to be received. The contents of buf
934 should not be touched before the transmission is finished with
935 tMPI_Wait(), tMPI_Test() or tMPI_Waitall().
938 \param[in] buf The buffer with data to send.
939 \param[in] count The number of items to send.
940 \param[in] datatype The data type of the items in buf.
941 \param[in] dest The rank of the destination thread.
942 \param[in] tag The message tag.
943 \param[in] comm The shared communicator.
944 \param[out] request The request object that can be used in tMPI_Wait(),
946 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
948 int tMPI_Isend(void* buf, int count, tMPI_Datatype datatype, int dest,
949 int tag, tMPI_Comm comm, tMPI_Request *request);
951 /** Initiate receiving a message.
953 This makes the buffer available to be filled with data. The contents of
954 buf should not be relied on before the transmission is finished with
955 tMPI_Wait(), tMPI_Test() or tMPI_Waitall().
957 \param[out] buf The buffer for data to receive.
958 \param[in] count The maximum number of items to receive.
959 \param[in] datatype The data type of the items in buf.
960 \param[in] source The rank of the source thread (or TMPI_ANY_SOURCE).
961 \param[in] tag The message tag (or TMPI_ANY_TAG).
962 \param[in] comm The shared communicator.
963 \param[out] request The request object that can be used in tMPI_Wait(),
965 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
967 int tMPI_Irecv(void* buf, int count, tMPI_Datatype datatype, int source,
968 int tag, tMPI_Comm comm, tMPI_Request *request);
973 /** Test whether a message is transferred.
975 \param[in,out] request The request obtained wit tMPI_Isend()/tMPI_Irecv().
976 \param[out] flag A flag set to TRUE(1) if the request is finished,
978 \param[out] status Message status (can be set to TMPI_STATUS_IGNORE).
980 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
982 int tMPI_Test(tMPI_Request *request, int *flag, tMPI_Status *status);
984 /** Wait until a message is transferred.
986 \param[in,out] request The request obtained wit tMPI_Isend()/tMPI_Irecv().
987 \param[out] status Message status.
989 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
991 int tMPI_Wait(tMPI_Request *request, tMPI_Status *status);
996 /** Wait until several messages are transferred.
998 \param[in] count The number of requests
999 \param[in,out] array_of_requests List of count requests obtained with
1000 tMPI_Isend()/tMPI_Irecv().
1001 \param[out] array_of_statuses List of count message statuses (can
1002 be set to TMPI_STATUSES_IGNORE).
1004 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1006 int tMPI_Waitall(int count, tMPI_Request *array_of_requests,
1007 tMPI_Status *array_of_statuses);
1009 /** Test whether several messages are transferred.
1011 \param[in] count The number of requests
1012 \param[in,out] array_of_requests List of count requests obtained with
1013 tMPI_Isend()/tMPI_Irecv().
1014 \param[out] flag Whether all requests have completed.
1015 \param[out] array_of_statuses List of count message statuses (can
1016 be set to TMPI_STATUSES_IGNORE).
1018 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1020 int tMPI_Testall(int count, tMPI_Request *array_of_requests, int *flag,
1021 tMPI_Status *array_of_statuses);
1023 /** Wait until one of several messages is transferred.
1025 \param[in] count The number of requests
1026 \param[in,out] array_of_requests List of count requests obtained with
1027 tMPI_Isend()/tMPI_Irecv().
1028 \param[out] index Index of the request that has
1030 \param[out] status Pointer to tMPI_Status object
1031 associated with completed request.
1033 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1035 int tMPI_Waitany(int count, tMPI_Request *array_of_requests,
1036 int *index, tMPI_Status *status);
1038 /** Test whether one of several messages is transferred.
1040 \param[in] count The number of requests
1041 \param[in,out] array_of_requests List of count requests obtained with
1042 tMPI_Isend()/tMPI_Irecv().
1043 \param[out] index Index of the request that has
1045 \param[out] flag Whether any request has completed.
1046 \param[out] status Pointer to tMPI_Status object
1047 associated with completed request.
1049 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1051 int tMPI_Testany(int count, tMPI_Request *array_of_requests,
1052 int *index, int *flag, tMPI_Status *status);
1054 /** Wait until some of several messages are transferred. Waits until at least
1055 one message is transferred.
1057 \param[in] incount The number of requests
1058 \param[in,out] array_of_requests List of count requests obtained with
1059 tMPI_Isend()/tMPI_Irecv().
1060 \param[out] outcount Number of completed requests
1061 \param[out] array_of_indices Array of ints that gets filled with
1062 the indices of the completed requests.
1063 \param[out] array_of_statuses List of count message statuses (can
1064 be set to TMPI_STATUSES_IGNORE).
1066 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1068 int tMPI_Waitsome(int incount, tMPI_Request *array_of_requests,
1069 int *outcount, int *array_of_indices,
1070 tMPI_Status *array_of_statuses);
1072 /** Test whether some of several messages are transferred.
1074 \param[in] incount The number of requests
1075 \param[in,out] array_of_requests List of count requests obtained with
1076 tMPI_Isend()/tMPI_Irecv().
1077 \param[out] outcount Number of completed requests
1078 \param[out] array_of_indices Array of ints that gets filled with
1079 the indices of the completed requests.
1080 \param[out] array_of_statuses List of count message statuses (can
1081 be set to TMPI_STATUSES_IGNORE).
1083 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1085 int tMPI_Testsome(int incount, tMPI_Request *array_of_requests,
1086 int *outcount, int *array_of_indices,
1087 tMPI_Status *array_of_statuses);
1094 /** Get the number of actually transferred items from a receive
1097 \param[in] status The status.
1098 \param[in] datatype The data type which was received.
1099 \param[out] count The number of items actually received.
1101 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1103 int tMPI_Get_count(tMPI_Status *status, tMPI_Datatype datatype, int *count);
1113 /*! \name Synchronization functions
1115 /** Block until all threads in the comm call this function.
1117 \param[in] comm The comm object.
1119 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1121 int tMPI_Barrier(tMPI_Comm comm);
1130 /*! \name Multicast communication functions
1132 /** Broadcast from one thread to all others in comm.
1134 Collective function; data is transferred from root's buffer to all others'
1137 \param[in,out] buffer The buffer to send from (root)/receive from
1139 \param[in] count The number of items to send/receive.
1140 \param[in] datatype The type of the items to send/receive.
1141 \param[in] root The rank of the sending thread.
1142 \param[in] comm The communicator.
1144 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1146 int tMPI_Bcast(void* buffer, int count, tMPI_Datatype datatype, int root,
1149 /** Gather data from all threads in comm to root.
1151 Collective function; assumes that all data is received in blocks of
1154 \param[in] sendbuf The send buffer for all threads (root may
1155 specify TMPI_IN_PLACE, in which case it
1156 transfers nothing to itself).
1157 \param[in] sendcount The number of items to send for all threads.
1158 \param[in] sendtype The type of the items to send.
1159 \param[out] recvbuf The receiving buffer (for root thread).
1160 \param[in] recvcount The number of items to receive (for root).
1161 \param[in] recvtype The type of the items to receive (for root).
1162 \param[in] root The rank of root.
1163 \param[in] comm The communicator.
1165 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1167 int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
1168 void* recvbuf, int recvcount, tMPI_Datatype recvtype, int root,
1172 /** Gather irregularly laid out data from all processes in comm to root.
1174 Collective function.
1176 \param[in] sendbuf The send buffer for all threads (root may
1177 specify TMPI_IN_PLACE, in which case it
1178 transfers nothing to itself).
1179 \param[in] sendcount The number of items to send for all threads.
1180 \param[in] sendtype The type of the items to send.
1181 \param[out] recvbuf The receiving buffer (for root thread).
1182 \param[in] recvcounts The list of number of items to receive (for
1184 \param[in] displs The list of displacements in recvbuf to
1185 receive data in (for root).
1186 \param[in] recvtype The type of the items to receive (for root).
1187 \param[in] root The rank of root.
1188 \param[in] comm The communicator.
1190 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1192 int tMPI_Gatherv(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
1193 void* recvbuf, int *recvcounts, int *displs,
1194 tMPI_Datatype recvtype, int root, tMPI_Comm comm);
1197 /** Spread parts of sendbuf to all processes in comm from root.
1199 Collective function.
1201 \param[in] sendbuf The send buffer for root.
1202 \param[in] sendcount The number of items for root to send to each
1204 \param[in] sendtype The type of the items root sends.
1205 \param[out] recvbuf The receiving buffer for all receiving threads
1206 (root may specify TMPI_IN_PLACE, in which case
1207 it transmits nothing to itself).
1208 \param[in] recvcount The number of items recvbuf can receive.
1209 \param[in] recvtype The type of items to receive.
1210 \param[in] root The rank of root.
1211 \param[in] comm The communicator.
1213 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1215 int tMPI_Scatter(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
1216 void* recvbuf, int recvcount, tMPI_Datatype recvtype, int root,
1219 /** Spread irregularly laid out parts of sendbuf to all processes
1222 Collective function.
1224 \param[in] sendbuf The send buffer for root.
1225 \param[in] sendcounts List of the number of items for root to send
1227 \param[in] displs List of displacements in sendbuf from which
1228 to start transmission to each thread.
1229 \param[in] sendtype The type of the items root sends.
1230 \param[out] recvbuf The receiving buffer for all receiving threads
1231 (root may specify TMPI_IN_PLACE, in which case
1232 it transmits nothing to itself).
1233 \param[in] recvcount The number of items recvbuf can receive.
1234 \param[in] recvtype The type of items to receive.
1235 \param[in] root The rank of root.
1236 \param[in] comm The communicator.
1238 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1240 int tMPI_Scatterv(void* sendbuf, int *sendcounts, int *displs,
1241 tMPI_Datatype sendtype, void* recvbuf, int recvcount,
1242 tMPI_Datatype recvtype, int root, tMPI_Comm comm);
1245 /** Spread out parts of sendbuf to all processes from all processes in
1248 Collective function.
1250 \param[in] sendbuf The send buffer.
1251     \param[in]  sendcount   The number of items to send to each thread.
1252 \param[in] sendtype The type of the items to send.
1253 \param[out] recvbuf The receive buffer for all threads.
1254 \param[in] recvcount The number of items recvbuf can receive per
1256 \param[in] recvtype The type of items to receive.
1257 \param[in] comm The communicator.
1259 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1261 int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
1262 void* recvbuf, int recvcount, tMPI_Datatype recvtype,
1266 /** Spread out irregularly laid out parts of sendbuf to all
1267 processes from all processes in comm.
1269 Collective function.
1271 \param[in] sendbuf The send buffer.
1272     \param[in]  sendcounts  List of the number of items to send to
1274 \param[in] sdispls List of the displacements in sendbuf of items
1275 to send to each thread.
1276 \param[in] sendtype The type of the items to send.
1277 \param[out] recvbuf The receive buffer for all threads.
1278 \param[in] recvcounts List of the number of items recvbuf can
1279 receive from each thread.
1280 \param[in] rdispls List of the displacements in recvbuf of items
1281 to receive from each thread.
1282 \param[in] recvtype The type of items to receive.
1283 \param[in] comm The communicator.
1285 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1287 int tMPI_Alltoallv(void* sendbuf, int *sendcounts, int *sdispls,
1288 tMPI_Datatype sendtype, void* recvbuf, int *recvcounts,
1289 int *rdispls, tMPI_Datatype recvtype, tMPI_Comm comm);
1301 /*! \name Reduce functions
1303 /** Do an operation between all locally held buffers on all items in the
1304 buffers, and send the results to root.
1306 Collective function.
1308 \param[in] sendbuf The operand parameters. Root may specify
1309 TMPI_IN_PLACE, in which case recvbuf will hold
1310 the operand parameters.
1311 \param[out] recvbuf The result buffer at root.
1312 \param[in] count The number of items to do operation on.
1313 \param[in] datatype The data type of the items.
1314 \param[in] op The operation to perform.
1315 \param[in] root The root thread (which is to receive the results).
1316 \param[in] comm The communicator.
1318 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1320 int tMPI_Reduce(void* sendbuf, void* recvbuf, int count,
1321 tMPI_Datatype datatype, tMPI_Op op, int root, tMPI_Comm comm);
1325 /** Do an operation between all locally held buffers on all items in the
1326 buffers and broadcast the results.
1328 Collective function.
1331 \param[in] sendbuf The operand parameters. Any process may specify
1332 TMPI_IN_PLACE, in which case recvbuf will hold
1333 the operand parameters for that process.
1334 \param[in,out] recvbuf The result buffer.
1335 \param[in] count The number of items to do operation on.
1336 \param[in] datatype The data type of the items.
1337 \param[in] op The operation to perform.
1338 \param[in] comm The communicator.
1340 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1342 int tMPI_Allreduce(void* sendbuf, void* recvbuf, int count,
1343 tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm);
1345 /** Do an tMPI_Reduce, but with the following assumption:
1346 recvbuf points to a valid buffer in all calling threads, or
1347 sendbuf has the value TMPI_IN_PLACE (in which case the values of
1348 sendbuf may be changed in that thread).
1350     This avoids unnecessary memory allocations associated with the normal
1353 Collective function.
1355 \param[in] sendbuf The operand parameters (or TMPI_IN_PLACE,
1356 in which case the operand parameters will
1358 \param[in,out] recvbuf The result buffer.
1359 \param[in] count The number of items to do operation on.
1360 \param[in] datatype The data type of the items.
1361 \param[in] op The operation to perform.
1362 \param[in] root The root thread (which is to receive
1364 \param[in] comm The communicator.
1366 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1368 int tMPI_Reduce_fast(void* sendbuf, void* recvbuf, int count,
1369 tMPI_Datatype datatype, tMPI_Op op, int root,
1372 /** Do a partial reduce operation, based on rank: the results of the
1373 reduction operation of ranks 0 - i will be put in the recvbuf of
1376 Collective function.
1378 \param[in] sendbuf The operand parameters. All ranks may specify
1379 TMPI_IN_PLACE, in which case recvbuf will hold
1380 the operand parameters.
1381 \param[in,out] recvbuf The result buffer.
1382 \param[in] count The number of items to do operation on.
1383 \param[in] datatype The data type of the items.
1384 \param[in] op The operation to perform.
1385 \param[in] comm The communicator.
1387 \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
1389 int tMPI_Scan(void* sendbuf, void* recvbuf, int count,
1390 tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm);
1398 } /* closing extern "C" */
1401 #endif /* TMPI_TMPI_H_ */