/* This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009, Sander Pronk, Erik Lindahl.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
   files.
*/
/** \file
 *
 * \brief Partial implementation of MPI using only threads.
 *
 * See the MPI specification at
 * http://www.mpi-forum.org/docs/docs.html
 * for an explanation of what these functions do.
 *
 * Because this is a thread-based library, be very careful with global
 * variables and static variables in functions: they will be shared across
 * all threads and lead to conflicts if not properly mutex-ed or barrier-ed
 * out.
 *
 * \sa http://www.mpi-forum.org/docs/docs.html for MPI documentation.
 */
#ifndef TMPI_TMPI_H_
#define TMPI_TMPI_H_

/* for size_t, include stddef.h - which is in C89. This is done
   regardless of whether we're compiling C++ or C code, because the base
   library for this is in C. */
#include <stddef.h>

#include "visibility.h"

#ifdef __cplusplus
extern "C"
{
#endif
#if 0
} /* Avoids screwing up auto-indentation */
#endif


/** tMPI definition.

    Use this to check for thread_mpi with the preprocessor. */
#define TMPI
/** tMPI initialization thread affinity strategy.

    Used in the tMPI_Init_affinity() and tMPI_Init_fn_affinity() functions,
    to control how affinity is set. The default tMPI_Init() and tMPI_Init_fn()
    functions use the TMPI_AFFINITY_ALL_CORES strategy.

    These strategies are fairly basic. For more flexibility, use the
    tMPI_Set_affinity() function. */
typedef enum
{
    TMPI_AFFINITY_NONE = 0,  /**< Do not set any thread affinity */
    TMPI_AFFINITY_ALL_CORES, /**< Only set affinity if the number of threads
                                  is equal to the number of hardware threads
                                  (cores + hyperthreads). This is the only
                                  safe way to set thread affinity
                                  without clashes between multiple
                                  instances of the same program. */
} tMPI_Affinity_strategy;
/** tMPI Communicator

    Holds the group of processes to communicate
    with, and defines the scope for global operations such as broadcast. */
typedef struct tmpi_comm_ *tMPI_Comm;

/** tMPI Group

    The group structure. Contains a list of threads. */
typedef struct tmpi_group_ *tMPI_Group;

/** tMPI Request

    Request structure for holding data about non-blocking transfers. */
typedef struct tmpi_req_ *tMPI_Request;

/** tMPI Datatype

    tMPI data type structure. Holds info about datatypes. */
typedef struct tmpi_datatype_ *tMPI_Datatype;
/*! \name tMPI Data types
    These are MPI data types as specified by the MPI standard.
    Note that not all are available. */
/*! \{ */
extern const tMPI_Datatype TMPI_CHAR;               /**< char */
extern const tMPI_Datatype TMPI_SHORT;              /**< short */
extern const tMPI_Datatype TMPI_INT;                /**< int */
extern const tMPI_Datatype TMPI_LONG;               /**< long */
#ifdef SIZEOF_LONG_LONG_INT
extern const tMPI_Datatype TMPI_LONG_LONG;          /**< long long */
extern const tMPI_Datatype TMPI_LONG_LONG_INT;      /**< long long int */
#endif
extern const tMPI_Datatype TMPI_SIGNED_CHAR;        /**< signed char */
extern const tMPI_Datatype TMPI_UNSIGNED_CHAR;      /**< unsigned char */
extern const tMPI_Datatype TMPI_UNSIGNED_SHORT;     /**< unsigned short */
extern const tMPI_Datatype TMPI_UNSIGNED;           /**< unsigned int */
extern const tMPI_Datatype TMPI_UNSIGNED_LONG;      /**< unsigned long */
#ifdef SIZEOF_LONG_LONG_INT
extern const tMPI_Datatype TMPI_UNSIGNED_LONG_LONG; /**< unsigned long long */
#endif
extern const tMPI_Datatype TMPI_FLOAT;              /**< float */
extern const tMPI_Datatype TMPI_DOUBLE;             /**< double */
extern const tMPI_Datatype TMPI_LONG_DOUBLE;        /**< long double */
/*extern tMPI_Datatype tMPI_UNSIGNED_WCHAR */
extern const tMPI_Datatype TMPI_BYTE;               /**< byte (for binary
                                                         transmissions) */
extern const tMPI_Datatype TMPI_POINTER;            /**< pointer (thread_mpi
                                                         specific) */
/*! \} */
/** tMPI error codes */
enum
{
    TMPI_SUCCESS = 0,            /*!< No error */
    TMPI_ERR_NO_MEM,             /*!< Out of memory */
    TMPI_ERR_IO,                 /*!< I/O Error (used for system errors) */
    TMPI_ERR_INIT,               /*!< Initialization error */
    TMPI_ERR_FINALIZE,           /*!< Finalize error */
    TMPI_ERR_GROUP,              /*!< Group error */
    TMPI_ERR_COMM,               /*!< Comm error */
    TMPI_ERR_STATUS,             /*!< Status error */
    TMPI_ERR_GROUP_RANK,         /*!< Group rank error */
    TMPI_ERR_DIMS,               /*!< Invalid topology dimensions */
    TMPI_ERR_COORDS,             /*!< Invalid topology coordinates */
    TMPI_ERR_CART_CREATE_NPROCS, /*!< Not enough processes for topology */
    TMPI_ERR_XFER_COUNTERPART,   /*!< Invalid counterpart for xfer */
    TMPI_ERR_XFER_BUFSIZE,       /*!< Buffer size too small */
    TMPI_ERR_XFER_BUF_OVERLAP,   /*!< Buffers overlap (thread error?) */
    TMPI_ERR_SEND_DEST,          /*!< Faulty send destination */
    TMPI_ERR_RECV_SRC,           /*!< Faulty receive source */
    TMPI_ERR_BUF,                /*!< Invalid buffer */
    TMPI_ERR_MULTI_MISMATCH,     /*!< Comm not the same in collective call */
    TMPI_ERR_OP_FN,              /*!< Invalid reduce operator */
    TMPI_ERR_ENVELOPES,          /*!< Out of envelopes (tMPI internal) */
    TMPI_ERR_REQUESTS,           /*!< Out of requests (tMPI internal) */
    TMPI_ERR_COPY_NBUFFERS,      /*!< Out of copy buffers (tMPI internal) */
    TMPI_ERR_COPY_BUFFER_SIZE,   /*!< Copy buffer size error (tMPI internal) */
    TMPI_ERR_IN_STATUS,          /*!< Error code in tMPI_Status */
    TMPI_ERR_PROCNR,             /*!< Hardware processor number (such as for
                                      thread affinity) error */
    TMPI_FAILURE,                /*!< Transmission failure */
    TMPI_ERR_UNKNOWN,            /*!< Unknown error */
    N_TMPI_ERR                   /* this must be the last one */
};
/** Maximum length of error string for tMPI_Error_string() */
#define TMPI_MAX_ERROR_STRING 256

/** Default code for an undefined value,

    for example for an undefined color in tMPI_Comm_split(). */
#define TMPI_UNDEFINED -1

/** Error handler function */
typedef void (*tMPI_Errhandler_fn)(tMPI_Comm*, int*);
/** Error handler object */
typedef struct tmpi_errhandler_ *tMPI_Errhandler;

/** Pre-defined error handler that abort()s on every error */
extern tMPI_Errhandler TMPI_ERRORS_ARE_FATAL;
/** Pre-defined error handler that tries to continue on every error */
extern tMPI_Errhandler TMPI_ERRORS_RETURN;
/*! \name tMPI_Comm_compare() return codes */
/*! \{ */
/** Identical comms */
#define TMPI_IDENT 0
/** Comms with the same members in the same order */
#define TMPI_CONGRUENT 1
/** Comms with the same members in a different order */
#define TMPI_SIMILAR 2
/** Comms with different members */
#define TMPI_UNEQUAL 3
/*! \} */


/** Source number wildcard so tMPI_Recv(), etc. can receive from
    any source. */
#define TMPI_ANY_SOURCE -1
/** Tag number wildcard so tMPI_Recv(), etc. can receive messages with
    any tag. */
#define TMPI_ANY_TAG -1


/** Return code for Cartesian topology with tMPI_Topo_test(). */
#define TMPI_CART 1
/** Return code for graph topology with tMPI_Topo_test(). */
#define TMPI_GRAPH 2
/** Pre-initialized communicator with all available threads. */
extern tMPI_Comm TMPI_COMM_WORLD;


/** A pre-defined NULL communicator to compare against, to check comm
    validity. */
#define TMPI_COMM_NULL NULL
/** A pre-defined NULL group to compare against, to check group
    validity. */
#define TMPI_GROUP_NULL NULL
/** The empty group */
extern tMPI_Group TMPI_GROUP_EMPTY;


/** The maximum length of a processor name returned by
    tMPI_Get_processor_name(). */
#define TMPI_MAX_PROCESSOR_NAME 128


/** Used as NULL status for tMPI_Recv(), etc. */
#define TMPI_STATUS_IGNORE NULL
/** Used as NULL status list for tMPI_Waitall(), etc. */
#define TMPI_STATUSES_IGNORE NULL
/** tMPI Status

    Holds status info (tag, sender, amount of data transmitted) for receives.
    The status object is user-maintained. */
typedef struct tmpi_status_
{
    int    TMPI_SOURCE;  /**< Message source rank. */
    int    TMPI_TAG;     /**< Message source tag. */
    int    TMPI_ERROR;   /**< Message error. */
    size_t transferred;  /**< Number of transferred bytes */
    int    cancelled;    /**< Whether the transmission was cancelled */
} tMPI_Status;
/*typedef struct tmpi_status_ tMPI_Status;*/
/** NULL request */
#define TMPI_REQUEST_NULL NULL

/** Special value for collective communication to signify that the send
    buffer is to function as receive buffer.

    Used, for example, in tMPI_Reduce. */
#define TMPI_IN_PLACE NULL
/** tMPI_Reduce operators.

    These all work (except for obviously invalid combinations, such as
    bitwise and/or/xor on floats): */
typedef enum
{
    TMPI_MAX,  /**< calculate maximum value */
    TMPI_MIN,  /**< calculate minimum value */
    TMPI_SUM,  /**< calculate sum */
    TMPI_PROD, /**< calculate product */
    TMPI_LAND, /**< calculate logical and */
    TMPI_BAND, /**< calculate bitwise and */
    TMPI_LOR,  /**< calculate logical or */
    TMPI_BOR,  /**< calculate bitwise or */
    TMPI_LXOR, /**< calculate logical xor */
    TMPI_BXOR  /**< calculate bitwise xor */
} tMPI_Op;
/* function to obtain TMPI_COMM_SELF */
tMPI_Comm tMPI_Get_comm_self(void);

/** The thread-specific comm containing only the thread itself.

    \return the self comm object associated with the thread. */
#define TMPI_COMM_SELF (tMPI_Get_comm_self())
/*! \name Initialization and exit functions
    \{ */
/** Traditional MPI initializer; spawns threads that start at the given
    function.

    Seeks the argument '-nt n', where n is the number of
    threads that will be created. If n==0, the number of threads will
    be the recommended number of threads for this platform as obtained
    from tMPI_Get_recommended_nthreads().

    The new threads then run the function start_function, with the original
    argc and argv. This function could be main(), or any other function;
    calling this function again - whether from the started threads or from
    the main thread - has no effect.

    On platforms that support thread affinity setting, this function will
    use the 'all-cores' affinity strategy: it will only set thread affinity
    if the number of threads is equal to the number of hardware threads
    (cores + hyperthreads).

    \param[in] argc           argc of original main() invocation, or NULL
    \param[in] argv           argv of original main() invocation, or NULL.
    \param[in] start_function Starting function of type
                              int start_function(int argc, char *argv[]);

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Init(int *argc, char ***argv,
              int (*start_function)(int, char**));
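
/* Usage sketch (not part of the API): a minimal, hedged example of a
   program starting with tMPI_Init(). The include path is an assumption;
   adjust it to your installation.

       #include <stdio.h>
       #include "tmpi.h"

       static int start(int argc, char *argv[])
       {
           int rank, size;
           tMPI_Comm_rank(TMPI_COMM_WORLD, &rank);
           tMPI_Comm_size(TMPI_COMM_WORLD, &size);
           printf("hello from thread %d of %d\n", rank, size);
           return tMPI_Finalize();  // every thread finalizes before returning
       }

       int main(int argc, char *argv[])
       {
           // Spawns threads ('-nt n' on the command line sets n);
           // every thread, including this one, enters start().
           return tMPI_Init(&argc, &argv, start) != TMPI_SUCCESS;
       }
*/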
/** Generic thread MPI initializer and thread spawner.

    Creates N threads (including the main thread)
    that run the function start_function, which takes a void* argument,
    given by arg. The function start_function also gets called by the main
    thread (unless main_thread_returns is true). When start_function returns,
    it will behave as if tMPI_Finalize was called, and if it's a sub-thread
    it will stop running.

    If N==0, the number of threads will be the recommended number of
    threads for this platform as obtained from
    tMPI_Get_recommended_nthreads().

    Note that the thread affinity strategy only has an effect when this is
    supported by the underlying platform. As of yet (2012), this is not the
    case for Mac OS X, for example.

    \param[in] main_thread_returns whether control in the main thread
                                   should return immediately (if true), or
                                   whether start_function() should be called
                                   from the main thread, too (if false).
    \param[in] N                   The number of threads to start (or 0 to
                                   automatically determine this).
    \param[in] aff_strategy        The thread affinity strategy to use.
    \param[in] start_function      The function to start threads at
                                   (including the main thread, if
                                   main_thread_returns is false).
    \param[in] arg                 An optional argument for start_function().

    \return TMPI_FAILURE on failure, TMPI_SUCCESS on success (after all
            threads have finished if main_thread_returns=true). */
int tMPI_Init_fn(int main_thread_returns, int N,
                 tMPI_Affinity_strategy aff_strategy,
                 void (*start_function)(void*), void *arg);
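
/* Usage sketch: starting a fixed number of worker threads with
   tMPI_Init_fn(). 'worker' and 'my_job' are hypothetical names; the calls
   themselves follow the declarations in this header.

       static void worker(void *arg)
       {
           struct job *j = (struct job *)arg;  // shared, read-only input
           // ... do the per-thread work here ...
       }

       // Run 'worker' on 4 threads (main thread included, because
       // main_thread_returns==0), with the default affinity strategy.
       tMPI_Init_fn(0, 4, TMPI_AFFINITY_ALL_CORES, worker, &my_job);
*/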
/** Get the number of threads from the command line.

    Can be called before tMPI_Init().

    \param[in]  argc     argc from main()
    \param[in]  argv     argv from main()
    \param[in]  optname  Name of the argument specifying the
                         number of threads to run. If this is
                         NULL, this function will read the first
                         argument and interpret it as the number
                         of threads.
    \param[out] nthreads The number of threads.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Get_N(int *argc, char ***argv, const char *optname, int *nthreads);
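
/* Usage sketch: parse the thread count before spawning, falling back to
   auto-detection (N==0) if the option is absent or malformed. 'worker'
   is the hypothetical start function from the sketch above.

       int nthreads = 0;
       if (tMPI_Get_N(&argc, &argv, "-nt", &nthreads) != TMPI_SUCCESS)
       {
           nthreads = 0;  // 0 = let thread_mpi pick a thread count
       }
       tMPI_Init_fn(0, nthreads, TMPI_AFFINITY_ALL_CORES, worker, NULL);
*/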
/** Waits for all other threads to finish and cleans up.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Finalize(void);


/** Just kills all threads.

    Not really necessary, because exit() would do that for us anyway.

    \param[in] comm      Comm to kill threads for
    \param[in] errorcode Error code to exit with

    \return Never returns. */
int tMPI_Abort(tMPI_Comm comm, int errorcode);

/** Whether tMPI_Init(), but not yet tMPI_Finalize(), has been run.

    \param[out] flag Set to TRUE if tMPI_Init() has been called,
                     FALSE if not.

    \return always returns TMPI_SUCCESS. */
int tMPI_Initialized(int *flag);

/** Determine whether tMPI_Finalize() has been run.

    \param[out] flag Set to TRUE if tMPI_Finalize() has been
                     called, FALSE if not.

    \return always returns TMPI_SUCCESS. */
int tMPI_Finalized(int *flag);
/*! \} */
/*! \name Error handling functions
    \{ */
/** Create an error handler object from a function.

    \param[in]  function   The function to make an error handler of.
    \param[out] errhandler The error handler.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Create_errhandler(tMPI_Errhandler_fn *function,
                           tMPI_Errhandler *errhandler);

/** Free the error handler object.

    \param[in] errhandler The error handler.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Errhandler_free(tMPI_Errhandler *errhandler);

/** Set the error handler.

    \param[in] comm       The communicator to set the error handler for.
    \param[in] errhandler The error handler.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_set_errhandler(tMPI_Comm comm, tMPI_Errhandler errhandler);

/** Get the error handler.

    Gets the error handler associated with a comm.

    \param[in]  comm       The communicator to get the error handler for.
    \param[out] errhandler The error handler.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_get_errhandler(tMPI_Comm comm, tMPI_Errhandler *errhandler);

/** Get the error string associated with an error code.

    The length of the error string will never exceed TMPI_MAX_ERROR_STRING.

    \param[in]  errorcode The error code.
    \param[out] string    The pre-allocated char pointer to output to.
    \param[out] resultlen The length of the error string. Will
                          never be longer than TMPI_MAX_ERROR_STRING.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Error_string(int errorcode, char *string, size_t *resultlen);
/*! \} */
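
/* Usage sketch: turning a return code into a readable message. The
   buffer size comes from TMPI_MAX_ERROR_STRING above; 'buf', 'dest'
   and 'comm' are assumed to exist in the caller.

       int ret = tMPI_Send(buf, 10, TMPI_INT, dest, 0, comm);
       if (ret != TMPI_SUCCESS)
       {
           char   msg[TMPI_MAX_ERROR_STRING];
           size_t len;
           tMPI_Error_string(ret, msg, &len);
           fprintf(stderr, "tMPI error: %s\n", msg);
       }
*/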
/*! \name Environment query functions
    \{ */
/** Returns a string with the thread number.

    \param[out] name      Pre-allocated string to output the name to (will
                          not be longer than TMPI_MAX_PROCESSOR_NAME).
    \param[out] resultlen The length of the output. Note that this is an
                          int instead of a size_t because the MPI standard
                          for some reason defines all sizes as int.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Get_processor_name(char *name, int *resultlen);

/** Get a time value as a double, in seconds.

    \return time value. */
double tMPI_Wtime(void);

/** Get the resolution of tMPI_Wtime() as a double, in seconds.

    \return time resolution. */
double tMPI_Wtick(void);

#ifndef DOXYGEN
#define tMPI_This_threadnr() (int)(tMPI_Get_current() - threads)
#else
/** Get the thread number of this thread.
    Mostly for debugging.

    \return the global thread number. */
int tMPI_This_threadnr(void);
#endif
/*! \} */
/*! \name tMPI_Group functions
    \{ */
/** Get the size (number of members) of a group.

    \param[in]  group The group.
    \param[out] size  Size.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Group_size(tMPI_Group group, int *size);

/** Get the rank of a thread in a group.

    \param[in]  group The group.
    \param[out] rank  Variable for the rank, or TMPI_UNDEFINED
                      if the thread is not a member.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Group_rank(tMPI_Group group, int *rank);

/** Create a new group as the collection of threads with the given ranks.

    \param[in]  group    The group from which the ranks are taken.
    \param[in]  n        The number of new group members.
    \param[in]  ranks    The ranks of the threads to add to the new group.
    \param[out] newgroup The new group.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Group_incl(tMPI_Group group, int n, int *ranks, tMPI_Group *newgroup);

/** Get a pointer to the group in the comm.

    \param[in]  comm  The comm from which to take the group.
    \param[out] group The comm's group.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_group(tMPI_Comm comm, tMPI_Group *group);

/** De-allocate a group.

    \param[in] group The group to de-allocate.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Group_free(tMPI_Group *group);
/*! \} */
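
/* Usage sketch: carve a comm for the first half of the threads out of
   TMPI_COMM_WORLD via its group. MAX_THREADS is a hypothetical bound on
   the thread count; tMPI_Comm_create() is declared further below.

       tMPI_Group world_grp, half_grp;
       tMPI_Comm  half_comm;
       int        i, N, ranks[MAX_THREADS];

       tMPI_Comm_size(TMPI_COMM_WORLD, &N);
       for (i = 0; i < N/2; i++)
       {
           ranks[i] = i;
       }
       tMPI_Comm_group(TMPI_COMM_WORLD, &world_grp);
       tMPI_Group_incl(world_grp, N/2, ranks, &half_grp);
       tMPI_Comm_create(TMPI_COMM_WORLD, half_grp, &half_comm);
       tMPI_Group_free(&half_grp);
*/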
/*! \name tMPI_Comm functions
    \{ */
/** Get the comm size (nr. of threads).

    \param[in]  comm The comm to query.
    \param[out] size The comm size.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_size(tMPI_Comm comm, int *size);

/** Get the rank in comm of the current process.

    \param[in]  comm The comm to query.
    \param[out] rank Thread rank in comm.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_rank(tMPI_Comm comm, int *rank);

/** Compare two comms. Returns TMPI_IDENT if the two comms point to
    the same underlying comm structure, TMPI_CONGRUENT if all
    members appear in both comms in the same order, TMPI_SIMILAR
    if both comms have the same members but not in the same order, or
    TMPI_UNEQUAL if the comms have different members.

    \param[in]  comm1  The first comm to compare.
    \param[in]  comm2  The second comm to compare.
    \param[out] result The output result, one of the values
                       described above.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_compare(tMPI_Comm comm1, tMPI_Comm comm2, int *result);


/** De-allocate a comm.

    \param[in] comm The comm to free.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_free(tMPI_Comm *comm);

/** Create a comm based on group membership.

    Collective function that creates a new comm containing only processes
    that are members of the given group.

    \param[in]  comm    The originating comm.
    \param[in]  group   The group of threads to create a comm from.
    \param[out] newcomm The new comm.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_create(tMPI_Comm comm, tMPI_Group group, tMPI_Comm *newcomm);

/** Split up a group into same-colored sub-groups ordered by key.

    This is the main comm creation function: it's a collective call that takes
    a color and a key from each process, and arranges all threads that
    call tMPI_Comm_split() with the same color together into a comm.

    The rank in the new group will be based on the value given in key.

    Passing TMPI_UNDEFINED as a color will result in the thread not being
    part of any group, and getting TMPI_COMM_NULL back in newcomm.

    \param[in]  comm    The originating comm.
    \param[in]  color   This thread's color (determines which comm it will
                        be in). Giving TMPI_UNDEFINED will result in
                        this thread not being in any group.
    \param[in]  key     This thread's key (determines rank).
    \param[out] newcomm The new comm.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm);
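
/* Usage sketch: split TMPI_COMM_WORLD into two comms by rank parity;
   using the old rank as the key keeps the relative ordering.

       int       rank;
       tMPI_Comm parity_comm;
       tMPI_Comm_rank(TMPI_COMM_WORLD, &rank);
       tMPI_Comm_split(TMPI_COMM_WORLD, rank % 2, rank, &parity_comm);
       // ... communicate within parity_comm ...
       tMPI_Comm_free(&parity_comm);
*/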
/** Make a duplicate of a comm.

    \param[in]  comm    The originating comm.
    \param[out] newcomm The new comm.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Comm_dup(tMPI_Comm comm, tMPI_Comm *newcomm);
/*! \name Topology functions
    \{ */
/* topology functions */
/** Check what type of topology the comm has.

    \param[in]  comm   The comm to query.
    \param[out] status The type of topology.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Topo_test(tMPI_Comm comm, int *status);

/** Get the dimensionality of a comm with a topology.

    \param[in]  comm  The comm to query.
    \param[out] ndims The number of dimensions.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cartdim_get(tMPI_Comm comm, int *ndims);

/** Get the size and periodicity of a comm with a Cartesian topology.

    \param[in]  comm    The comm to query.
    \param[in]  maxdims The maximum number of dimensions in the periods
                        and coords parameters.
    \param[out] dims    The size in each dimension.
    \param[out] periods The periodicity in each dimension.
    \param[out] coords  The coordinates in each dimension of the
                        calling thread.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_get(tMPI_Comm comm, int maxdims, int *dims, int *periods,
                  int *coords);

/** Get the rank that a specific set of process coordinates has in
    a Cartesian topology.

    \param[in]  comm   The comm to query.
    \param[in]  coords The coordinates in each dimension.
    \param[out] rank   The rank associated with the coordinates.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_rank(tMPI_Comm comm, int *coords, int *rank);

/** Get the coordinates of a process rank in a Cartesian topology.

    \param[in]  comm    The comm to query.
    \param[in]  rank    The rank associated with the coordinates.
    \param[in]  maxdims The maximum number of dimensions in the coords
                        parameter.
    \param[out] coords  The coordinates in each dimension.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_coords(tMPI_Comm comm, int rank, int maxdims, int *coords);

/** Get the optimal rank this process would have in a Cartesian topology.

    \param[in]  comm    The comm to query.
    \param[in]  ndims   The number of dimensions.
    \param[in]  dims    The size in each dimension.
    \param[in]  periods The periodicity in each dimension.

    \param[out] newrank The rank the thread would have given the topology.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_map(tMPI_Comm comm, int ndims, int *dims, int *periods,
                  int *newrank);

/** Create a comm with a Cartesian topology.

    \param[in]  comm_old  The originating comm.
    \param[in]  ndims     The number of dimensions.
    \param[in]  dims      The size in each dimension.
    \param[in]  periods   The periodicity in each dimension.
    \param[in]  reorder   Whether to allow reordering of the threads
                          according to tMPI_Cart_map().
    \param[out] comm_cart The new comm with Cartesian topology.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_create(tMPI_Comm comm_old, int ndims, int *dims, int *periods,
                     int reorder, tMPI_Comm *comm_cart);
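
/* Usage sketch: a 2x3 non-periodic Cartesian grid over 6 threads, then
   recover this thread's coordinates. The TMPI_COMM_NULL check is a
   defensive assumption for threads that do not fit in the grid.

       int       dims[2]    = { 2, 3 };
       int       periods[2] = { 0, 0 };
       int       rank, coords[2];
       tMPI_Comm cart;

       tMPI_Cart_create(TMPI_COMM_WORLD, 2, dims, periods, 1, &cart);
       if (cart != TMPI_COMM_NULL)
       {
           tMPI_Comm_rank(cart, &rank);
           tMPI_Cart_coords(cart, rank, 2, coords);
       }
*/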
/** Create comms that are sub-spaces of the Cartesian topology communicator.

    Works like an MPI_Comm_split() for the Cartesian dimensions specified
    as false in remain_dims.

    \param[in]  comm        The originating comm with Cartesian topology.
    \param[in]  remain_dims A Boolean array that decides whether a specific
                            dimensionality should remain in newcomm (if true),
                            or should be split up (if false).
    \param[out] newcomm     The new, split communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Cart_sub(tMPI_Comm comm, int *remain_dims, tMPI_Comm *newcomm);
/*! \} */
/*! \name Data type manipulation functions
    \{ */
/** Create a contiguous data type (the only type possible right now).

    Creates a datatype that is a vector of oldtype.

    \param[in]  count   The number of oldtype types in the new type.
    \param[in]  oldtype The old data type.
    \param[out] newtype The new data type (still needs to be committed).
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Type_contiguous(int count, tMPI_Datatype oldtype,
                         tMPI_Datatype *newtype);


/** Make a data type ready for use.

    \param[in,out] datatype The new datatype.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Type_commit(tMPI_Datatype *datatype);
/*! \} */
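
/* Usage sketch: a datatype for sending 3 doubles (e.g. one coordinate
   triplet) at a time; commit before first use. 'coords', 'ncoords',
   'dest', 'tag' and 'comm' are assumed to exist in the caller.

       tMPI_Datatype xyz;
       tMPI_Type_contiguous(3, TMPI_DOUBLE, &xyz);
       tMPI_Type_commit(&xyz);
       tMPI_Send(coords, ncoords, xyz, dest, tag, comm);
*/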
/*! \name Point-to-point communication functions
    \{ */

/* Blocking transfers. The actual transfer (copy) is done on the receiving end
   (so that the receiver's cache already contains the data that it presumably
   needs). */
/** Send a message; blocks until buf is reusable.

    \param[in] buf      The buffer with data to send.
    \param[in] count    The number of items to send.
    \param[in] datatype The data type of the items in buf.
    \param[in] dest     The rank of the destination thread.
    \param[in] tag      The message tag.
    \param[in] comm     The shared communicator.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Send(void* buf, int count, tMPI_Datatype datatype, int dest,
              int tag, tMPI_Comm comm);

/** Receive a message; blocks until buf is filled.

    \param[out] buf      The buffer for data to receive.
    \param[in]  count    The maximum number of items to receive.
    \param[in]  datatype The data type of the items in buf.
    \param[in]  source   The rank of the source thread (or TMPI_ANY_SOURCE).
    \param[in]  tag      The message tag (or TMPI_ANY_TAG).
    \param[in]  comm     The shared communicator.
    \param[out] status   The message status.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Recv(void* buf, int count, tMPI_Datatype datatype, int source,
              int tag, tMPI_Comm comm, tMPI_Status *status);
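
/* Usage sketch: a simple exchange between ranks 0 and 1, assuming 'rank'
   was obtained with tMPI_Comm_rank(). Matching is on (source, tag, comm).

       double x[8];
       if (rank == 0)
       {
           tMPI_Send(x, 8, TMPI_DOUBLE, 1, 0, TMPI_COMM_WORLD);
       }
       else if (rank == 1)
       {
           tMPI_Status stat;
           tMPI_Recv(x, 8, TMPI_DOUBLE, 0, 0, TMPI_COMM_WORLD, &stat);
       }
*/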
/** Send and receive a message at the same time.

    Blocks until recvbuf is filled, and sendbuf is ready for reuse.

    \param[in]  sendbuf   The buffer with data to send.
    \param[in]  sendcount The number of items to send.
    \param[in]  sendtype  The data type of the items in sendbuf.
    \param[in]  dest      The rank of the destination thread.
    \param[in]  sendtag   The send message tag.
    \param[out] recvbuf   The buffer for data to receive.
    \param[in]  recvcount The maximum number of items to receive.
    \param[in]  recvtype  The data type of the items in recvbuf.
    \param[in]  source    The rank of the source thread (or TMPI_ANY_SOURCE).
    \param[in]  recvtag   The receive message tag (or TMPI_ANY_TAG).
    \param[in]  comm      The shared communicator.
    \param[out] status    The received message status.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Sendrecv(void *sendbuf, int sendcount, tMPI_Datatype sendtype,
                  int dest, int sendtag, void *recvbuf, int recvcount,
                  tMPI_Datatype recvtype, int source, int recvtag,
                  tMPI_Comm comm, tMPI_Status *status);
/* Async send/recv. The actual transfer is done on the receiving
   end, during tMPI_Wait(), tMPI_Waitall() or tMPI_Test(). For tMPI_Waitall(),
   the incoming messages are processed in the order they come in. */

/** Initiate sending a message, non-blocking.

    This makes the buffer available to be received. The contents of buf
    should not be touched before the transmission is finished with
    tMPI_Wait(), tMPI_Test() or tMPI_Waitall().

    \param[in]  buf      The buffer with data to send.
    \param[in]  count    The number of items to send.
    \param[in]  datatype The data type of the items in buf.
    \param[in]  dest     The rank of the destination thread.
    \param[in]  tag      The message tag.
    \param[in]  comm     The shared communicator.
    \param[out] request  The request object that can be used in tMPI_Wait(),
                         tMPI_Test(), etc.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Isend(void* buf, int count, tMPI_Datatype datatype, int dest,
               int tag, tMPI_Comm comm, tMPI_Request *request);

/** Initiate receiving a message.

    This makes the buffer available to be filled with data. The contents of
    buf should not be relied on before the transmission is finished with
    tMPI_Wait(), tMPI_Test() or tMPI_Waitall().

    \param[out] buf      The buffer for data to receive.
    \param[in]  count    The maximum number of items to receive.
    \param[in]  datatype The data type of the items in buf.
    \param[in]  source   The rank of the source thread (or TMPI_ANY_SOURCE).
    \param[in]  tag      The message tag (or TMPI_ANY_TAG).
    \param[in]  comm     The shared communicator.
    \param[out] request  The request object that can be used in tMPI_Wait(),
                         tMPI_Test(), etc.
    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Irecv(void* buf, int count, tMPI_Datatype datatype, int source,
               int tag, tMPI_Comm comm, tMPI_Request *request);
/** Test whether a message is transferred.

    \param[in,out] request The request obtained with tMPI_Isend()/tMPI_Irecv().
    \param[out]    flag    A flag set to TRUE(1) if the request is finished,
                           FALSE(0) otherwise.
    \param[out]    status  Message status (can be set to TMPI_STATUS_IGNORE).

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Test(tMPI_Request *request, int *flag, tMPI_Status *status);

/** Wait until a message is transferred.

    \param[in,out] request The request obtained with tMPI_Isend()/tMPI_Irecv().
    \param[out]    status  Message status.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Wait(tMPI_Request *request, tMPI_Status *status);
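
/* Usage sketch: overlap a send and a receive with non-blocking calls,
   then wait for both and inspect the received count. 'left', 'right',
   'n', the buffers and 'comm' are assumed to exist; tMPI_Waitall() and
   tMPI_Get_count() are declared below.

       tMPI_Request req[2];
       tMPI_Status  stat[2];
       int          nrecv;

       tMPI_Isend(sendbuf, n, TMPI_INT, right, 0, comm, &req[0]);
       tMPI_Irecv(recvbuf, n, TMPI_INT, left,  0, comm, &req[1]);
       // ... do unrelated work; don't touch sendbuf or recvbuf here ...
       tMPI_Waitall(2, req, stat);
       tMPI_Get_count(&stat[1], TMPI_INT, &nrecv);
*/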
/** Wait until several messages are transferred.

    \param[in]     count             The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    array_of_statuses List of count message statuses (can
                                     be set to TMPI_STATUSES_IGNORE).

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Waitall(int count, tMPI_Request *array_of_requests,
                 tMPI_Status *array_of_statuses);

/** Test whether several messages are transferred.

    \param[in]     count             The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    flag              Whether all requests have completed.
    \param[out]    array_of_statuses List of count message statuses (can
                                     be set to TMPI_STATUSES_IGNORE).

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Testall(int count, tMPI_Request *array_of_requests, int *flag,
                 tMPI_Status *array_of_statuses);

/** Wait until one of several messages is transferred.

    \param[in]     count             The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    index             Index of the request that has
                                     completed.
    \param[out]    status            Pointer to tMPI_Status object
                                     associated with completed request.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Waitany(int count, tMPI_Request *array_of_requests,
                 int *index, tMPI_Status *status);

/** Test whether one of several messages is transferred.

    \param[in]     count             The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    index             Index of the request that has
                                     completed.
    \param[out]    flag              Whether any request has completed.
    \param[out]    status            Pointer to tMPI_Status object
                                     associated with completed request.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Testany(int count, tMPI_Request *array_of_requests,
                 int *index, int *flag, tMPI_Status *status);

/** Wait until some of several messages are transferred. Waits until at least
    one message is transferred.

    \param[in]     incount           The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    outcount          Number of completed requests.
    \param[out]    array_of_indices  Array of ints that gets filled with
                                     the indices of the completed requests.
    \param[out]    array_of_statuses List of count message statuses (can
                                     be set to TMPI_STATUSES_IGNORE).

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Waitsome(int incount, tMPI_Request *array_of_requests,
                  int *outcount, int *array_of_indices,
                  tMPI_Status *array_of_statuses);

/** Test whether some of several messages are transferred.

    \param[in]     incount           The number of requests.
    \param[in,out] array_of_requests List of count requests obtained with
                                     tMPI_Isend()/tMPI_Irecv().
    \param[out]    outcount          Number of completed requests.
    \param[out]    array_of_indices  Array of ints that gets filled with
                                     the indices of the completed requests.
    \param[out]    array_of_statuses List of count message statuses (can
                                     be set to TMPI_STATUSES_IGNORE).

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Testsome(int incount, tMPI_Request *array_of_requests,
                  int *outcount, int *array_of_indices,
                  tMPI_Status *array_of_statuses);
/** Get the number of actually transferred items from a receive.

    \param[in]  status   The status.
    \param[in]  datatype The data type which was received.
    \param[out] count    The number of items actually received.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Get_count(tMPI_Status *status, tMPI_Datatype datatype, int *count);
/*! \} */
/*! \name Synchronization functions
    \{ */
/** Block until all threads in the comm call this function.

    \param[in] comm The comm object.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Barrier(tMPI_Comm comm);
/*! \} */
/*! \name Multicast communication functions
    \{ */
/** Broadcast from one thread to all others in comm.

    Collective function; data is transferred from root's buffer to all
    others' buffers.

    \param[in,out] buffer   The buffer to send from (root), or to receive
                            into (other threads).
    \param[in]     count    The number of items to send/receive.
    \param[in]     datatype The type of the items to send/receive.
    \param[in]     root     The rank of the sending thread.
    \param[in]     comm     The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Bcast(void* buffer, int count, tMPI_Datatype datatype, int root,
               tMPI_Comm comm);
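
/* Usage sketch: rank 0 reads a parameter and shares it with everyone;
   after the call every thread's 'nsteps' holds root's value.
   read_input() is a hypothetical input routine.

       int nsteps = 0;
       if (rank == 0)
       {
           nsteps = read_input();
       }
       tMPI_Bcast(&nsteps, 1, TMPI_INT, 0, TMPI_COMM_WORLD);
*/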
/** Gather data from all threads in comm to root.

    Collective function; assumes that all data is received in blocks of
    recvcount items.

    \param[in]  sendbuf   The send buffer for all threads (root may
                          specify TMPI_IN_PLACE, in which case it
                          transfers nothing to itself).
    \param[in]  sendcount The number of items to send for all threads.
    \param[in]  sendtype  The type of the items to send.
    \param[out] recvbuf   The receiving buffer (for the root thread).
    \param[in]  recvcount The number of items to receive (for root).
    \param[in]  recvtype  The type of the items to receive (for root).
    \param[in]  root      The rank of root.
    \param[in]  comm      The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Gather(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                void* recvbuf, int recvcount, tMPI_Datatype recvtype, int root,
                tMPI_Comm comm);
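
/* Usage sketch: every thread contributes one int; root 0 ends up with
   'size' ints in rank order. 'rank' and 'size' are assumed to come from
   tMPI_Comm_rank()/tMPI_Comm_size(); recvbuf is only needed on root.

       int  local = rank * rank;
       int *all   = (rank == 0) ? malloc(size * sizeof(int)) : NULL;
       tMPI_Gather(&local, 1, TMPI_INT, all, 1, TMPI_INT, 0,
                   TMPI_COMM_WORLD);
*/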
/** Gather irregularly laid out data from all processes in comm to root.

    Collective function.

    \param[in]  sendbuf    The send buffer for all threads (root may
                           specify TMPI_IN_PLACE, in which case it
                           transfers nothing to itself).
    \param[in]  sendcount  The number of items to send for all threads.
    \param[in]  sendtype   The type of the items to send.
    \param[out] recvbuf    The receiving buffer (for the root thread).
    \param[in]  recvcounts The list of number of items to receive (for
                           root).
    \param[in]  displs     The list of displacements in recvbuf to
                           receive data in (for root).
    \param[in]  recvtype   The type of the items to receive (for root).
    \param[in]  root       The rank of root.
    \param[in]  comm       The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Gatherv(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                 void* recvbuf, int *recvcounts, int *displs,
                 tMPI_Datatype recvtype, int root, tMPI_Comm comm);
/** Spread parts of sendbuf to all processes in comm from root.

    Collective function.

    \param[in]  sendbuf   The send buffer for root.
    \param[in]  sendcount The number of items for root to send to each
                          thread.
    \param[in]  sendtype  The type of the items root sends.
    \param[out] recvbuf   The receiving buffer for all receiving threads
                          (root may specify TMPI_IN_PLACE, in which case
                          it transmits nothing to itself).
    \param[in]  recvcount The number of items recvbuf can receive.
    \param[in]  recvtype  The type of items to receive.
    \param[in]  root      The rank of root.
    \param[in]  comm      The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Scatter(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                 void* recvbuf, int recvcount, tMPI_Datatype recvtype, int root,
                 tMPI_Comm comm);
/** Spread irregularly laid out parts of sendbuf to all processes
    in comm from root.

    Collective function.

    \param[in]  sendbuf    The send buffer for root.
    \param[in]  sendcounts List of the number of items for root to send
                           to each thread.
    \param[in]  displs     List of displacements in sendbuf from which
                           to start transmission to each thread.
    \param[in]  sendtype   The type of the items root sends.
    \param[out] recvbuf    The receiving buffer for all receiving threads
                           (root may specify TMPI_IN_PLACE, in which case
                           it transmits nothing to itself).
    \param[in]  recvcount  The number of items recvbuf can receive.
    \param[in]  recvtype   The type of items to receive.
    \param[in]  root       The rank of root.
    \param[in]  comm       The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Scatterv(void* sendbuf, int *sendcounts, int *displs,
                  tMPI_Datatype sendtype, void* recvbuf, int recvcount,
                  tMPI_Datatype recvtype, int root, tMPI_Comm comm);
/** Spread out parts of sendbuf to all processes from all processes in
    comm.

    Collective function.

    \param[in]  sendbuf   The send buffer.
    \param[in]  sendcount The number of items to send to each thread.
    \param[in]  sendtype  The type of the items to send.
    \param[out] recvbuf   The receive buffer for all threads.
    \param[in]  recvcount The number of items recvbuf can receive per
                          thread.
    \param[in]  recvtype  The type of items to receive.
    \param[in]  comm      The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Alltoall(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
                  void* recvbuf, int recvcount, tMPI_Datatype recvtype,
                  tMPI_Comm comm);
/** Spread out irregularly laid out parts of sendbuf to all
    processes from all processes in comm.

    Collective function.

    \param[in]  sendbuf    The send buffer.
    \param[in]  sendcounts List of the number of items to send to
                           each thread.
    \param[in]  sdispls    List of the displacements in sendbuf of items
                           to send to each thread.
    \param[in]  sendtype   The type of the items to send.
    \param[out] recvbuf    The receive buffer for all threads.
    \param[in]  recvcounts List of the number of items recvbuf can
                           receive from each thread.
    \param[in]  rdispls    List of the displacements in recvbuf of items
                           to receive from each thread.
    \param[in]  recvtype   The type of items to receive.
    \param[in]  comm       The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Alltoallv(void* sendbuf, int *sendcounts, int *sdispls,
                   tMPI_Datatype sendtype, void* recvbuf, int *recvcounts,
                   int *rdispls, tMPI_Datatype recvtype, tMPI_Comm comm);
/*! \} */
/*! \name Reduce functions
    \{ */
/** Perform an operation over all locally held buffers, on all items in
    the buffers, and send the result to root.

    Collective function.

    \param[in]  sendbuf  The operand parameters. Root may specify
                         TMPI_IN_PLACE, in which case recvbuf will hold
                         the operand parameters.
    \param[out] recvbuf  The result buffer at root.
    \param[in]  count    The number of items to do the operation on.
    \param[in]  datatype The data type of the items.
    \param[in]  op       The operation to perform.
    \param[in]  root     The root thread (which is to receive the results).
    \param[in]  comm     The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Reduce(void* sendbuf, void* recvbuf, int count,
                tMPI_Datatype datatype, tMPI_Op op, int root, tMPI_Comm comm);
/** Perform an operation over all locally held buffers, on all items in
    the buffers, and broadcast the result.

    Collective function.

    \param[in]     sendbuf  The operand parameters. Any process may specify
                            TMPI_IN_PLACE, in which case recvbuf will hold
                            the operand parameters for that process.
    \param[in,out] recvbuf  The result buffer.
    \param[in]     count    The number of items to do the operation on.
    \param[in]     datatype The data type of the items.
    \param[in]     op       The operation to perform.
    \param[in]     comm     The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Allreduce(void* sendbuf, void* recvbuf, int count,
                   tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm);
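
/* Usage sketch: a global sum that every thread needs, e.g. a total
   energy; TMPI_IN_PLACE avoids a separate send buffer. local_energy()
   is a hypothetical routine computing this thread's contribution.

       double energy = local_energy();
       tMPI_Allreduce(TMPI_IN_PLACE, &energy, 1, TMPI_DOUBLE, TMPI_SUM,
                      TMPI_COMM_WORLD);
*/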
/** Do a tMPI_Reduce(), but with the following assumption:
    recvbuf points to a valid buffer in all calling threads, or
    sendbuf has the value TMPI_IN_PLACE (in which case the values of
    sendbuf may be changed in that thread).

    This avoids unnecessary memory allocations associated with the normal
    tMPI_Reduce().

    Collective function.

    \param[in]     sendbuf  The operand parameters (or TMPI_IN_PLACE,
                            in which case the operand parameters will
                            be in recvbuf).
    \param[in,out] recvbuf  The result buffer.
    \param[in]     count    The number of items to do the operation on.
    \param[in]     datatype The data type of the items.
    \param[in]     op       The operation to perform.
    \param[in]     root     The root thread (which is to receive
                            the results).
    \param[in]     comm     The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Reduce_fast(void* sendbuf, void* recvbuf, int count,
                     tMPI_Datatype datatype, tMPI_Op op, int root,
                     tMPI_Comm comm);
/** Do a partial reduce operation, based on rank: the results of the
    reduction operation on ranks 0 - i will be put in the recvbuf of
    rank i.

    Collective function.

    \param[in]     sendbuf  The operand parameters. All ranks may specify
                            TMPI_IN_PLACE, in which case recvbuf will hold
                            the operand parameters.
    \param[in,out] recvbuf  The result buffer.
    \param[in]     count    The number of items to do the operation on.
    \param[in]     datatype The data type of the items.
    \param[in]     op       The operation to perform.
    \param[in]     comm     The communicator.

    \return TMPI_SUCCESS on success, TMPI_FAILURE on failure. */
int tMPI_Scan(void* sendbuf, void* recvbuf, int count,
              tMPI_Datatype datatype, tMPI_Op op, tMPI_Comm comm);
/*! \} */
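
/* Usage sketch: compute each rank's offset into a shared output as the
   inclusive prefix sum of the per-thread counts (as documented above);
   the exclusive offset follows by subtracting the local count.

       int incl, offset;
       tMPI_Scan(&mycount, &incl, 1, TMPI_INT, TMPI_SUM, TMPI_COMM_WORLD);
       offset = incl - mycount;
*/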
#ifdef __cplusplus
} /* closing extern "C" */
#endif

#endif /* TMPI_TMPI_H_ */