2 This source code file is part of thread_mpi.
3 Written by Sander Pronk, Erik Lindahl, and possibly others.
5 Copyright (c) 2009, Sander Pronk, Erik Lindahl.
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 1) Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2) Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3) Neither the name of the copyright holders nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
20 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 If you want to redistribute modifications, please consider that
31 scientific software is very special. Version control is crucial -
32 bugs must be traceable. We will be happy to consider code for
33 inclusion in the official distribution, but derived work should not
34 be called official thread_mpi. Details are found in the README & COPYING
38 #ifdef HAVE_TMPI_CONFIG_H
39 #include "tmpi_config.h"
58 #include "collective.h"
62 int tMPI_Scatter(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
63 void* recvbuf, int recvcount, tMPI_Datatype recvtype,
64 int root, tMPI_Comm comm)
69 int ret = TMPI_SUCCESS;
70 struct tmpi_thread *cur = tMPI_Get_current();
73 tMPI_Profile_count_start(cur);
76 tMPI_Trace_print("tMPI_Scatter(%p, %d, %p, %p, %d, %p, %d, %p)",
77 sendbuf, sendcount, sendtype,
78 recvbuf, recvcount, recvtype, root, comm);
82 return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
84 myrank = tMPI_Comm_seek_rank(comm, cur);
86 /* we increase our counter, and determine which coll_env we get */
87 cev = tMPI_Get_cev(comm, myrank, &synct);
92 size_t sendsize = sendtype->size*sendcount;
93 size_t total_send_size = 0;
94 #ifdef USE_COLLECTIVE_COPY_BUFFER
98 if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
100 return tMPI_Error(comm, TMPI_ERR_BUF);
103 /* we set up multiple posts, so no Post_multi */
104 cev->met[myrank].tag = TMPI_SCATTER_TAG;
105 cev->met[myrank].datatype = sendtype;
106 tMPI_Atomic_memory_barrier_rel();
107 tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
108 for (i = 0; i < comm->grp.N; i++)
110 total_send_size += sendtype->size*sendcount;
111 cev->met[myrank].bufsize[i] = sendsize;
112 cev->met[myrank].buf[i] = (char*)sendbuf+sendsize*i;
114 #ifdef USE_COLLECTIVE_COPY_BUFFER
115 /* we must copy our own data too, unfortunately (otherwise there's
117 using_cb = (total_send_size < (size_t)((cev->N)*COPY_BUFFER_SIZE));
118 cev->met[myrank].using_cb = using_cb;
121 /* we set cpbuf stuff to NULL initially */
122 for (i = 0; i < cev->N; i++)
124 /*cev->met[myrank].cpbuf[i]=NULL;*/
125 tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]), NULL);
127 tMPI_Atomic_set(&(cev->met[myrank].buf_readcount), 0);
131 /* post availability */
132 for (i = 0; i < cev->N; i++)
136 tMPI_Event_signal( &(cev->met[i].recv_ev) );
143 #ifdef USE_COLLECTIVE_COPY_BUFFER
144 /* do the copy_buffer thing */
147 /* copy the buffer locally. First allocate */
148 cev->met[myrank].cb = tMPI_Copy_buffer_list_get(
149 &(tMPI_Get_thread(comm, myrank)->cbl_multi));
150 if (cev->met[myrank].cb->size < total_send_size)
152 fprintf(stderr, "ERROR: cb size too small\n");
155 /* copy to the new buf */
156 memcpy(cev->met[myrank].cb->buf, sendbuf, total_send_size);
158 /* post the new buf */
159 for (i = 0; i < cev->N; i++)
161 tMPI_Atomic_memory_barrier_rel();
162 tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]),
163 (char*)cev->met[myrank].cb->buf+sendsize*i);
164 /*cev->met[myrank].cpbuf[i] = (char*)cev->met[myrank].cb->buf +
170 /* do root transfer */
171 if (recvbuf != TMPI_IN_PLACE)
173 tMPI_Coll_root_xfer(comm, sendtype, recvtype,
174 sendsize, recvtype->size*recvcount,
175 (char*)sendbuf+sendsize*myrank,
179 /* and wait until everybody is done copying */
180 tMPI_Wait_for_others(cev, myrank);
184 /* get the root cev */
185 size_t bufsize = recvcount*recvtype->size;
186 /* wait until root becomes available */
187 tMPI_Wait_for_data(cur, cev, myrank);
188 tMPI_Mult_recv(comm, cev, root, myrank, TMPI_SCATTER_TAG, recvtype,
189 bufsize, recvbuf, &ret);
192 tMPI_Profile_count_stop(cur, TMPIFN_Scatter);
199 int tMPI_Scatterv(void* sendbuf, int *sendcounts, int *displs,
200 tMPI_Datatype sendtype, void* recvbuf, int recvcount,
201 tMPI_Datatype recvtype, int root, tMPI_Comm comm)
204 struct coll_env *cev;
206 int ret = TMPI_SUCCESS;
207 struct tmpi_thread *cur = tMPI_Get_current();
210 tMPI_Trace_print("tMPI_Scatterv(%p, %p, %p, %p, %p, %d, %p, %d, %p)",
211 sendbuf, sendcounts, displs, sendtype, recvbuf,
212 recvcount, recvtype, root, comm);
215 tMPI_Profile_count_start(cur);
221 return tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_COMM);
223 myrank = tMPI_Comm_seek_rank(comm, cur);
225 /* we increase our counter, and determine which coll_env we get */
226 cev = tMPI_Get_cev(comm, myrank, &synct);
231 size_t total_send_size = 0;
232 #ifdef USE_COLLECTIVE_COPY_BUFFER
236 if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
238 return tMPI_Error(comm, TMPI_ERR_BUF);
241 /* we set up multiple posts, so no Post_multi */
242 cev->met[myrank].tag = TMPI_SCATTERV_TAG;
243 cev->met[myrank].datatype = sendtype;
244 tMPI_Atomic_memory_barrier_rel();
245 tMPI_Atomic_set( &(cev->met[myrank].n_remaining), cev->N-1 );
246 for (i = 0; i < cev->N; i++)
248 total_send_size += sendtype->size*sendcounts[i];
249 cev->met[myrank].bufsize[i] = sendtype->size*sendcounts[i];
250 cev->met[myrank].buf[i] = (char*)sendbuf+sendtype->size*displs[i];
252 #ifdef USE_COLLECTIVE_COPY_BUFFER
253 /* we must copy our own data too, unfortunately (otherwise there's
255 using_cb = (total_send_size < (size_t)((cev->N)*COPY_BUFFER_SIZE));
256 cev->met[myrank].using_cb = using_cb;
259 /* we set cpbuf stuff to NULL initially */
260 for (i = 0; i < cev->N; i++)
262 /*cev->met[myrank].cpbuf[i]=NULL;*/
263 tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]), NULL);
265 tMPI_Atomic_set(&(cev->met[myrank].buf_readcount), 0);
269 /* post availability */
270 for (i = 0; i < cev->N; i++)
274 tMPI_Event_signal( &(cev->met[i].recv_ev) );
280 #ifdef USE_COLLECTIVE_COPY_BUFFER
281 /* do the copy_buffer thing */
284 /* copy the buffer locally. First allocate */
285 cev->met[myrank].cb = tMPI_Copy_buffer_list_get(
286 &(tMPI_Get_thread(comm, myrank)->cbl_multi));
287 if (cev->met[myrank].cb->size < total_send_size)
289 fprintf(stderr, "ERROR: cb size too small\n");
292 /* copy to the new buf */
293 memcpy(cev->met[myrank].cb->buf, sendbuf, total_send_size);
294 /* post the new buf */
295 for (i = 0; i < cev->N; i++)
297 tMPI_Atomic_memory_barrier_rel();
298 tMPI_Atomic_ptr_set(&(cev->met[myrank].cpbuf[i]),
299 (char*)cev->met[myrank].cb->buf +
300 sendtype->size*displs[i]);
301 /*cev->met[myrank].cpbuf[i]=(char*)cev->met[myrank].cb->buf +
302 sendtype->size*displs[i];*/
307 /* do root transfer */
308 if (recvbuf != TMPI_IN_PLACE)
310 tMPI_Coll_root_xfer(comm, sendtype, recvtype,
311 sendtype->size*sendcounts[myrank],
312 recvtype->size*recvcount,
313 (char*)sendbuf+sendtype->size*displs[myrank],
318 /* and wait until everybody is done copying */
319 tMPI_Wait_for_others(cev, myrank);
323 /* get the root cev */
324 size_t bufsize = recvcount*recvtype->size;
325 /* wait until root becomes available */
326 tMPI_Wait_for_data(cur, cev, myrank);
327 tMPI_Mult_recv(comm, cev, root, myrank, TMPI_SCATTERV_TAG,
328 recvtype, bufsize, recvbuf, &ret);
331 tMPI_Profile_count_stop(cur, TMPIFN_Scatterv);