src/gromacs/mdlib/domdec_network.c
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 *
 * This file is part of Gromacs        Copyright (c) 1991-2008
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <string.h>
#include "domdec_network.h"

#ifdef GMX_LIB_MPI
#include <mpi.h>
#endif
#ifdef GMX_THREADS
#include "tmpi.h"
#endif


#define DDMASTERRANK(dd)   (dd->masterrank)

void dd_sendrecv_int(const gmx_domdec_t *dd,
                     int ddimind,int direction,
                     int *buf_s,int n_s,
                     int *buf_r,int n_r)
{
#ifdef GMX_MPI
    int rank_s,rank_r;
    MPI_Status stat;

    /* For dddirForward we send to neighbor 0 and receive from neighbor 1
     * in DD dimension ddimind; the reverse for the other direction.
     */
    rank_s = dd->neighbor[ddimind][direction==dddirForward ? 0 : 1];
    rank_r = dd->neighbor[ddimind][direction==dddirForward ? 1 : 0];

    if (n_s && n_r)
    {
        MPI_Sendrecv(buf_s,n_s*sizeof(int),MPI_BYTE,rank_s,0,
                     buf_r,n_r*sizeof(int),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
    else if (n_s)
    {
        MPI_Send(    buf_s,n_s*sizeof(int),MPI_BYTE,rank_s,0,
                     dd->mpi_comm_all);
    }
    else if (n_r)
    {
        MPI_Recv(    buf_r,n_r*sizeof(int),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
#endif
}
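
/* Illustrative usage sketch (not part of the original file): moving a block
 * of integers one domain-decomposition cell forward along dimension 0.
 * Each rank sends to its forward neighbor and receives from its backward
 * neighbor in a single call. The helper name is hypothetical; only
 * dd_sendrecv_int() and dddirForward come from this module's interface.
 */
#if 0
static void shift_ints_forward_example(const gmx_domdec_t *dd,
                                       int *send_buf, int n_send,
                                       int *recv_buf, int n_recv)
{
    /* ddimind = 0: first DD dimension; dddirForward: send "forward" */
    dd_sendrecv_int(dd, 0, dddirForward, send_buf, n_send, recv_buf, n_recv);
}
#endif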

void dd_sendrecv_real(const gmx_domdec_t *dd,
                      int ddimind,int direction,
                      real *buf_s,int n_s,
                      real *buf_r,int n_r)
{
#ifdef GMX_MPI
    int rank_s,rank_r;
    MPI_Status stat;

    rank_s = dd->neighbor[ddimind][direction==dddirForward ? 0 : 1];
    rank_r = dd->neighbor[ddimind][direction==dddirForward ? 1 : 0];

    if (n_s && n_r)
    {
        MPI_Sendrecv(buf_s,n_s*sizeof(real),MPI_BYTE,rank_s,0,
                     buf_r,n_r*sizeof(real),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
    else if (n_s)
    {
        MPI_Send(    buf_s,n_s*sizeof(real),MPI_BYTE,rank_s,0,
                     dd->mpi_comm_all);
    }
    else if (n_r)
    {
        MPI_Recv(    buf_r,n_r*sizeof(real),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
#endif
}

void dd_sendrecv_rvec(const gmx_domdec_t *dd,
                      int ddimind,int direction,
                      rvec *buf_s,int n_s,
                      rvec *buf_r,int n_r)
{
#ifdef GMX_MPI
    int rank_s,rank_r;
    MPI_Status stat;

    rank_s = dd->neighbor[ddimind][direction==dddirForward ? 0 : 1];
    rank_r = dd->neighbor[ddimind][direction==dddirForward ? 1 : 0];

    if (n_s && n_r)
    {
        MPI_Sendrecv(buf_s[0],n_s*sizeof(rvec),MPI_BYTE,rank_s,0,
                     buf_r[0],n_r*sizeof(rvec),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
    else if (n_s)
    {
        MPI_Send(    buf_s[0],n_s*sizeof(rvec),MPI_BYTE,rank_s,0,
                     dd->mpi_comm_all);
    }
    else if (n_r)
    {
        MPI_Recv(    buf_r[0],n_r*sizeof(rvec),MPI_BYTE,rank_r,0,
                     dd->mpi_comm_all,&stat);
    }
#endif
}

void dd_sendrecv2_rvec(const gmx_domdec_t *dd,
                       int ddimind,
                       rvec *buf_s_fw,int n_s_fw,
                       rvec *buf_r_fw,int n_r_fw,
                       rvec *buf_s_bw,int n_s_bw,
                       rvec *buf_r_bw,int n_r_bw)
{
#ifdef GMX_MPI
    int rank_fw,rank_bw,nreq;
    MPI_Request req[4];
    MPI_Status  stat[4];

    rank_fw = dd->neighbor[ddimind][0];
    rank_bw = dd->neighbor[ddimind][1];

    if (!dd->bSendRecv2)
    {
        /* Try to send and receive in both directions simultaneously.
         * This should be faster, especially on machines
         * with full 3D communication networks.
         * However, some communication libraries may be optimized for
         * MPI_Sendrecv, making the non-blocking MPI calls slower.
         * SendRecv2 can be turned on with the env. var. GMX_DD_SENDRECV2
         * (see the sketch after this function for how the flag might be set).
         */
        nreq = 0;
        if (n_r_fw)
        {
            MPI_Irecv(buf_r_fw[0],n_r_fw*sizeof(rvec),MPI_BYTE,
                      rank_bw,0,dd->mpi_comm_all,&req[nreq++]);
        }
        if (n_r_bw)
        {
            MPI_Irecv(buf_r_bw[0],n_r_bw*sizeof(rvec),MPI_BYTE,
                      rank_fw,1,dd->mpi_comm_all,&req[nreq++]);
        }
        if (n_s_fw)
        {
            MPI_Isend(buf_s_fw[0],n_s_fw*sizeof(rvec),MPI_BYTE,
                      rank_fw,0,dd->mpi_comm_all,&req[nreq++]);
        }
        if (n_s_bw)
        {
            MPI_Isend(buf_s_bw[0],n_s_bw*sizeof(rvec),MPI_BYTE,
                      rank_bw,1,dd->mpi_comm_all,&req[nreq++]);
        }
        if (nreq)
        {
            MPI_Waitall(nreq,req,stat);
        }
    }
    else
    {
        /* Communicate in two ordered phases.
         * This is slower, even on a dual-core Opteron cluster
         * with a single full-duplex network connection per machine.
         */
        /* Forward */
        MPI_Sendrecv(buf_s_fw[0],n_s_fw*sizeof(rvec),MPI_BYTE,rank_fw,0,
                     buf_r_fw[0],n_r_fw*sizeof(rvec),MPI_BYTE,rank_bw,0,
                     dd->mpi_comm_all,&stat[0]);
        /* Backward */
        MPI_Sendrecv(buf_s_bw[0],n_s_bw*sizeof(rvec),MPI_BYTE,rank_bw,0,
                     buf_r_bw[0],n_r_bw*sizeof(rvec),MPI_BYTE,rank_fw,0,
                     dd->mpi_comm_all,&stat[0]);
    }
#endif
}
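
/* Minimal sketch (an assumption, not code from this file) of how the
 * dd->bSendRecv2 flag used above might be initialized from the
 * GMX_DD_SENDRECV2 environment variable during DD setup; the actual
 * initialization lives elsewhere in the domain decomposition code.
 */
#if 0
#include <stdlib.h>

static void set_sendrecv2_flag_example(gmx_domdec_t *dd)
{
    /* Any value set for GMX_DD_SENDRECV2 selects the ordered,
     * two-phase MPI_Sendrecv path in dd_sendrecv2_rvec().
     */
    dd->bSendRecv2 = (getenv("GMX_DD_SENDRECV2") != NULL);
}
#endif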

/* IBM's BlueGene(/L) MPI_Bcast dereferences the data pointer
 * even when nbytes == 0, so we protect calls to it on BlueGene.
 * Fortunately dd_bcast() and dd_bcastc() are only
 * called during DD setup and partitioning.
 */

void dd_bcast(gmx_domdec_t *dd,int nbytes,void *data)
{
#ifdef GMX_MPI
#ifdef GMX_BLUEGENE
    if (nbytes > 0)
    {
#endif
    MPI_Bcast(data,nbytes,MPI_BYTE,
              DDMASTERRANK(dd),dd->mpi_comm_all);
#ifdef GMX_BLUEGENE
    }
#endif
#endif
}
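
/* Hypothetical caller sketch: broadcasting a small setup struct from the
 * DD master rank to all ranks. The struct and helper are invented for
 * illustration; only dd_bcast() comes from this file.
 */
#if 0
typedef struct {
    int  npulse;
    real cutoff;
} dd_setup_example_t;

static void bcast_setup_example(gmx_domdec_t *dd, dd_setup_example_t *setup)
{
    /* Collective call: every rank passes its own *setup, and after the
     * call all ranks hold the master rank's contents.
     */
    dd_bcast(dd, sizeof(*setup), setup);
}
#endif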

void dd_bcastc(gmx_domdec_t *dd,int nbytes,void *src,void *dest)
{
    if (DDMASTER(dd))
    {
        memcpy(dest,src,nbytes);
    }
#ifdef GMX_MPI
#ifdef GMX_BLUEGENE
    if (nbytes > 0)
    {
#endif
    MPI_Bcast(dest,nbytes,MPI_BYTE,
              DDMASTERRANK(dd),dd->mpi_comm_all);
#ifdef GMX_BLUEGENE
    }
#endif
#endif
}

void dd_scatter(gmx_domdec_t *dd,int nbytes,void *src,void *dest)
{
#ifdef GMX_MPI
    MPI_Scatter(src,nbytes,MPI_BYTE,
                dest,nbytes,MPI_BYTE,
                DDMASTERRANK(dd),dd->mpi_comm_all);
#endif
}

void dd_gather(gmx_domdec_t *dd,int nbytes,void *src,void *dest)
{
#ifdef GMX_MPI
    MPI_Gather(src,nbytes,MPI_BYTE,
               dest,nbytes,MPI_BYTE,
               DDMASTERRANK(dd),dd->mpi_comm_all);
#endif
}

void dd_scatterv(gmx_domdec_t *dd,
                 int *scounts,int *disps,void *sbuf,
                 int rcount,void *rbuf)
{
#ifdef GMX_MPI
    int dum;

    if (rcount == 0)
    {
        /* MPI does not allow NULL pointers */
        rbuf = &dum;
    }
    MPI_Scatterv(sbuf,scounts,disps,MPI_BYTE,
                 rbuf,rcount,MPI_BYTE,
                 DDMASTERRANK(dd),dd->mpi_comm_all);
#endif
}

void dd_gatherv(gmx_domdec_t *dd,
                int scount,void *sbuf,
                int *rcounts,int *disps,void *rbuf)
{
#ifdef GMX_MPI
    int dum;

    if (scount == 0)
    {
        /* MPI does not allow NULL pointers */
        sbuf = &dum;
    }
    MPI_Gatherv(sbuf,scount,MPI_BYTE,
                rbuf,rcounts,disps,MPI_BYTE,
                DDMASTERRANK(dd),dd->mpi_comm_all);
#endif
}
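
/* Hypothetical sketch of the count/displacement convention expected by
 * dd_scatterv()/dd_gatherv(): counts and displacements are in bytes and
 * are only read on the master rank. It assumes dd->nnodes holds the number
 * of DD ranks; the helper itself is invented for illustration.
 */
#if 0
#include <stdlib.h>

static void gather_home_atom_counts_example(gmx_domdec_t *dd,
                                            int nat_home, int *nat_all)
{
    int *rcounts = NULL, *disps = NULL;
    int  r;

    if (DDMASTER(dd))
    {
        rcounts = malloc(dd->nnodes*sizeof(int));
        disps   = malloc(dd->nnodes*sizeof(int));
        for (r = 0; r < dd->nnodes; r++)
        {
            rcounts[r] = sizeof(int);      /* each rank sends one int        */
            disps[r]   = r*sizeof(int);    /* packed contiguously, in bytes  */
        }
    }
    /* Every rank contributes nat_home; the master receives them in nat_all */
    dd_gatherv(dd, sizeof(int), &nat_home, rcounts, disps, nat_all);
    if (DDMASTER(dd))
    {
        free(rcounts);
        free(disps);
    }
}
#endif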