Merge branch 'release-4-6' into master
[alexxy/gromacs.git] / src / gromacs / fft / parallel_3dfft.h
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  * Gromacs                               Copyright (c) 1991-2005
5  * David van der Spoel, Erik Lindahl, University of Groningen.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * To help us fund GROMACS development, we humbly ask that you cite
13  * the research papers on the package. Check out http://www.gromacs.org
14  *
15  * And Hey:
16  * Gnomes, ROck Monsters And Chili Sauce
17  */
18
19 #ifndef GMX_FFT_PARALLEL_3DFFT_H
20 #define GMX_FFT_PARALLEL_3DFFT_H
21
22 #include "../legacyheaders/types/nrnb.h"
23 #include "../legacyheaders/types/simple.h"
24 #include "../legacyheaders/gmxcomplex.h"
25 #include "../utility/gmxmpi.h"
26 #include "fft.h"
27
28 #ifdef __cplusplus
29 extern "C" {
30 #endif
31
/*! \brief Opaque handle to a parallel 3D-FFT setup.
 *
 *  This is a pointer to struct gmx_parallel_3dfft. The struct is only
 *  forward-declared in this header, so its members are not accessible to
 *  code that includes just this file.
 */
32 typedef struct gmx_parallel_3dfft *
33     gmx_parallel_3dfft_t;
34
35
36
37 /*! \brief Initialize parallel MPI-based 3D-FFT.
38  *
39  *  This routine performs real-to-complex and complex-to-real parallel 3D FFTs,
40  *  but not complex-to-complex.
41  *
42  *  The routine is optimized for small-to-medium size FFTs used for PME and
43  *  PPPM algorithms, and does allocate extra workspace whenever it might
 *  improve
44  *  performance.
45  *
46  *  \param pfft_setup     Pointer to parallel 3dfft setup structure, previously
47  *                        allocated or with automatic storage.
 *                        Note that this is a pointer to the
 *                        gmx_parallel_3dfft_t handle, not the handle itself.
48  *  \param ndata          Number of grid cells in each direction
49  *  \param real_data      Real data. Input for forward and output for backward.
 *                        Passed as a pointer to a pointer; NOTE(review):
 *                        presumably the routine sets the pointer - confirm
 *                        against the implementation.
50  *  \param complex_data   Complex data. Passed as a pointer to a pointer
 *                        (see the note on real_data).
51  *  \param comm           MPI communicators for the two parallelization axes.
52  *                        Each needs to be either initialized or the null
53  *                        communicator for no parallelization in that axis.
 *                        NOTE(review): the earlier text said "MPI_NULL";
 *                        presumably MPI_COMM_NULL is meant - confirm.
54  *  \param slab2index_major Not used
55  *  \param slab2index_minor Not used
56  *  \param bReproducible  Try to avoid FFT timing optimizations and other stuff
57  *                        that could make results differ for two runs with
58  *                        identical input (reproducibility for debugging).
59  *  \param nthreads       Run in parallel using n threads
60  *
61  *  \return 0 or a standard error code.
62  */
63 int
64     gmx_parallel_3dfft_init   (gmx_parallel_3dfft_t *    pfft_setup,
65                                ivec                      ndata,
66                                real **real_data,
67                                t_complex **complex_data,
68                                MPI_Comm                  comm[2],
69                                int *                     slab2index_major,
70                                int *                     slab2index_minor,
71                                gmx_bool                  bReproducible,
72                                int                       nthreads);
73
74
75
76
77
78 /*! \brief Get direct space grid index limits
 *
 *  \param pfft_setup   Parallel 3dfft setup.
 *  \param local_ndata  NOTE(review): presumably an output receiving the
 *                      local number of grid cells per dimension - confirm
 *                      against the implementation.
 *  \param local_offset NOTE(review): presumably an output receiving the
 *                      offset of the local part of the grid - confirm.
 *  \param local_size   NOTE(review): presumably an output receiving the
 *                      local allocated size per dimension - confirm.
 *
 *  \return An int status; presumably 0 or a standard error code like the
 *          other routines in this header - confirm.
79  */
80 int
81 gmx_parallel_3dfft_real_limits(gmx_parallel_3dfft_t      pfft_setup,
82                                ivec                      local_ndata,
83                                ivec                      local_offset,
84                                ivec                      local_size);
85
86
87 /*! \brief Get reciprocal space grid index limits
 *
 *  \param pfft_setup    Parallel 3dfft setup.
 *  \param complex_order NOTE(review): presumably an output describing the
 *                       dimension ordering of the complex grid - confirm
 *                       against the implementation.
 *  \param local_ndata   NOTE(review): presumably an output receiving the
 *                       local number of grid cells per dimension - confirm.
 *  \param local_offset  NOTE(review): presumably an output receiving the
 *                       offset of the local part of the grid - confirm.
 *  \param local_size    NOTE(review): presumably an output receiving the
 *                       local allocated size per dimension - confirm.
 *
 *  \return An int status; presumably 0 or a standard error code like the
 *          other routines in this header - confirm.
88  */
89 int
90 gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t      pfft_setup,
91                                   ivec                      complex_order,
92                                   ivec                      local_ndata,
93                                   ivec                      local_offset,
94                                   ivec                      local_size);
95
96
/*! \brief Execute the parallel 3D-FFT for an initialized setup.
 *
 *  \param pfft_setup Parallel 3dfft setup.
 *  \param dir        Transform direction, given as an enum gmx_fft_direction
 *                    value (the enum is declared outside this header).
 *  \param in_data    Input data.
 *  \param out_data   Output data.
 *  \param thread     Thread index of the calling thread, i.e. index to the part
 *                    of the data operated on last by the calling thread. This
 *                    is needed to start the FFT without an OpenMP barrier.
 *  \param wcycle     Wall cycle counters.
 *
 *  \return An int status; presumably 0 or a standard error code like the
 *          other routines in this header - confirm.
 */
97 int
98 gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t    pfft_setup,
99                            enum gmx_fft_direction  dir,
100                            void *                  in_data,
101                            void *                  out_data,
102                            int                     thread,
103                            gmx_wallcycle_t         wcycle);
104
105
106 /*! \brief Release all data in parallel fft setup
107  *
108  *  All temporary storage and FFT plans are released. The structure itself
109  *  is not released, but the contents are invalid after this call.
110  *
111  *  \param pfft_setup Parallel 3dfft setup.
118  *
119  *  \return 0 or a standard error code.
120  */
121 int
122 gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t    pfft_setup);
123
124 #ifdef __cplusplus
125 }
126 #endif
127
128 #endif