Merge branch release-4-6 into master
[alexxy/gromacs.git] / src / gromacs / fft / parallel_3dfft.h
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
2  *
3  *
4  * Gromacs                               Copyright (c) 1991-2005
5  * David van der Spoel, Erik Lindahl, University of Groningen.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * To help us fund GROMACS development, we humbly ask that you cite
13  * the research papers on the package. Check out http://www.gromacs.org
14  *
15  * And Hey:
16  * Gnomes, ROck Monsters And Chili Sauce
17  */
18
19 #ifndef GMX_FFT_PARALLEL_3DFFT_H
20 #define GMX_FFT_PARALLEL_3DFFT_H
21
22 #include "../legacyheaders/types/nrnb.h"
23 #include "../legacyheaders/types/simple.h"
24 #include "../legacyheaders/gmxcomplex.h"
25 #include "../utility/gmxmpi.h"
26 #include "fft.h"
27
28 #ifdef __cplusplus
29 extern "C" {
30 #endif
31 /*! \brief Handle to a parallel 3D-FFT setup; the struct layout is not declared in this header. */
32 typedef struct gmx_parallel_3dfft *
33     gmx_parallel_3dfft_t;
34
35
36
37 /*! \brief Initialize parallel MPI-based 3D-FFT.
38  *
39  *  The setup supports real-to-complex and complex-to-real parallel 3D FFTs,
40  *  but not complex-to-complex.
41  *
42  *  The routine is optimized for small-to-medium size FFTs used for PME and
43  *  PPPM algorithms, and allocates extra workspace whenever that might improve
44  *  performance.
45  *
46  *  \param pfft_setup     Pointer to parallel 3dfft setup structure, previously
47  *                        allocated or with automatic storage.
48  *  \param ndata          Number of grid cells in each direction.
49  *  \param real_data      Real data. Input for forward and output for backward.
50  *  \param complex_data   Complex data. Output for forward and input for backward.
51  *  \param comm           MPI communicators, one per parallelization axis.
52  *                        Each needs to be either initialized or MPI_COMM_NULL
53  *                        for no parallelization in that axis.
54  *  \param bReproducible  Try to avoid FFT timing optimizations and other stuff
55  *                        that could make results differ for two runs with
56  *                        identical input (reproducibility for debugging).
57  *  \param nthreads       Number of threads to run the FFT with.
58  *
59  *  \return 0 or a standard error code.
60  */
61 int
62     gmx_parallel_3dfft_init   (gmx_parallel_3dfft_t *    pfft_setup,
63                                ivec                      ndata,
64                                real **real_data,
65                                t_complex **complex_data,
66                                MPI_Comm                  comm[2],
67                                gmx_bool                  bReproducible,
68                                int                       nthreads);
69
70
71
72
73
74 /*! \brief Get the direct space (real data) grid index limits of a parallel 3dfft setup.
75  *  NOTE(review): the three ivec arguments look like outputs filled in by the call -- confirm in the implementation. */
76 int
77 gmx_parallel_3dfft_real_limits(gmx_parallel_3dfft_t      pfft_setup,
78                                ivec                      local_ndata,
79                                ivec                      local_offset,
80                                ivec                      local_size);
81
82
83 /*! \brief Get the reciprocal space (complex data) grid index limits of a parallel 3dfft setup.
84  *  NOTE(review): the ivec arguments look like outputs; complex_order is presumably the dimension order of the complex grid -- confirm. */
85 int
86 gmx_parallel_3dfft_complex_limits(gmx_parallel_3dfft_t      pfft_setup,
87                                   ivec                      complex_order,
88                                   ivec                      local_ndata,
89                                   ivec                      local_offset,
90                                   ivec                      local_size);
91
92 /*! \brief Perform the parallel 3D FFT in the requested direction; returns 0 or a standard error code. */
93 int
94 gmx_parallel_3dfft_execute(gmx_parallel_3dfft_t    pfft_setup, /*!< Parallel 3dfft setup. */
95                            enum gmx_fft_direction  dir, /*!< Direction of the transform. */
96                            int                     thread, /*!< Thread index of the calling thread, i.e. index to the part of the data operated on last by the calling thread; needed to start the FFT without an OpenMP barrier. */
97                            gmx_wallcycle_t         wcycle); /*!< Wall cycle counters. */
98
99
100 /*! \brief Release all data in parallel fft setup
101  *
102  *  All temporary storage and FFT plans are released. The structure itself
103  *  is not released, but its contents are invalid after this call, so the
104  *  setup must not be used again afterwards.
105  *
106  *  \param pfft_setup Parallel 3dfft setup to release, as initialized by
107  *                    gmx_parallel_3dfft_init().
108  *
109  *  \note This routine takes only the setup handle. The thread index and
110  *        the wall cycle counters are arguments of
111  *        gmx_parallel_3dfft_execute(), not of this routine.
112  *
113  *  \return 0 or a standard error code.
114  */
115 int
116 gmx_parallel_3dfft_destroy(gmx_parallel_3dfft_t    pfft_setup);
117
118 #ifdef __cplusplus
119 }
120 #endif
121
122 #endif