21 #define __fftwpp_h__ 1
23 #define __FFTWPP_H_VERSION__ 2.05
33 #ifndef FFTWPP_SINGLE_THREAD
34 #define FFTWPP_SINGLE_THREAD
38 #ifndef FFTWPP_SINGLE_THREAD
44 #ifdef FFTWPP_SINGLE_THREAD
47 return omp_get_thread_num();
53 #ifdef FFTWPP_SINGLE_THREAD
56 return omp_get_max_threads();
60 #ifndef FFTWPP_SINGLE_THREAD
61 #define PARALLEL(code) \
63 _Pragma("omp parallel for num_threads(threads)") \
69 #define PARALLEL(code) \
75 #ifdef FFTWPP_SINGLE_THREAD
76 # define FFTWPP_THREADS_PAR
78 # define FFTWPP_THREADS_PAR threads
93 #define FFTWComplex utils::ComplexAlign
94 #define FFTWdouble utils::doubleAlign
95 #define FFTWdelete utils::deleteAlign
103 extern const char *
inout;
111 threads(threads), mean(mean), stdev(stdev) {}
139 return (!
out || in ==
out) ? 2*(n/2+1) : n;
164 unsigned int Dist(
unsigned int n,
size_t stride,
size_t dist) {
165 return dist ? dist : ((stride == 1) ? n : 1);
184 unsigned int nyp=ny/2+1;
185 unsigned int stop=nx*nyp;
187 unsigned int inc=2*nyp;
188 #ifndef FFTWPP_SINGLE_THREAD
189 #pragma omp parallel for num_threads(threads)
191 for(
unsigned int i=nyp; i < stop; i += inc) {
193 for(
unsigned int j=0; j < nyp; j++) p[j]=-p[j];
202 static void Shift(
double *
data,
unsigned int nx,
unsigned int ny,
205 unsigned int stop=nx*ny;
206 unsigned int inc=2*ny;
207 #ifndef FFTWPP_SINGLE_THREAD
208 #pragma omp parallel for num_threads(threads)
210 for(
unsigned int i=ny; i < stop; i += inc) {
212 for(
unsigned int j=0; j < ny; j++) p[j]=-p[j];
223 unsigned int nzp=nz/2+1;
224 unsigned int nyzp=ny*nzp;
225 if(nx % 2 == 0 && ny % 2 == 0) {
226 unsigned int pinc=2*nzp;
227 #ifndef FFTWPP_SINGLE_THREAD
228 #pragma omp parallel for num_threads(threads)
230 for(
unsigned int i=0; i < nx; i++) {
233 for(
Complex *
p=pstart+(1-(i % 2))*nzp;
p < pstop;
p += pinc) {
234 for(
unsigned int k=0; k < nzp; k++)
p[k]=-
p[k];
238 std::cerr <<
oddshift <<
" or odd ny" << std::endl;
244 static void Shift(
double *
data,
unsigned int nx,
unsigned int ny,
246 unsigned int nyz=ny*nz;
247 if(nx % 2 == 0 && ny % 2 == 0) {
248 unsigned int pinc=2*nz;
249 #ifndef FFTWPP_SINGLE_THREAD
250 #pragma omp parallel for num_threads(threads)
252 for(
unsigned int i=0; i < nx; i++) {
253 double *pstart=data+i*nyz;
254 double *pstop=pstart+nyz;
255 for(
double *
p=pstart+(1-(i % 2))*nz;
p < pstop;
p += pinc) {
256 for(
unsigned int k=0; k < nz; k++)
p[k]=-
p[k];
260 std::cerr <<
oddshift <<
" or odd ny" << std::endl;
268 doubles(doubles), sign(sign), threads(threads),
269 norm(1.0/(n ? n : doubles/2)),
plan(NULL) {
270 #ifndef FFTWPP_SINGLE_THREAD
282 if((
size_t) p %
sizeof(
Complex) == 0)
return;
283 std::cerr <<
"WARNING: " << s <<
" array is not " <<
sizeof(
Complex)
284 <<
"-byte aligned: address " << p << std::endl;
288 std::cerr <<
"Unable to construct FFTW plan" << std::endl;
293 #ifndef FFTWPP_SINGLE_THREAD
295 fftw_plan_with_nthreads(
threads);
314 for(
unsigned int i=0; i < N; ++i)
319 for(
unsigned int i=0; i < N; ++i)
324 if(S.
mean() < 100.0/CLOCKS_PER_SEC) N *= 2;
325 if(S.
count() >= 10) {
326 double error=S.
stdev();
328 if(diff >= 0.0 || t > stop) {
331 fftw_destroy_plan(planT);
336 fftw_destroy_plan(plan1);
351 #ifndef NO_CHECK_ALIGN
352 CheckAlign(in,constructor ?
"constructor input" :
"input");
353 if(out)
CheckAlign(out,constructor ?
"constructor output" :
"output");
379 planT=(*planner)(
this,in,
out);
402 fftw_execute_dft(
plan,(fftw_complex *) in,(fftw_complex *) out);
408 std::cerr <<
"ERROR: fft " <<
inout << std::endl;
442 #ifndef FFTWPP_SINGLE_THREAD
443 #pragma omp parallel for num_threads(threads)
445 for(
unsigned int i=0; i < stop; i++) out[i] *=
norm;
449 #ifndef FFTWPP_SINGLE_THREAD
450 #pragma omp parallel for num_threads(threads)
452 for(
unsigned int i=0; i <
doubles; i++) out[i] *=
norm;
472 template<
class I,
class O>
478 void Normalize(
unsigned int nx,
unsigned int M,
size_t ostride,
479 size_t odist, O *
out) {
480 unsigned int stop=nx*ostride;
481 O *outMdist=out+M*odist;
482 #ifndef FFTWPP_SINGLE_THREAD
483 #pragma omp parallel for num_threads(threads)
485 for(
unsigned int i=0; i < stop; i += ostride) {
487 for(O *
p=out+i;
p < pstop;
p += odist) {
493 template<
class I,
class O>
495 size_t odist,
I *in, O *
out=NULL,
bool shift=
false) {
510 #pragma GCC diagnostic push
511 #pragma GCC diagnostic ignored "-Wattributes"
513 #pragma GCC diagnostic pop
522 if(
size %
sizeof(
double) != 0) {
523 std::cerr <<
"ERROR: Transpose is not implemented for type of size "
528 if(rows == 0 || cols == 0)
return;
529 size /=
sizeof(double);
554 dims[0].is=cols*length;
559 dims[1].os=rows*length;
566 plan=fftw_plan_guru_r2r(0,NULL,3,dims,(
double *) in,(
double *)
out,
574 dims[0].n=rows-n*
ilast;
575 plan2=fftw_plan_guru_r2r(0,NULL,3,dims,(
double *) in,(
double *)
out,
581 dims[1].n=cols-m*
jlast;
582 plan2=fftw_plan_guru_r2r(0,NULL,3,dims,(
double *) in,(
double *)
out,
598 std::cerr <<
"ERROR: Transpose " <<
inout << std::endl;
601 #ifndef FFTWPP_SINGLE_THREAD
605 #pragma omp parallel for num_threads(A)
606 for(
unsigned int i=0; i <
a; ++i) {
608 #pragma omp parallel for num_threads(B)
609 for(
unsigned int j=0; j <
b; ++j) {
612 (
double *) in+
cols*I+J,
618 #pragma omp parallel for num_threads(A)
619 for(
unsigned int i=0; i <
a; ++i) {
622 (
double *) in+
cols*I,(
double *)
out+I);
627 #pragma omp parallel for num_threads(B)
628 for(
unsigned int j=0; j <
b; ++j) {
631 (
double *) in+J,(
double *)
out+
rows*J);
635 fftw_execute_r2r(
plan,(
double *) in,(
double*)
out);
639 template<
class T,
class L>
642 typedef std::map<T,threaddata,L>
Table;
645 typename Table::iterator
p=table.find(key);
646 return p == table.end() ?
threaddata() : p->second;
650 threadtable[key]=
data;
659 nx(nx), threads(threads), inplace(inplace) {}
664 return a.
nx < b.
nx || (a.
nx == b.
nx &&
677 nx(nx), ny(ny), threads(threads), inplace(inplace) {}
682 return a.
nx < b.
nx || (a.
nx == b.
nx &&
698 nx(nx), ny(ny), nz(nz), threads(threads), inplace(inplace) {}
703 return a.
nx < b.
nx || (a.
nx == b.
nx &&
745 fft1d(
int sign,
const Array::array1<Complex>& in,
746 const Array::array1<Complex>&
out=Array::NULL1,
759 return fftw_plan_dft_1d(
nx,(fftw_complex *) in,(fftw_complex *) out,
764 template<
class I,
class O>
776 :
fftw(), nx(nx), M(M), istride(istride), ostride(ostride),
777 idist(
Dist(nx,istride,idist)), odist(
Dist(nx,ostride,odist)),
784 fftw_plan planT1=
plan;
788 T=std::min(M,Threads);
796 fftw_destroy_plan(
plan2);
801 fftw_destroy_plan(
plan);
803 fftw_destroy_plan(
plan2);
812 fftw_destroy_plan(planT1);
820 fftw_plan
Plan(
int Q, fftw_complex *in, fftw_complex *
out) {
825 fftw_plan
Plan(
int Q,
double *in, fftw_complex *
out) {
830 fftw_plan
Plan(
int Q, fftw_complex *in,
double *
out) {
838 if(!
plan2)
return NULL;
841 return Plan(
Q,(
I *) in,(O *) out);
845 fftw_execute_dft(plan,in,out);
849 fftw_execute_dft_r2c(plan,in,out);
853 fftw_execute_dft_c2r(plan,in,out);
860 unsigned int extra=
T-
R;
861 #ifndef FFTWPP_SINGLE_THREAD
862 #pragma omp parallel for num_threads(T)
864 for(
unsigned int i=0; i <
T; ++i) {
869 unsigned int offset=iQ+i-extra;
917 fftw(std::
max(2*((nx-1)*istride+(M-1)*
Dist(nx,istride,idist)+1),
918 2*((nx-1)*ostride+(M-1)*
Dist(nx,ostride,odist)+1)),sign,
920 fftwblock<fftw_complex,fftw_complex>(nx,M,istride,ostride,idist,odist,in,
969 return fftw_plan_dft_r2c_1d(
nx,(
double *) in,(fftw_complex *) out,
effort);
973 fftw_execute_dft_r2c(
plan,(
double *) in,(fftw_complex *) out);
1017 return fftw_plan_dft_c2r_1d(
nx,(fftw_complex *) in,(
double *) out,
effort);
1021 fftw_execute_dft_c2r(
plan,(fftw_complex *) in,(
double *) out);
1059 2*(nx/2*ostride+(M-1)*odist+1)),-1,
threads,nx),
1075 #pragma GCC diagnostic push
1076 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
1080 #pragma GCC diagnostic pop
1117 :
fftw(std::
max(2*(nx/2*istride+(M-1)*idist+1),
1134 #pragma GCC diagnostic push
1135 #pragma GCC diagnostic ignored "-Woverloaded-virtual"
1139 #pragma GCC diagnostic pop
1182 fft2d(
int sign,
const Array::array2<Complex>& in,
1183 const Array::array2<Complex>&
out=Array::NULL2,
1198 return fftw_plan_dft_2d(
nx,
ny,(fftw_complex *) in,(fftw_complex *) out,
1203 fftw_execute_dft(
plan,(fftw_complex *) in,(fftw_complex *) out);
1240 :
fftw(2*nx*(ny/2+1),-1,
threads,nx*ny), nx(nx), ny(ny) {
1245 return fftw_plan_dft_r2c_2d(
nx,
ny,(
double *) in,(fftw_complex *) out,
1254 fftw_execute_dft_r2c(
plan,(
double *) in,(fftw_complex *) out);
1259 unsigned int nyp=
ny/2+1;
1261 #ifndef FFTWPP_SINGLE_THREAD
1262 #pragma omp parallel for num_threads(threads)
1264 for(
unsigned int j=0; j < nyp; ++j)
1267 #ifndef FFTWPP_SINGLE_THREAD
1268 #pragma omp parallel for num_threads(threads)
1270 for(
unsigned int i=0; i <
nx; ++i)
1314 return fftw_plan_dft_c2r_2d(
nx,
ny,(fftw_complex *) in,(
double *) out,
1319 fftw_execute_dft_c2r(
plan,(fftw_complex *) in,(
double *) out);
1328 unsigned int nyp=
ny/2+1;
1330 #ifndef FFTWPP_SINGLE_THREAD
1331 #pragma omp parallel for num_threads(threads)
1333 for(
unsigned int j=0; j < nyp; ++j)
1336 #ifndef FFTWPP_SINGLE_THREAD
1337 #pragma omp parallel for num_threads(threads)
1339 for(
unsigned int i=0; i <
nx; ++i)
1382 fft3d(
int sign,
const Array::array3<Complex>& in,
1383 const Array::array3<Complex>&
out=Array::NULL3,
1390 return fftw_plan_dft_3d(
nx,
ny,
nz,(fftw_complex *) in,
1424 :
fftw(2*nx*ny*(nz/2+1),-1,
threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) {
1431 nx(nx), ny(ny), nz(nz) {
Setup(in,
out);}
1434 return fftw_plan_dft_r2c_3d(
nx,
ny,
nz,(
double *) in,(fftw_complex *) out,
1443 fftw_execute_dft_r2c(
plan,(
double *) in,(fftw_complex *) out);
1448 unsigned int nzp=
nz/2+1;
1449 unsigned int yz=
ny*nzp;
1451 #ifndef FFTWPP_SINGLE_THREAD
1452 #pragma omp parallel for num_threads(threads)
1454 for(
unsigned int k=0; k < yz; ++k)
1459 #ifndef FFTWPP_SINGLE_THREAD
1460 #pragma omp parallel for num_threads(threads)
1462 for(
unsigned int i=0; i <
nx; ++i) {
1463 unsigned int iyz=i*yz;
1464 for(
unsigned int k=0; k < nzp; ++k)
1470 #ifndef FFTWPP_SINGLE_THREAD
1471 #pragma omp parallel for num_threads(threads)
1473 for(
unsigned int i=0; i <
nx; ++i)
1474 for(
unsigned int j=0; j <
ny; ++j)
1475 f[i*yz+(j+1)*nzp-1]=0.0;
1510 :
fftw(2*nx*ny*(nz/2+1),1,
threads,nx*ny*nz), nx(nx), ny(ny), nz(nz)
1519 return fftw_plan_dft_c2r_3d(
nx,
ny,
nz,(fftw_complex *) in,(
double *) out,
1524 fftw_execute_dft_c2r(
plan,(fftw_complex *) in,(
double *) out);
1533 unsigned int nzp=
nz/2+1;
1534 unsigned int yz=
ny*nzp;
1536 #ifndef FFTWPP_SINGLE_THREAD
1537 #pragma omp parallel for num_threads(threads)
1539 for(
unsigned int k=0; k < yz; ++k)
1544 #ifndef FFTWPP_SINGLE_THREAD
1545 #pragma omp parallel for num_threads(threads)
1547 for(
unsigned int i=0; i <
nx; ++i) {
1548 unsigned int iyz=i*yz;
1549 for(
unsigned int k=0; k < nzp; ++k)
1555 #ifndef FFTWPP_SINGLE_THREAD
1556 #pragma omp parallel for num_threads(threads)
1558 for(
unsigned int i=0; i <
nx; ++i)
1559 for(
unsigned int j=0; j <
ny; ++j)
1560 f[i*yz+(j+1)*nzp-1]=0.0;
crfft2d(unsigned int nx, unsigned int ny, Complex *in, double *out=NULL, unsigned int threads=maxthreads)
void Store(Table &threadtable, T key, const threaddata &data)
void Execute(fftw_plan plan, fftw_complex *in, double *out)
keytype1(unsigned int nx, unsigned int threads, bool inplace)
threaddata lookup(bool inplace, unsigned int threads)
static unsigned int effort
void store(bool inplace, const threaddata &data)
void fftNormalized(double *in, Complex *out, bool shift=false)
fftw_plan Plan(Complex *in, Complex *out)
void store(bool inplace, const threaddata &data)
void transpose(T *in, T *out=NULL)
Complex * Setout(Complex *in, Complex *out)
void fftNormalized(double *in, Complex *out=NULL)
rcfft1d(unsigned int nx, double *in, Complex *out=NULL, unsigned int threads=maxthreads)
static void Shift(double *data, unsigned int nx, unsigned int ny, unsigned int nz, unsigned int)
void multithread(unsigned int nx)
fftw_plan Plan(int Q, fftw_complex *in, double *out)
mfft1d(unsigned int nx, int sign, unsigned int M=1, size_t stride=1, size_t dist=0, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
void fftNormalized(unsigned int nx, unsigned int M, size_t ostride, size_t odist, I *in, O *out=NULL, bool shift=false)
void store(bool inplace, const threaddata &data)
crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *out=NULL, unsigned int threads=maxthreads)
threaddata lookup(bool inplace, unsigned int threads)
void Normalize(Complex *out)
void Normalize(Complex *out)
fftw(unsigned int doubles, int sign, unsigned int threads, unsigned int n=0)
static const char * oddshift
fftw_plan Plan(Complex *in, Complex *out)
void Execute(fftw_plan plan, double *in, fftw_complex *out)
fftw_plan Plan(Complex *in, Complex *out)
virtual void store(bool, const threaddata &)
fftwblock(unsigned int nx, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in, Complex *out, unsigned int Threads)
void fftNormalized(Complex *in, double *out, bool shift=false)
void store(bool inplace, const threaddata &data)
void Execute(fftw_plan plan, fftw_complex *in, fftw_complex *out)
mrcfft1d(unsigned int nx, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, double *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
void Execute(Complex *in, Complex *out, bool=false)
threaddata(unsigned int threads, double mean, double stdev)
keytype3(unsigned int nx, unsigned int ny, unsigned int nz, unsigned int threads, bool inplace)
rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *out=NULL, unsigned int threads=maxthreads)
void Execute(Complex *in, Complex *out, bool shift=false)
threaddata lookup(bool inplace, unsigned int threads)
static const char * WisdomName
unsigned int realsize(unsigned int n, Complex *in, Complex *out=NULL)
crfft2d(unsigned int nx, unsigned int ny, double *out=NULL, unsigned int threads=maxthreads)
vector< t2list > out
output of the algorithm: a list of clusters
void Normalize(double *out)
void fft0(Complex *in, double *out)
fftw_plan Plan(Complex *in, Complex *out)
ThreadBase(unsigned int threads)
void fft0(double *in, Complex *out=NULL)
static void Shift(Complex *data, unsigned int nx, unsigned int ny, unsigned int nz, unsigned int)
void fft(Complex *in, double *out)
bool operator()(const keytype3 &a, const keytype3 &b) const
void deNyquist(Complex *f)
double stdev(double var, double f)
static double testseconds
unsigned int ceilquotient(unsigned int a, unsigned int b)
void fft0Normalized(Complex *in, double *out=NULL)
virtual fftw_plan Plan(Complex *, Complex *)
std::complex< double > Complex
bool operator()(const keytype2 &a, const keytype2 &b) const
static unsigned int maxthreads
fft3d(unsigned int nx, unsigned int ny, unsigned int nz, int sign, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
void deNyquist(Complex *f)
fftw_plan Planner(fftw *F, Complex *in, Complex *out)
void fft(Complex *in, Complex *out=NULL)
void Execute(Complex *in, Complex *out, bool=false)
void Threads(unsigned int nthreads)
void Execute(Complex *in, Complex *out, bool shift=false)
threaddata Setup(Complex *in, double *out)
fft1d(unsigned int nx, int sign, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
virtual void Execute(Complex *in, Complex *out, bool=false)
#define FFTWPP_THREADS_PAR
unsigned int Dist(unsigned int n, size_t stride, size_t dist)
void Execute(Complex *in, Complex *out, bool shift=false)
void Normalize(double *out)
rcfft3d(unsigned int nx, unsigned int ny, unsigned int nz, double *in, Complex *out=NULL, unsigned int threads=maxthreads)
void Execute(Complex *in, Complex *out, bool=false)
fft2d(unsigned int nx, unsigned int ny, int sign, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
void deNyquist(Complex *f)
fftw_plan Plan(Complex *in, Complex *out)
void Execute(Complex *in, Complex *out, bool=false)
Transpose(unsigned int rows, unsigned int cols, unsigned int length, T *in, T *out=NULL, unsigned int threads=fftw::maxthreads)
void Normalize(unsigned int nx, unsigned int M, size_t ostride, size_t odist, O *out)
virtual void fftNormalized(Complex *in, Complex *out=NULL, bool shift=false)
Complex * CheckAlign(Complex *in, Complex *out, bool constructor=true)
mfft1d(unsigned int nx, int sign, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, Complex *out=NULL, unsigned int threads=maxthreads)
unsigned int innerthreads
void deNyquist(Complex *f)
rcfft1d(unsigned int nx, Complex *out=NULL, unsigned int threads=maxthreads)
fftw_plan Plan(Complex *in, Complex *out)
std::map< T, threaddata, L > Table
fftw_plan Plan(int Q, fftw_complex *in, fftw_complex *out)
virtual unsigned int Threads()
void Execute(Complex *in, Complex *out, bool shift=false)
void store(bool inplace, const threaddata &data)
static const double twopi
crfft1d(unsigned int nx, Complex *in, double *out=NULL, unsigned int threads=maxthreads)
threaddata Setup(Complex *in, Complex *out=NULL)
threaddata Lookup(Table &table, T key)
threaddata lookup(bool inplace, unsigned int threads)
threaddata lookup(bool inplace, unsigned int threads)
void fft0Normalized(I in, O out)
void store(bool inplace, const threaddata &data)
mcrfft1d(unsigned int nx, unsigned int M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, double *out=NULL, unsigned int threads=maxthreads)
threaddata Setup(double *in, Complex *out=NULL)
Complex * ComplexAlign(size_t size)
rcfft2d(unsigned int nx, unsigned int ny, double *in, Complex *out=NULL, unsigned int threads=maxthreads)
static fftw_plan(* planner)(fftw *f, Complex *in, Complex *out)
void fftNormalized(Complex *in, double *out=NULL)
fftw_plan Plan(Complex *in, Complex *out)
crfft3d(unsigned int nx, unsigned int ny, unsigned int nz, Complex *in, double *out=NULL, unsigned int threads=maxthreads)
void fft(double *in, Complex *out=NULL)
void store(bool inplace, const threaddata &data)
virtual threaddata lookup(bool, unsigned int)
fftw_plan Plan(Complex *in, Complex *out)
void CheckAlign(Complex *p, const char *s)
static void planThreads(unsigned int)
static void Shift(double *data, unsigned int nx, unsigned int ny, unsigned int)
fftw_plan Plan(Complex *in, Complex *out)
threaddata lookup(bool inplace, unsigned int threads)
fftw_plan Plan(Complex *in, Complex *out)
void deleteAlign(T *v, size_t len)
rcfft2d(unsigned int nx, unsigned int ny, Complex *out=NULL, unsigned int threads=maxthreads)
fftw_plan Plan(int Q, double *in, fftw_complex *out)
void fft0(Complex *in, Complex *out=NULL)
threaddata lookup(bool inplace, unsigned int threads)
static void Shift(Complex *data, unsigned int nx, unsigned int ny, unsigned int)
crfft1d(unsigned int nx, double *out=NULL, unsigned int threads=maxthreads)
bool operator()(const keytype1 &a, const keytype1 &b) const
keytype2(unsigned int nx, unsigned int ny, unsigned int threads, bool inplace)
threaddata time(fftw_plan plan1, fftw_plan planT, Complex *in, Complex *out, unsigned int Threads)
void fft0Normalized(double *in, Complex *out=NULL)