V3FIT
fft99.f
1  SUBROUTINE fft99(a, work, trigs, ifax, inc, jump, n, lot, isign)
2  USE stel_kinds
3  IMPLICIT NONE
4 C-----------------------------------------------
5 C D u m m y A r g u m e n t s
6 C-----------------------------------------------
7  INTEGER inc, jump, n, lot, isign
8  INTEGER :: ifax(13)
9  REAL(rprec) :: a(lot*(n+2)), work(lot*(n+1)), trigs(3*n/2+1)
10 C-----------------------------------------------
11 C L o c a l V a r i a b l e s
12 C-----------------------------------------------
13  INTEGER::nfax,nx,nh,ink,igo,ibase,jbase,l,i,j,m,ia,la,k,ib
14 C-----------------------------------------------
15 c
16 c purpose performs multiple fast fourier transforms. this package
17 c will perform a number of simultaneous REAL/half-complex
18 c periodic fourier transforms or corresponding inverse
19 c transforms, i.e. given a set of REAL data vectors, the
20 c package returns a set of "half-complex" fourier
21 c coefficient vectors, or vice versa. the length of the
22 c transforms must be an even number greater than 4 that has
23 c no other factors except possibly powers of 2, 3, and 5.
24 c this is an ALL fortran version of the craylib package
25 c that is mostly written in cal.
26 c
27 c the package fft99f CONTAINS several user-level routines:
28 c
29 c SUBROUTINE fftfax
30 c an initialization routine that must be called once
31 c before a sequence of calls to the fft routines
32 c (provided that n is not changed).
33 c
34 c SUBROUTINEs fft99 and fft991
35 c two fft routines that RETURN slightly different
36 c arrangements of the data in gridpoint space.
37 c
38 c
39 c access this fortran version may be accessed with
40 c
41 c *fortran,p=xlib,sn=fft99f
42 c
43 c to access the cray object code, CALLing the user ENTRY
44 c points from a cray PROGRAM is sufficient. the source
45 c fortran and cal code for the craylib version may be
46 c accessed using
47 c
48 c fetch p=craylib,sn=fft99
49 c
50 c usage let n be of the form 2**p * 3**q * 5**r, WHERE p .ge. 1,
51 c q .ge. 0, and r .ge. 0. THEN a typical sequence of
52 c calls to transform a given set of REAL vectors of length
53 c n to a set of "half-complex" fourier coefficient vectors
54 c of length n is
55 c
56 c DIMENSION ifax(13),trigs(3*n/2+1),a(m*(n+2)),
57 c + work(m*(n+1))
58 c
59 c CALL fftfax (n, ifax, trigs)
60 c CALL fft99 (a,work,trigs,ifax,inc,jump,n,m,isign)
61 c
62 c see the individual WRITE-ups for fftfax, fft99, and
63 c fft991 below, for a detailed description of the
64 c arguments.
65 c
66 c history the package was written by clive temperton at ecmwf in
67 c november, 1978. it was modified, documented, and tested
68 c for ncar by russ rew in september, 1980.
69 c
70 c-----------------------------------------------------------------------
71 c
72 c SUBROUTINE fftfax (n,ifax,trigs)
73 c
74 c purpose a set-up routine for fft99 and fft991. it need ONLY be
75 c called once before a sequence of calls to the fft
76 c routines (provided that n is not changed).
77 c
78 c argument ifax(13),trigs(3*n/2+1)
79 c dimensions
80 c
81 c arguments
82 c
83 c on input n
84 c an even number greater than 4 that has no prime factor
85 c greater than 5. n is the length of the transforms (see
86 c the documentation for fft99 and fft991 for the
87 c definitions of the transforms).
88 c
89 c ifax
90 c an INTEGER array. the number of elements actually used
91 c will depend on the factorization of n. dimensioning
92 c ifax for 13 suffices for ALL n less than a million.
93 c
94 c trigs
95 c a floating point array of DIMENSION 3*n/2 IF n/2 is
96 c even, or 3*n/2+1 IF n/2 is odd.
97 c
98 c on output ifax
99 c CONTAINS the factorization of n/2. ifax(1) is the
100 c number of factors, and the factors themselves are stored
101 c in ifax(2),ifax(3),... IF fftfax is called with n odd,
102 c or IF n has ANY prime factors greater than 5, ifax(1)
103 c is set to -99.
104 c
105 c trigs
106 c an array of trignomentric FUNCTION values subsequently
107 c used by the fft routines.
108 c
109 c-----------------------------------------------------------------------
110 c
111 c SUBROUTINE fft991 (a,work,trigs,ifax,inc,jump,n,m,isign)
112 c and
113 c SUBROUTINE fft99 (a,work,trigs,ifax,inc,jump,n,m,isign)
114 c
115 c purpose perform a number of simultaneous REAL/half-complex
116 c periodic fourier transforms or corresponding inverse
117 c transforms, using ordinary spatial order of gridpoint
118 c values (fft991) or explicit cyclic continuity in the
119 c gridpoint values (fft99). given a set
120 c of REAL data vectors, the package returns a set of
121 c "half-complex" fourier coefficient vectors, or vice
122 c versa. the length of the transforms must be an even
123 c number that has no other factors except possibly powers
124 c of 2, 3, and 5. these version of fft991 and fft99 are
125 c optimized for USE on the cray-1.
126 c
127 c argument a(m*(n+2)), work(m*(n+1)), trigs(3*n/2+1), ifax(13)
128 c dimensions
129 c
130 c arguments
131 c
132 c on input a
133 c an array of length m*(n+2) containing the input data
134 c or coefficient vectors. this array is overwritten by
135 c the results.
136 c
137 c work
138 c a work array of DIMENSION m*(n+1)
139 c
140 c trigs
141 c an array set up by fftfax, which must be called first.
142 c
143 c ifax
144 c an array set up by fftfax, which must be called first.
145 c
146 c inc
147 c the increment (in words) between successive elements of
148 c each data or coefficient vector (e.g. inc=1 for
149 c consecutively stored data).
150 c
151 c jump
152 c the increment (in words) between the first elements of
153 c successive data or coefficient vectors. on the cray-1,
154 c try to arrange data so that jump is not a multiple of 8
155 c (to avoid memory bank conflicts). for clarification of
156 c inc and jump, see the examples below.
157 c
158 c n
159 c the length of each transform (see definition of
160 c transforms, below).
161 c
162 c m
163 c the number of transforms to be done simultaneously.
164 c
165 c isign
166 c = +1 for a transform from fourier coefficients to
167 c gridpoint values.
168 c = -1 for a transform from gridpoint values to fourier
169 c coefficients.
170 c
171 c on output a
172 c IF isign = +1, and m coefficient vectors are supplied
173 c each containing the sequence:
174 c
175 c a(0),b(0),a(1),b(1),...,a(n/2),b(n/2) (n+2 values)
176 c
177 c THEN the result consists of m data vectors each
178 c containing the corresponding n+2 gridpoint values:
179 c
180 c for fft991, x(0), x(1), x(2),...,x(n-1),0,0.
181 c for fft99, x(n-1),x(0),x(1),x(2),...,x(n-1),x(0).
182 c (explicit cyclic continuity)
183 c
184 c when isign = +1, the transform is defined by:
185 c x(j)=SUM(k=0,...,n-1)(c(k)*EXP(2*i*j*k*pi/n))
186 c WHERE c(k)=a(k)+i*b(k) and c(n-k)=a(k)-i*b(k)
187 c and i=SQRT (-1)
188 c
189 c IF isign = -1, and m data vectors are supplied each
190 c containing a sequence of gridpoint values x(j) as
191 c defined above, THEN the result consists of m vectors
192 c each containing the corresponding fourier cofficients
193 c a(k), b(k), 0 .le. k .le n/2.
194 c
195 c when isign = -1, the inverse transform is defined by:
196 c c(k)=(1/n)*SUM(j=0,...,n-1)(x(j)*EXP(-2*i*j*k*pi/n))
197 c WHERE c(k)=a(k)+i*b(k) and i=SQRT(-1)
198 c
199 c a CALL with isign=+1 followed by a CALL with isign=-1
200 c (or vice versa) returns the original data.
201 c
202 c note: the fact that the gridpoint values x(j) are REAL
203 c implies that b(0)=b(n/2)=0. for a CALL with isign=+1,
204 c it is not actually necessary to supply these zeros.
205 c
206 c examples given 19 data vectors each of length 64 (+2 for explicit
207 c cyclic continuity), compute the corresponding vectors of
208 c fourier coefficients. the data may, for example, be
209 c arranged like this:
210 c
211 c first data a(1)= . . . a(66)= a(70)
212 c vector x(63) x(0) x(1) x(2) ... x(63) x(0) (4 empty locations)
213 c
214 c second data a(71)= . . . a(140)
215 c vector x(63) x(0) x(1) x(2) ... x(63) x(0) (4 empty locations)
216 c
217 c and so on. here inc=1, jump=70, n=64, m=19, isign=-1,
218 c and fft99 should be used (because of the explicit cyclic
219 c continuity).
220 c
221 c alternatively the data may be arranged like this:
222 c
223 c first second last
224 c data data data
225 c vector vector vector
226 c
227 c a(1)= a(2)= a(19)=
228 c
229 c x(63) x(63) . . . x(63)
230 c a(20)= x(0) x(0) . . . x(0)
231 c a(39)= x(1) x(1) . . . x(1)
232 c . . .
233 c . . .
234 c . . .
235 c
236 c in which CASE we have inc=19, jump=1, and the remaining
237 c parameters are the same as before. in either CASE, each
238 c coefficient vector overwrites the corresponding input
239 c data vector.
240 c
241 c-----------------------------------------------------------------------
242 c
243 c SUBROUTINE 'fft99' - multiple fast REAL periodic transform
244 c corresponding to old scalar routine fft9
245 c PROCEDURE used to convert to half-length complex transform
246 c is given by cooley, lewis and welch (j. sound vib., vol. 12
247 c (1970), 315-337)
248 c
249 c a is the array containing input and output data
250 c work is an area of SIZE (n+1)*lot
251 c trigs is a previously prepared list of trig FUNCTION values
252 c ifax is a previously prepared list of factors of n/2
253 c inc is the increment within each data "vector"
254 c (e.g. inc=1 for consecutively stored data)
255 c jump is the increment between the start of each data vector
256 c n is the length of the data vectors
257 c lot is the number of data vectors
258 c isign = +1 for transform from spectral to gridpoint
259 c = -1 for transform from gridpoint to spectral
260 c
261 c ordering of coefficients:
262 c a(0),b(0),a(1),b(1),a(2),b(2),...,a(n/2),b(n/2)
263 c WHERE b(0)=b(n/2)=0; (n+2) locations required
264 c
265 c ordering of data:
266 c x(n-1),x(0),x(1),x(2),...,x(n),x(0)
267 c i.e. explicit cyclic continuity; (n+2) locations required
268 c
269 c vectorization is achieved on cray by DOing the transforms in
270 c parallel
271 c
272 c *** n.b. n is assumed to be an even number
273 c
274 c definition of transforms:
275 c -------------------------
276 c
277 c isign=+1: x(j)=SUM(k=0,...,n-1)(c(k)*EXP(2*i*j*k*pi/n))
278 c WHERE c(k)=a(k)+i*b(k) and c(n-k)=a(k)-i*b(k)
279 c
280 c isign=-1: a(k)=(1/n)*SUM(j=0,...,n-1)(x(j)*COS(2*j*k*pi/n))
281 c b(k)=-(1/n)*SUM(j=0,...,n-1)(x(j)*SIN(2*j*k*pi/n))
282 c
283  nfax=ifax(1)
284  nx=n+1
285  nh=n/2
286  ink=inc+inc
287  IF (isign.eq.+1) GOTO 30
288 c
289 c IF necessary, transfer data to work area
290  igo=50
291  IF (mod(nfax,2).eq.1) GOTO 40
292  ibase=inc+1
293  jbase=1
294  DO 20 l=1,lot
295  i=ibase
296  j=jbase
297 cdir$ ivdep
298  DO 10 m=1,n
299  work(j)=a(i)
300  i=i+inc
301  j=j+1
302  10 CONTINUE
303  ibase=ibase+jump
304  jbase=jbase+nx
305  20 CONTINUE
306 c
307  igo=60
308  GOTO 40
309 c
310 c preprocessing (isign=+1)
311 c ------------------------
312 c
313  30 CONTINUE
314  CALL fft99a(a,work,trigs,inc,jump,n,lot)
315  igo=60
316 c
317 c complex transform
318 c -----------------
319 c
320  40 CONTINUE
321  ia=inc+1
322  la=1
323  DO 80 k=1,nfax
324  IF (igo.eq.60) GOTO 60
325  50 CONTINUE
326  CALL vpassm(a(ia),a(ia+inc),work(1),work(2),trigs,
327  * ink,2,jump,nx,lot,nh,ifax(k+1),la)
328  igo=60
329  GOTO 70
330  60 CONTINUE
331  CALL vpassm(work(1),work(2),a(ia),a(ia+inc),trigs,
332  * 2,ink,nx,jump,lot,nh,ifax(k+1),la)
333  igo=50
334  70 CONTINUE
335  la=la*ifax(k+1)
336  80 CONTINUE
337 c
338  IF (isign.eq.-1) GOTO 130
339 c
340 c IF necessary, transfer data from work area
341  IF (mod(nfax,2).eq.1) GOTO 110
342  ibase=1
343  jbase=ia
344  DO 100 l=1,lot
345  i=ibase
346  j=jbase
347 cdir$ ivdep
348  DO 90 m=1,n
349  a(j)=work(i)
350  i=i+1
351  j=j+inc
352  90 CONTINUE
353  ibase=ibase+nx
354  jbase=jbase+jump
355  100 CONTINUE
356 c
357 c fill in cyclic boundary points
358  110 CONTINUE
359  ia=1
360  ib=n*inc+1
361 cdir$ ivdep
362  DO 120 l=1,lot
363  a(ia)=a(ib)
364  a(ib+inc)=a(ia+inc)
365  ia=ia+jump
366  ib=ib+jump
367  120 CONTINUE
368  GOTO 140
369 c
370 c postprocessing (isign=-1):
371 c --------------------------
372 c
373  130 CONTINUE
374  CALL fft99b(work,a,trigs,inc,jump,n,lot)
375 c
376  140 CONTINUE
377 
378  END SUBROUTINE fft99