2 USE stel_kinds,
ONLY: dp
3 USE stel_constants,
ONLY: one, zero
8 INTEGER :: m, mblk_size, icntl(9), info(3)
9 INTEGER :: endglobrow, startglobrow, iam, nprocs,
12 INTEGER :: my_comm=mpi_comm_world,
13 1 my_comm_world=mpi_comm_world
18 INTEGER,
POINTER :: rcounts(:), disp(:)
19 LOGICAL :: lactive = .true.
20 REAL(dp) :: cntl(5), ftol
21 LOGICAL :: lverbose = .true.
22 LOGICAL :: l_nonlinear = .true.
25 INTEGER,
PARAMETER :: finish=0, matveci=1, precondleft=2,
26 1 precondright=3, dotprod=4, peek=5
30 SUBROUTINE gmres_ser (n, gi, yAx, apply_precond,
36 TYPE(GMRES_INFO) :: gi
37 REAL(dp),
INTENT(IN) :: b(n)
38 REAL(dp),
INTENT(INOUT) :: x0(n)
43 INTEGER :: revcom, colx, coly, colz, nbscal, m, iam
45 INTEGER :: nout, lwork, icount, jcount, kout, kprint
46 REAL(dp) :: rinfo(2), fsq_nl=-1, fsq_lin, fsq_min,
47 1 delfsq, fsq_last, bnorm, xmod, xmax,
49 REAL(dp),
TARGET,
ALLOCATABLE :: work(:)
50 REAL(dp),
ALLOCATABLE :: xmin(:)
51 REAL(dp),
POINTER :: sx(:), sy(:), sz(:)
52 LOGICAL :: lprint, lstore, l_nonlinear
54 EXTERNAL yax, apply_precond, getnlforce
63 bnorm = sqrt(sum(b*b))
64 IF (bnorm .EQ. 0)
RETURN
68 fsq_min = 1; fsq_min_lin = 1
70 jcount = 0; icount = 0
72 l_nonlinear = gi%l_nonlinear
76 lprint = (iam.EQ.0 .AND. gi%lverbose)
79 lwork = m**2 + m*(n+6) + 6*n + 1
81 ALLOCATE (work(lwork), stat=nout)
82 IF (nout .NE. 0) stop
'Allocation error in gmres!'
84 IF (gi%icntl(6) .EQ. 1) work(1:n) = x0/bnorm
85 work(n+1:2*n) = b(1:n)/bnorm
88 900
FORMAT(/,1x,
'GMRES (SERIAL) CONVERGENCE SUMMARY',/,1x,15(
'-'))
95 CALL drive_dgmres(n, n, m, lwork, work, irc,
96 & gi%icntl, gi%cntl, gi%info, rinfo)
102 sx => work(colx:); sy => work(coly:); sz => work(colz:)
104 IF (revcom .EQ. matveci)
THEN
113 ELSE IF (revcom.eq.precondleft)
THEN
118 CALL dcopy(n,sx,1,sz,1)
120 CALL apply_precond(sz)
123 ELSE IF (revcom .EQ. precondright)
THEN
127 CALL dcopy(n,sx,1,sz,1)
129 CALL apply_precond(sz)
132 ELSE IF (revcom .EQ. dotprod)
THEN
138 DO nout = 0, nbscal-1
139 work(colz+nout) = sum(work(colx:colx+n-1)*work(coly:coly+n-1))
140 CALL truncate(work(colz+nout), 15)
154 ELSE IF (revcom .EQ. peek)
THEN
156 IF (l_nonlinear)
CALL getnlforce(work(colx), fsq_nl, bnorm)
158 fsq_lin = (rinfo(1)*bnorm)**2
160 lstore = (fsq_nl.LT.fsq_min .OR. icount.EQ.1)
161 fsq_min = min(fsq_min, fsq_nl)
162 fsq_min_lin = min(fsq_min_lin, fsq_lin)
163 IF (fsq_lin .LT. 1.e-30_dp) gi%icntl(7)=gi%info(1)
165 IF (lprint .AND. icount.EQ.1)
THEN
166 IF (l_nonlinear)
THEN
174 IF (icount.EQ.1 .OR. mod(icount,10).EQ.0)
THEN
176 xmod = bnorm*sqrt(sum(work(colx:colx+n-1)**2))
177 xmax = bnorm*maxval(abs(work(colx:colx+n-1)))
178 IF (icount .EQ. 1)
THEN
179 fsq_min = fsq_nl; fsq_min_lin = fsq_lin
182 IF (l_nonlinear)
THEN
183 print 910, kout, fsq_nl, xmod, xmax, fsq_lin
185 print 911, kout, xmod, xmax, fsq_lin
190 IF (gi%ngmres_type .EQ. 1)
GOTO 10
192 delfsq = (fsq_last-fsq_nl)/fsq_min
193 IF (delfsq .LT. 0.05_dp)
THEN
200 IF (fsq_nl.GT.(3*fsq_min) .OR. jcount.GT.3
201 & .OR. fsq_nl.LE.gi%ftol) gi%icntl(7) = gi%info(1)
203 IF (.NOT.
ALLOCATED(xmin))
ALLOCATE (xmin(n))
204 xmin = work(colx:colx+n-1)
209 905
FORMAT(1x,
'ITER',7x,
'FSQ_NL',10x,
'||X||',9x,
'MAX|X|',9x,
'FSQ_ARN')
210 906
FORMAT(1x,
'ITER',7x,
'||X||',11x,
'MAX|X|',9x,
'FSQ_ARN')
211 910
FORMAT(i5, 4(3x,1pe12.3))
212 911
FORMAT(i5, 3(3x,1pe12.3))
216 IF (
ALLOCATED (xmin))
THEN
222 IF (.NOT.l_nonlinear) fsq_min = fsq_lin
227 IF (lprint .AND. kprint.NE.kout)
THEN
228 xmod = sqrt(sum(x0*x0)); xmax = maxval(abs(x0))
229 IF (l_nonlinear)
THEN
230 print 910, kout, fsq_min, xmod, xmax, fsq_min_lin
232 print 911, kout, xmod, xmax, fsq_lin
242 END SUBROUTINE gmres_ser
244 SUBROUTINE gmres_par (n, gi, yAx, apply_precond, GetNLForce,
249 INTEGER,
INTENT(IN) :: n
250 TYPE(GMRES_INFO) :: gi
251 REAL(dp),
INTENT(IN) :: b(n)
252 REAL(dp),
INTENT(INOUT) :: x0(n)
253 EXTERNAL apply_precond, yax, getnlforce
258 INTEGER :: revcom, colx, coly, colz, nbscal, m, iam
259 INTEGER :: irc(5), jcount, icount
260 INTEGER :: nout, lwork, kout, kprint, MPI_ERR,
261 & my_comm_world, my_comm
262 INTEGER,
ALLOCATABLE :: itest(:)
263 REAL(dp) :: rinfo(2), fsq_nl, fsq_min, fsq_last, delfsq, fsq_lin,
265 REAL(dp),
ALLOCATABLE :: work(:), xmin(:)
266 INTEGER :: nloc, myrowstart, myrowend
267 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: aux, tmpbuf
268 REAL(dp) :: skston, skstoff, xmax, xmod, bnorm, lsum, gsum
269 LOGICAL,
PARAMETER :: lfast=.false.
270 LOGICAL :: lactive = .true., lprint, lstore, l_nonlinear
280 my_comm_world = gi%MY_COMM_WORLD
282 l_nonlinear = gi%l_nonlinear
284 fsq_min = 1; fsq_min_lin = 1
286 jcount = 0; icount = 0; kout = 0; kprint = 0
288 nloc=(gi%endglobrow-gi%startglobrow+1)*gi%mblk_size
289 myrowstart=(gi%startglobrow-1)*gi%mblk_size+1
290 myrowend=myrowstart+nloc-1
292 IF (.NOT.lactive)
THEN
294 myrowstart = 1; myrowend = myrowstart
297 ALLOCATE(tmpbuf(n), aux(n), itest(gi%nprocs), stat=nout)
298 IF (nout .NE. 0) stop
'Allocation error in gmres_fun!'
300 m = gi%m; iam = gi%iam;
301 lprint = (iam.EQ.0 .AND. gi%lverbose .AND. lactive)
302 lwork = m**2 + m*(nloc+6) + 6*nloc + 1
303 ALLOCATE (work(lwork), stat=nout)
304 IF (nout .NE. 0) stop
'Allocation error in gmres_fun!'
309 lactive0:
IF (lactive)
THEN
310 lsum=sum(b(myrowstart:myrowend)**2)
311 CALL mpi_allreduce(lsum,gsum,1,mpi_real8,mpi_sum,my_comm,mpi_err)
313 IF (bnorm .EQ. 0)
RETURN
315 IF (gi%icntl(6) .EQ. 1)
316 & work(1:nloc) = x0(myrowstart:myrowend)/bnorm
318 work(nloc+1:2*nloc) = b(myrowstart:myrowend)/bnorm
320 IF (lprint) print 900
321 900
FORMAT(/,1x,
'GMRES (PARALLEL) CONVERGENCE SUMMARY',/,1x,15(
'-'))
325 CALL mpi_bcast(bnorm, 1, mpi_real8, 0, my_comm_world,
335 CALL drive_dgmres(n, nloc, m, lwork, work, irc,
336 & gi%icntl, gi%cntl, gi%info, rinfo)
342 CALL mpi_bcast(irc(1), 1, mpi_integer, 0, my_comm_world,
344 CALL mpi_bcast(rinfo(1), 1, mpi_real8, 0, my_comm_world,
353 revcom_loop:
IF (revcom .EQ. matveci)
THEN
356 CALL yax (work(colx), work(colz), nloc)
359 ELSE IF (revcom .EQ. precondleft)
THEN
363 IF (lactive)
CALL dcopy(nloc,work(colx),1,work(colz),1)
366 ELSE IF (revcom .EQ. precondright)
THEN
367 CALL dcopy(nloc,work(colx),1,work(colz),1)
370 aux(myrowstart:myrowend) = work(colz:colz+nloc-1)
372 CALL apply_precond(aux)
373 work(colz:colz+nloc-1)=aux(myrowstart:myrowend)
377 ELSE IF (revcom .EQ. dotprod)
THEN
381 lactive_dp:
IF (lactive)
THEN
383 CALL mpi_allgather(nbscal, 1, mpi_integer, itest, 1,
384 & mpi_integer, my_comm, mpi_err)
385 IF (lprint .AND. any(itest(1:gi%nprocs) .NE. nbscal))
THEN
386 print *,
'itest: ',itest(1:gi%nprocs)
387 stop
'nbscal not same on all procs!'
397 aux(nout)= sum(work(colx:colx+nloc-1)*work(coly:coly+nloc-1))
401 CALL mpi_allreduce(aux,work(colz),nbscal,mpi_real8,mpi_sum,
408 CALL mpi_gatherv(work(coly), nloc, mpi_real8, aux, gi%rcounts,
409 & gi%disp, mpi_real8, 0, my_comm, mpi_err)
412 CALL mpi_gatherv(work(colx),nloc,mpi_real8,tmpbuf,gi%rcounts,
413 & gi%disp,mpi_real8,0,my_comm,mpi_err)
414 IF (iam .EQ. 0) work(colz+nout) = sum(tmpbuf*aux)
418 CALL mpi_bcast(work(colz), nbscal, mpi_real8, 0, my_comm,
423 CALL truncate(work(colz+nout), 15)
429 ELSE IF (revcom .EQ. peek)
THEN
432 aux(myrowstart:myrowend) = work(colx:colx+nloc-1)
435 IF (l_nonlinear)
CALL getnlforce(aux, fsq_nl, bnorm)
437 fsq_lin = (rinfo(1)*bnorm)**2
439 lstore = (fsq_nl.LT.fsq_min .OR. icount.EQ.1)
440 fsq_min = min(fsq_min, fsq_nl)
441 fsq_min_lin = min(fsq_min_lin, fsq_lin)
443 IF (fsq_lin .LT. 1.e-30_dp) gi%icntl(7)=gi%info(1)
445 IF (lprint .AND. icount.EQ.1)
THEN
446 IF (l_nonlinear)
THEN
454 lactive1:
IF (lactive)
THEN
456 IF (icount.EQ.1 .OR. mod(icount,10).EQ.0)
THEN
457 lsum=sum(aux(myrowstart:myrowend)**2)
458 CALL mpi_reduce(lsum, gsum, 1, mpi_real8, mpi_sum, 0,
460 xmod=bnorm*sqrt(gsum)
461 lsum=maxval(abs(aux(myrowstart:myrowend)))
462 CALL mpi_reduce(lsum, xmax, 1, mpi_real8, mpi_max, 0,
467 IF (l_nonlinear)
THEN
468 print 910, kout, fsq_nl, xmod, xmax, fsq_lin
470 print 911, kout, xmod, xmax, fsq_lin
477 IF (gi%ngmres_type .EQ. 1)
GOTO 10
480 delfsq = (fsq_last-fsq_nl)/fsq_min
481 IF (delfsq .LT. 0.05_dp)
THEN
488 IF (fsq_nl.GT.(3*fsq_min) .OR. jcount.GT.3
489 & .OR. fsq_nl.LE.gi%ftol) gi%icntl(7) = gi%info(1)
491 IF (.NOT.
ALLOCATED(xmin))
ALLOCATE (xmin(nloc))
492 xmin = work(colx:colx+nloc-1)
498 905
FORMAT(1x,
'ITER',7x,
'FSQ_NL',10x,
'||X||',9x,
'MAX|X|',9x,
'FSQ_ARN')
499 906
FORMAT(1x,
'ITER',7x,
'||X||',11x,
'MAX|X|',9x,
'FSQ_ARN')
500 910
FORMAT(i5, 4(3x,1pe12.3))
501 911
FORMAT(i5, 3(3x,1pe12.3))
508 lactive2:
IF (lprint .AND. lactive)
THEN
511 OPEN(nout,file=
'sol_dTestgmres',status=
'unknown')
512 IF (gi%icntl(5).EQ.0)
THEN
513 WRITE(nout,*)
'Orthogonalisation : MGS'
514 ELSEIF (gi%icntl(5).eq.1)
THEN
515 WRITE(nout,*)
'Orthogonalisation : IMGS'
516 ELSEIF (gi%icntl(5).eq.2)
THEN
517 WRITE(nout,*)
'Orthogonalisation : CGS'
518 ELSEIF (gi%icntl(5).eq.3)
THEN
519 WRITE(nout,*)
'Orthogonalisation : ICGS'
521 WRITE(nout,*)
'Restart : ', m
522 WRITE(nout,*)
'info(1) = ',gi%info(1),
' info(2) = ',gi%info(2)
523 WRITE(nout,*)
'rinfo(1) = ',rinfo(1),
' rinfo(2) = ',rinfo(2)
524 WRITE(nout,*)
'Optimal workspace = ', gi%info(3)
525 WRITE(nout,*)
'Solution : '
527 WRITE(nout,*) work(jcount)
535 lactive3:
IF (lactive)
THEN
536 IF (
ALLOCATED (xmin))
THEN
537 work(1:nloc) = xmin(1:nloc)
543 CALL mpi_allgatherv(work, nloc, mpi_real8, tmpbuf, gi%rcounts,
544 & gi%disp, mpi_real8, my_comm, mpi_err)
547 IF (lprint .AND. kout.NE.kprint)
THEN
548 xmod = sqrt(sum(x0*x0)); xmax = maxval(abs(x0))
549 IF (l_nonlinear)
THEN
550 print 910, kout, fsq_min, xmod, xmax, fsq_min_lin
552 print 911, kout, xmod, xmax, fsq_lin
565 DEALLOCATE (work, tmpbuf, aux, itest)
567 END SUBROUTINE gmres_par
569 SUBROUTINE truncate(num, iprec0)
570 USE stel_kinds,
ONLY: dp
574 INTEGER,
INTENT(IN) :: iprec0
575 REAL(dp),
INTENT(INOUT) :: num
579 CHARACTER*24 :: chnum, tchnum
587 WRITE (chnum,
'(a,i2,a,i2,a)')
'(1p,e',iprec0+7,
'.',iprec0,
')'
588 WRITE (tchnum, chnum) num
590 READ (tchnum, chnum) num
592 END SUBROUTINE truncate