2 USE stel_kinds,
ONLY: dp
5 USE vparams,
ONLY: nthreed, one, zero
6 USE vmec_input,
ONLY: ntor, nzeta, lfreeb, lasym
10 USE parallel_include_module
16 INTEGER,
PRIVATE,
PARAMETER :: sp = dp
17 INTEGER,
PRIVATE :: ntyptot, m_2d, n_2d, ntype_2d
18 INTEGER,
PRIVATE,
ALLOCATABLE :: ipiv_blk(:,:)
19 INTEGER,
PRIVATE :: mblk_size
20 INTEGER,
PRIVATE :: mystart(3), myend(3)
21 LOGICAL,
PRIVATE :: FIRSTPASS=.true.
22 REAL(sp),
PRIVATE,
ALLOCATABLE,
DIMENSION(:,:,:,:,:,:,:) ::
23 1 block_diag, block_plus, block_mins,
24 2 block_dsave, block_msave, block_psave
25 REAL(sp),
PRIVATE,
ALLOCATABLE,
DIMENSION(:,:,:,:,:,:) ::
26 1 block_diag_sw, block_plus_sw, block_mins_sw
28 REAL(dp),
PRIVATE,
DIMENSION(:,:,:,:),
ALLOCATABLE :: gc_save
29 REAL(dp) :: ctor_prec2d
30 INTEGER :: ictrl_prec2d
31 LOGICAL :: lHess_exact = .true.,
32 1 l_backslv = .false.,
33 2 l_comp_prec2d = .true.,
37 LOGICAL,
PARAMETER,
PRIVATE :: lscreen = .false.
38 INTEGER,
PARAMETER,
PRIVATE :: jstart(3) = (/1,2,3/)
40 PRIVATE :: swap_forces, reswap_forces
45 CHARACTER(LEN=3) :: FlashDrive =
""
46 CHARACTER(LEN=128) :: ScratchFile=
""
47 INTEGER,
PARAMETER :: blmin=1, bldia=2, blpls=3
48 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:,:) :: DataItem
49 INTEGER :: iunit_dacess=10
50 LOGICAL :: lswap2disk = .false.
51 INTEGER,
PARAMETER :: LOWER=3,diag=2,upper=1
72 SUBROUTINE swap_forces(gc, temp, mblk, nblocks)
76 INTEGER :: mblk, nblocks
77 REAL(dp),
DIMENSION(nblocks,mblk),
INTENT(in) :: gc
78 REAL(dp),
DIMENSION(mblk,nblocks),
INTENT(out) :: temp
87 END SUBROUTINE swap_forces
89 SUBROUTINE reswap_forces(temp, gc, mblk, nblocks)
93 INTEGER :: mblk, nblocks
94 REAL(dp),
DIMENSION(nblocks,mblk),
INTENT(inout) :: gc
95 REAL(dp),
DIMENSION(mblk,nblocks),
INTENT(in) :: temp
103 END SUBROUTINE reswap_forces
105 SUBROUTINE block_precond(gc)
109 REAL(dp),
DIMENSION(ns,0:ntor,0:mpol1,ntyptot) :: gc
113 INTEGER :: mblk, m, n, js, ntype, istat
114 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: temp
115 REAL(dp) :: t1, error
120 IF (ntyptot .le. 0) stop
'ntyptot must be > 0'
122 IF (l_backslv) gc_save = gc
124 mblk = ntyptot*mnsize
125 ALLOCATE (temp(mblk,ns), stat=ntype)
126 IF (ntype .ne. 0) stop
'Allocation error1 in block_precond'
129 CALL swap_forces(gc, temp, mblk, ns)
133 CALL blk3d_slv_swp(temp, ipiv_blk, mblk, ns)
135 CALL blk3d_slv(block_diag, block_mins, block_plus, temp,
136 1 ipiv_blk, mblk, ns)
140 CALL reswap_forces(temp, gc, mblk, ns)
145 WRITE (6, *)
' Writing block Hessian check to unit 34'
147 WRITE (34, *)
' BLK3D FACTORIZATION CHECK: Ax = b ?'
150 WRITE (34, *)
' N = ', n
152 WRITE (34, *)
' M = ', m
153 DO ntype = 1, ntyptot
154 WRITE (34, *)
' TYPE = ', ntype
156 1
' js Ax b Ax - b' //
159 t1 = sum(block_dsave(n,m,ntype,:,:,:,js)*gc(js,:,:,:)
160 1 + block_psave(n,m,ntype,:,:,:,js)*gc(js+1,:,:,:))
162 error = t1 + gc_save(js,n,m,ntype)
163 IF (t1 .eq. zero) t1 = epsilon(t1)
164 WRITE (34, 100) js, t1, -gc_save(js,n,m,ntype),
169 1 block_msave(n,m,ntype,:,:,:,js)*gc(js-1,:,:,:)
170 2 + block_dsave(n,m,ntype,:,:,:,js)*gc(js,:,:,:)
171 3 + block_psave(n,m,ntype,:,:,:,js)*gc(js+1,:,:,:))
172 error = t1 + gc_save(js,n,m,ntype)
173 IF (t1 .eq. zero) t1 = epsilon(t1)
174 WRITE (34, 100) js, t1, -gc_save(js,n,m,ntype),
179 t1 = sum(block_msave(n,m,ntype,:,:,:,js)*gc(js-1,:,:,:)
180 1 + block_dsave(n,m,ntype,:,:,:,js)*gc(js,:,:,:))
181 error = t1 + gc_save(js,n,m,ntype)
182 IF (t1 .eq. zero) t1 = epsilon(t1)
183 WRITE (34, 100) js, t1, -gc_save(js,n,m,ntype),
189 IF (.not.l_backslv)
DEALLOCATE(block_dsave, block_msave,
190 1 block_psave, gc_save, stat=istat)
192 100
FORMAT(i6,1p,4e14.4)
196 DEALLOCATE (temp, stat=istat)
198 END SUBROUTINE block_precond
200 SUBROUTINE block_precond_par(gc)
204 USE parallel_include_module
208 REAL(dp),
DIMENSION(0:ntor,0:mpol1,ns,ntyptot) :: gc
212 INTEGER :: mblk, istat, globrow
213 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: tmp
214 REAL(dp) :: ton, toff
215 REAL(dp),
DIMENSION (:,:),
ALLOCATABLE :: solvec
217 IF (.NOT.lactive)
THEN
223 IF (ntyptot .LE. 0)
THEN
224 stop
'ntyptot must be > 0'
227 mblk = ntyptot*mnsize
231 ALLOCATE (tmp(mblk,ns), stat=istat)
232 CALL tolastns(gc, tmp)
233 tmp(:,tlglob:trglob) = -tmp(:,tlglob:trglob)
235 DO globrow=tlglob, trglob
236 CALL setmatrixrhs(globrow,tmp(:,globrow))
238 DEALLOCATE (tmp, stat=istat)
243 bcyclic_backwardsolve_time=bcyclic_backwardsolve_time+(toff-ton)
245 ALLOCATE (solvec(mblk,ns), stat=istat)
246 IF (istat .NE. 0)
THEN
247 stop
'Allocation error in block_precond before gather'
250 DO globrow=tlglob, trglob
251 CALL getsolutionvector (globrow,solvec(:,globrow))
254 CALL tolastntype(solvec, gc)
256 CALL gather4xarray(gc)
260 END SUBROUTINE block_precond_par
262 SUBROUTINE compute_blocks_par (xc, xcdot, gc)
264 USE parallel_include_module
265 USE vmec_main,
ONLY: iter2
269 REAL(dp),
DIMENSION(0:ntor,0:mpol1,ns,3*ntmax) :: xc, gc, xcdot
273 REAL(dp),
PARAMETER :: p5 = 0.5_dp
274 INTEGER :: m, n, i, ntype, istat, mblk, ibsize, iunit
275 REAL(dp) :: time_on, time_off, bsize, tprec2don, tprec2doff
276 REAL(dp) :: ton, toff
277 CHARACTER(LEN=100):: label
278 LOGICAL,
PARAMETER :: lscreen = .false.
289 CALL second0(tprec2don)
291 IF (l_backslv .and. sp.ne.dp)
THEN
292 stop
'Should set sp = dp!'
296 IF (ntyptot .NE. 3*ntmax)
THEN
297 stop
' NTYPTOT != 3*ntmax'
299 mblk = ntyptot*mnsize
301 bsize = real(mblk*mblk, dp)*3*kind(block_diag)
302 IF (bsize .gt. huge(mblk))
THEN
303 WRITE (6, *)
' bsize: ', bsize,
' exceeds HUGE(int): ',
312 IF (bsize .lt. 1.e6_dp)
THEN
313 ibsize = bsize/1.e1_dp
315 ELSE IF (bsize .lt. 1.e9_dp)
THEN
316 ibsize = bsize/1.e4_dp
319 ibsize = bsize/1.e7_dp
331 WRITE (iunit,
'(/,2x,a,i5,a,/,2x,a,i5,a)')
332 &
'Initializing 2D block preconditioner at ', iter2,
334 &
'Estimated time to compute Hessian = ',
335 & 3*ntyptot*mnsize,
' VMEC time steps'
336 WRITE (iunit,
'(2x,a,i4,a,f12.2,a)')
'Block dim: ', mblk,
337 &
'^2 Preconditioner size: ', real(ibsize)/100,
344 CALL second0(time_on)
346 ALLOCATE (gc_save(0:ntor,0:mpol1,ns,ntyptot), stat=istat)
347 IF (istat .NE. 0)
THEN
348 stop
'Allocation error: gc_save in compute_blocks'
351 IF (
ALLOCATED(block_diag))
THEN
352 DEALLOCATE (block_diag, block_plus, block_mins, stat=istat)
353 IF (istat .ne. 0)
THEN
354 stop
'Deallocation error in compute blocks'
361 CALL sweep3_blocks_par (xc, xcdot, gc)
363 CALL compute_col_scaling_par
368 CALL second0(time_off)
369 IF (grank .EQ. 0)
THEN
370 WRITE (6,1000) time_off - time_on
371 WRITE (nthreed,1000) time_off - time_on
377 CALL second0(time_on)
378 IF (
ALLOCATED(ipiv_blk))
THEN
379 DEALLOCATE(ipiv_blk, stat=ntype)
381 ALLOCATE (ipiv_blk(mblk,ns), stat=ntype)
382 IF (ntype .ne. 0)
THEN
383 stop
'Allocation error2 in block_precond'
391 CALL second0(time_off)
393 bcyclic_forwardsolve_time = bcyclic_forwardsolve_time
397 WRITE(6,1001) time_off - time_on
398 WRITE(nthreed,1001) time_off - time_on
401 IF (.NOT.l_backslv)
THEN
405 CALL second0(tprec2doff)
407 timer(tprec2d) = timer(tprec2d) + (tprec2doff - tprec2don)
408 compute_blocks_time = compute_blocks_time
409 & + (tprec2doff - tprec2don)
411 1000
FORMAT(1x,
' Time to compute blocks: ',f10.2,
' s')
412 1001
FORMAT(1x,
' Time to factor blocks: ',f10.2,
' s')
414 END SUBROUTINE compute_blocks_par
416 SUBROUTINE sweep3_blocks_par(xc, xcdot, gc)
417 USE vmec_main,
ONLY: ncurr, r01, z01, lthreed, chips, delt0r
420 USE parallel_vmec_module,
ONLY: mpi_stat
424 REAL(dp),
DIMENSION(0:ntor,0:mpol1,ns,ntyptot) :: xc, xcdot, gc
428 INTEGER :: js, js1, istat, mesh, lamtype, rztype, icol
429 INTEGER :: nsmin, nsmax
430 INTEGER :: lastrank, left, right
431 REAL(dp) :: eps, hj, hj_scale
432 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: diag_val
433 REAL(dp) :: ton, toff
440 ALLOCATE(diag_val(ns), stat=istat)
444 &
" Using non-symmetric sweep to compute Hessian elements"
447 eps = sqrt(epsilon(eps))
455 ALLOCATE(dataitem(0:ntor,0:mpol1,1:3*ntmax), stat=istat)
456 IF (istat .ne. 0)
THEN
457 stop
'Allocation error in sweep3_blocks'
465 CALL funct3d_par(lscreen, istat)
466 IF (istat .NE. 0)
THEN
467 print *,
' ier_flag = ', istat,
468 1
' in SWEEP3_BLOCKS_PAR call to funct3d_par'
474 xcdot(:,:,nsmin:nsmax,:) = 0
481 mblk_size = (ntor + 1)*(mpol1 + 1)*3*ntmax
482 IF (mblk_size .NE. ntmaxblocksize)
THEN
483 stop
'wrong mblk_size in precon2d!'
485 CALL initialize(.false.,ns,mblk_size)
489 icol = mod(jstart(mesh) - nsmin, 3)
490 IF (icol .LT. 0)
THEN
493 mystart(mesh) = nsmin + icol
494 IF (mod(jstart(mesh) - ns, 3) .EQ. 0)
THEN
495 edge_mesh(mesh) = .true.
501 #if defined(CHI_FORCE)
502 IF (ncurr .EQ. 1)
THEN
503 xc(0,0,nsmin:nsmax,lamtype) = chips(nsmin:nsmax)
507 IF (rank .EQ. 0)
THEN
511 IF (rank .EQ. nranks - 1)
THEN
512 right = mpi_proc_null
519 CALL restart_iter(delt0r)
520 gc_save(:,:,nsmin:nsmax,:) = gc(:,:,nsmin:nsmax,:)
523 CALL mpi_comm_size(ns_comm,lastrank,mpi_err)
524 lastrank = lastrank - 1
531 ntype_2d = zsc + ntmax
536 hj = eps * max(abs(r01(ns)), abs(z01(ns)))
537 IF (nranks .GT. 1)
THEN
538 CALL mpi_bcast(hj,1,mpi_real8,lastrank,ns_comm,mpi_err)
539 CALL mpi_bcast(edge_mesh,3,mpi_logical,lastrank,ns_comm,
542 DO js = mystart(1), myend(1), 3
543 xcdot(n_2d,m_2d,js,ntype_2d) = hj
547 CALL funct3d_par (lscreen, istat)
549 lactive0:
IF (lactive)
THEN
550 IF (nranks .GT. 1)
THEN
551 CALL gather4xarray(gc)
553 IF (istat .NE. 0)
THEN
554 stop
'Error computing Hessian jog!'
557 xcdot(:,:,nsmin:nsmax,:) = 0
558 DO js = mystart(1), myend(1), 3
559 dataitem = (gc(:,:,js,:) - gc_save(:,:,js,:))/hj
560 diag_val(js) = dataitem(0,0,ntype_2d)
563 IF (nranks .GT. 1)
THEN
565 IF (trglob_arr(1) .LT. 4)
THEN
568 CALL mpi_bcast(diag_val(4), 1, mpi_real8, icol, ns_comm,
571 IF (tlglob_arr(nranks) .GT. ns - 3)
THEN
574 CALL mpi_bcast(diag_val(ns - 3), 1, mpi_real8, icol,
577 IF (diag_val(1) .EQ. zero)
THEN
578 diag_val(1) = diag_val(4)
580 IF (diag_val(ns) .EQ. zero)
THEN
581 diag_val(ns) = diag_val(ns - 3)
584 IF (nranks .GT. 1)
THEN
585 CALL mpi_sendrecv(diag_val(trglob), 1, mpi_real8, right, 1,
586 & diag_val(t1lglob), 1, mpi_real8, left, 1,
587 & ns_comm, mpi_stat, mpi_err)
589 DO js = mystart(2), myend(2), 3
590 diag_val(js) = diag_val(js - 1)
593 hj_scale = max(abs(r01(ns)), abs(z01(ns)))
595 IF (nranks .GT. 1)
THEN
596 CALL mpi_sendrecv(diag_val(trglob), 1, mpi_real8, right, 1,
597 & diag_val(t1lglob), 1, mpi_real8, left, 1,
598 & ns_comm, mpi_stat, mpi_err)
599 CALL mpi_bcast(hj_scale, 1, mpi_real8, lastrank, ns_comm,
603 DO js = mystart(3), myend(3), 3
604 diag_val(js) = diag_val(js - 1)
607 IF (any(diag_val(tlglob:trglob) .EQ. zero))
THEN
608 print *,
'For rank: ', rank,
' some diag_val == 0'
619 ntype2d:
DO ntype_2d = 1, ntyptot
621 IF (ntype_2d .LT. lamtype)
THEN
625 m2d:
DO m_2d = 0, mpol1
627 n2d:
DO n_2d = 0, ntor
631 mesh_3pt:
DO mesh = 1,3
635 DO js = mystart(mesh), myend(mesh), 3
636 xcdot(n_2d,m_2d,js,ntype_2d) = hj
638 IF (m_2d.GT.0 .AND. mystart(mesh).EQ.1)
THEN
639 xcdot(n_2d,m_2d,1,ntype_2d) = 0
643 l_edge = edge_mesh(mesh)
644 CALL funct3d_par (lscreen, istat)
645 IF (istat .NE. 0) stop
'Error computing Hessian jog!'
664 lactive1:
IF (lactive)
THEN
665 skip3_mesh:
DO js = mystart(mesh), myend(mesh), 3
668 xcdot(n_2d,m_2d,js,ntype_2d) = 0
672 IF (tlglob.LE.js1 .AND. js1.LE.trglob)
THEN
674 & (gc(:,:,js1,:)-gc_save(:,:,js1,:))/hj
675 CALL setblockrowcol(js1,icol,dataitem,lower)
679 IF (tlglob.LE.js .AND. js.LE.trglob)
THEN
680 dataitem = (gc(:,:,js,:)
681 & - gc_save(:,:,js,:))/hj
683 IF (rank .EQ. lastrank .AND.
687 & 1:rztype) .NE. zero))
THEN
688 stop
'DIAGONAL BLOCK AT EDGE != 0'
692 IF (ntype_2d .GE. lamtype)
THEN
693 dataitem(n_2d,m_2d,ntype_2d) =
694 & 1.0001_dp*dataitem(n_2d,m_2d,ntype_2d)
697 IF (dataitem(n_2d,m_2d,
698 & ntype_2d) .EQ. zero)
THEN
699 dataitem(n_2d,m_2d,ntype_2d) =
703 CALL setblockrowcol(js,icol,dataitem,diag)
708 IF (tlglob .LE. js1 .AND. js1 .LE. trglob)
THEN
709 dataitem = (gc(:,:,js1,:) -
710 & gc_save(:,:,js1,:))/hj
711 CALL setblockrowcol(js1,icol,dataitem,upper)
723 DEALLOCATE(dataitem, diag_val)
725 fill_blocks_time=fill_blocks_time + (toff - ton)
727 END SUBROUTINE sweep3_blocks_par
729 SUBROUTINE compute_blocks(xc, xcdot, gc)
730 USE vmec_main,
ONLY: iter2
734 REAL(dp),
DIMENSION(ns,0:ntor,0:mpol1,3*ntmax) :: xc, gc, xcdot
738 REAL(dp),
PARAMETER :: p5 = 0.5_dp
739 INTEGER :: m, n, i, ntype, istat,
740 1 mblk, ibsize, iunit
741 REAL(dp) :: time_on, time_off, bsize, tprec2don, tprec2doff
742 CHARACTER(LEN=100):: label
743 LOGICAL,
PARAMETER :: lscreen = .false.
753 CALL second0(tprec2don)
754 IF (l_backslv .and. sp.ne.dp)
THEN
755 stop
'Should set sp = dp!'
759 mblk = ntyptot*mnsize
761 bsize = real(mblk*mblk, dp)*3*ns*kind(block_diag)
771 IF (bsize .lt. 1.e6_dp)
THEN
772 ibsize = bsize/1.e1_dp
774 ELSE IF (bsize .lt. 1.e9_dp)
THEN
775 ibsize = bsize/1.e4_dp
778 ibsize = bsize/1.e7_dp
782 WRITE (6, 1000) iter2, 3*ntyptot*mnsize, mblk, real(ibsize)/100,
784 WRITE (nthreed, 1000) iter2, 3*ntyptot*mnsize, mblk,
785 & real(ibsize)/100, trim(label)
789 CALL second0(time_on)
791 ALLOCATE (gc_save(ns,0:ntor,0:mpol1,ntyptot), stat=istat)
792 IF (istat .ne. 0)
THEN
793 stop
'Allocation error: gc_save in compute_blocks'
796 IF (
ALLOCATED(block_diag))
THEN
797 DEALLOCATE (block_diag, block_plus, block_mins, stat=istat)
798 IF (istat .ne. 0)
THEN
799 stop
'Deallocation error in compute blocks'
801 ELSE IF (
ALLOCATED(block_diag_sw))
THEN
802 DEALLOCATE (block_diag_sw, block_plus_sw, block_mins_sw,
804 IF (istat .ne. 0)
THEN
805 stop
'Deallocation error in compute blocks'
809 ALLOCATE (block_diag(0:ntor,0:mpol1,ntyptot,
810 & 0:ntor,0:mpol1,ntyptot,ns),
811 & block_plus(0:ntor,0:mpol1,ntyptot,
812 & 0:ntor,0:mpol1,ntyptot,ns),
813 & block_mins(0:ntor,0:mpol1,ntyptot,
814 & 0:ntor,0:mpol1,ntyptot,ns),
817 lswap2disk = (istat .NE. 0)
823 WRITE (6,
'(a,i4,a)')
' Allocation error(1) = ', istat,
824 &
': Not enough memory in compute_blocks'
825 WRITE (6,*)
' Writing blocks to disk file'
826 ALLOCATE (block_diag_sw(0:ntor,0:mpol1,ntyptot,
827 & 0:ntor,0:mpol1,ntyptot),
828 & block_plus_sw(0:ntor,0:mpol1,ntyptot,
829 & 0:ntor,0:mpol1,ntyptot),
830 & block_mins_sw(0:ntor,0:mpol1,ntyptot,
831 & 0:ntor,0:mpol1,ntyptot),
834 IF (istat .ne. 0)
THEN
835 WRITE (6,
'(a,i4)')
' Allocation error(2) = ', istat
844 scratchfile =
"PRCND2A.bin"
845 IF (flashdrive .ne.
"")
THEN
846 scratchfile = flashdrive // scratchfile
848 CALL opendafile(mblk, mblk**2, 3, scratchfile, iunit_dacess, 0)
849 scratchfile =
"PRCND2B.bin"
850 IF (flashdrive .ne.
"")
THEN
851 scratchfile = flashdrive // scratchfile
865 CALL sweep3_blocks (xc, xcdot, gc)
866 CALL compute_col_scaling
872 CALL second0(time_off)
873 WRITE (6,1001) time_off - time_on
874 WRITE (6,1001) nthreed
879 ALLOCATE (block_dsave(0:ntor,0:mpol1,ntyptot,
880 & 0:ntor,0:mpol1,ntyptot,ns),
881 & block_msave(0:ntor,0:mpol1,ntyptot,
882 & 0:ntor,0:mpol1,ntyptot,ns),
883 & block_psave(0:ntor,0:mpol1,ntyptot,
884 & 0:ntor,0:mpol1,ntyptot,ns),
886 IF (istat .ne. 0)
THEN
887 WRITE (6,*)
'Allocation error in l_backslv block: stat = ',
891 block_dsave = block_diag; block_msave = block_mins
892 block_psave = block_plus
899 CALL second0(time_on)
900 IF (
ALLOCATED(ipiv_blk))
THEN
901 DEALLOCATE(ipiv_blk, stat=ntype)
903 ALLOCATE (ipiv_blk(mblk,ns), stat=ntype)
904 IF (ntype .ne. 0) stop
'Allocation error2 in block_precond'
907 CALL blk3d_factor_swp(ipiv_blk, mblk, ns)
909 CALL blk3d_factor(block_diag, block_mins, block_plus,
910 1 ipiv_blk, mblk, ns)
913 CALL second0(time_off)
914 WRITE (6,1002) time_off - time_on
915 WRITE (nthreed,1002) time_off - time_on
917 IF (.NOT.l_backslv)
DEALLOCATE (gc_save)
919 CALL second0(tprec2doff)
921 timer(tprec2d) = timer(tprec2d) + (tprec2doff - tprec2don)
923 1000
FORMAT(/,2x,
'Initializing 2D block preconditioner at ',i5,
925 & /,2x,
'Estimated time to compute Hessian = ',i5,
926 &
' VMEC time steps',
927 & /,2x,
'Block dim: ',i4,
'^2 Preconditioner size: ',f12.2,a)
928 1001
FORMAT(1x,
' Time to compute blocks: ',f10.2,
' s')
929 1002
FORMAT(1x,
' Time to factor blocks: ',f10.2,
' s')
931 END SUBROUTINE compute_blocks
933 SUBROUTINE sweep3_blocks(xc, xcdot, gc )
934 USE vmec_main,
ONLY: ncurr, r01, z01, lthreed, chips, delt0r
938 REAL(dp),
DIMENSION(ns,0:ntor,0:mpol1,ntyptot) :: xc, xcdot,
943 REAL(dp) :: eps, hj, diag_val(ns)
944 INTEGER :: js, istat, mesh, lamtype, rztype
953 &
" Using non-symmetric sweep to compute Hessian elements"
955 eps = sqrt(epsilon(eps))
963 ALLOCATE(dataitem(0:ntor,0:mpol1,3*ntmax), stat=istat)
964 IF (istat .NE. 0)
THEN
965 stop
'Allocation error in sweep3_blocks'
974 CALL funct3d (lscreen, istat)
975 IF (istat .ne. 0)
THEN
976 print *,
' ier_flag = ', istat,
977 &
' in SWEEP3_BLOCKS call to funct3d'
981 #if defined(CHI_FORCE)
983 IF (ncurr .EQ. 1)
THEN
984 xc(1:ns,0,0,lamtype) = chips(1:ns)
987 CALL restart_iter(delt0r)
998 ntype_2d = zsc + ntmax
1004 DO js = jstart(mesh), ns, 3
1005 IF (js .EQ. ns)
THEN
1006 edge_mesh(mesh) = .true.
1012 hj = eps * max(abs(r01(ns)), abs(z01(ns)))
1013 DO js = jstart(1), ns, 3
1014 xcdot(js,n_2d,m_2d,ntype_2d) = hj
1017 CALL funct3d (lscreen, istat)
1018 IF (istat .NE. 0)
THEN
1019 stop
'Error computing Hessian jog!'
1023 DO js = jstart(1), ns, 3
1024 dataitem = (gc(js,:,:,:) - gc_save(js,:,:,:))/hj
1025 diag_val(js) = dataitem(0,0,ntype_2d)
1027 IF (diag_val(1) .EQ. zero)
THEN
1028 diag_val(1) = diag_val(4)
1030 IF (diag_val(ns).EQ. zero)
THEN
1031 diag_val(ns) = diag_val(ns-3)
1034 DO js = jstart(2), ns, 3
1035 diag_val(js) = diag_val(js-1)
1037 DO js = jstart(3), ns, 3
1038 diag_val(js) = diag_val(js-1)
1041 IF (any(diag_val .EQ. zero))
THEN
1042 stop
'diag_val == 0'
1049 ntype2d:
DO ntype_2d = 1, ntyptot
1050 IF (ntype_2d .LT. lamtype)
THEN
1051 hj = eps*max(abs(r01(ns)), abs(z01(ns)))
1056 m2d:
DO m_2d = 0, mpol1
1057 n2d:
DO n_2d = 0, ntor
1059 IF (ntype_2d.GT.ntmax .AND. ntype_2d.LE.2*ntmax)
THEN
1060 IF (m_2d .NE. 0)
THEN
1061 block_diag(n_2d,m_2d,ntype_2d,
1062 & n_2d,m_2d,ntype_2d,:) = diag_val
1067 mesh_3pt:
DO mesh = 1,3
1069 DO js = jstart(mesh), ns, 3
1070 xcdot(js,n_2d,m_2d,ntype_2d) = hj
1073 l_edge = edge_mesh(mesh)
1074 CALL funct3d(lscreen, istat)
1075 IF (istat .NE. 0)
THEN
1076 stop
'Error computing Hessian jog!'
1103 skip3_mesh:
DO js = jstart(mesh), ns, 3
1106 IF (js .lt. ns)
THEN
1107 dataitem = (gc(js+1,:,:,:) -
1108 & gc_save(js+1,:,:,:))/hj
1111 IF (lswap2disk)
THEN
1112 CALL writedaitem_seq(dataitem)
1113 ELSE IF (js .lt. ns)
THEN
1114 block_mins(:,:,:,n_2d,m_2d,ntype_2d,js+1) =
1119 dataitem = (gc(js,:,:,:) - gc_save(js,:,:,:))/hj
1121 IF (dataitem(n_2d,m_2d,ntype_2d) .EQ. zero)
THEN
1122 dataitem(n_2d,m_2d,ntype_2d) = diag_val(js)
1126 IF (ntype_2d .GE. lamtype)
THEN
1127 dataitem(n_2d,m_2d,ntype_2d) =
1128 & 1.0001_dp*dataitem(n_2d,m_2d,ntype_2d)
1131 IF (lswap2disk)
THEN
1132 CALL writedaitem_seq(dataitem)
1134 block_diag(:,:,:,n_2d,m_2d,ntype_2d,js) =
1140 dataitem = (gc(js-1,:,:,:) -
1141 & gc_save(js-1,:,:,:))/hj
1144 IF (lswap2disk)
THEN
1145 CALL writedaitem_seq(dataitem)
1146 ELSE IF (js .GT. 1)
THEN
1147 block_plus(:,:,:,n_2d,m_2d,ntype_2d,js-1) =
1158 DEALLOCATE(dataitem)
1160 END SUBROUTINE sweep3_blocks
1162 SUBROUTINE blk3d_factor(a, bm1, bp1, ipiv, mblk, nblocks)
1169 REAL(sp),
PARAMETER :: zero = 0, one = 1
1173 INTEGER,
INTENT(in) :: nblocks, mblk
1174 INTEGER,
TARGET,
INTENT(out) :: ipiv(mblk,nblocks)
1175 REAL(sp),
TARGET,
DIMENSION(mblk,mblk,nblocks),
INTENT(inout) ::
1181 INTEGER :: k, k1, ier
1182 INTEGER,
POINTER :: ipivot(:)
1183 REAL(sp),
POINTER :: amat(:,:), bmat(:,:), cmat(:,:)
1184 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:) :: temp
1248 blocks:
DO k = nblocks, 1, -1
1252 amat => a(:,:,k); ipivot => ipiv(:,k)
1253 CALL dgetrf(mblk, mblk, amat, mblk, ipivot, ier)
1254 IF (ier .ne. 0)
GOTO 200
1258 CALL dgetrs(
'n', mblk, mblk, amat, mblk, ipivot, bmat, mblk,
1261 IF (ier .ne. 0)
GOTO 305
1270 CALL dgemm(
'N',
'N',mblk,mblk,mblk,-one,amat,mblk,
1271 1 bmat, mblk, one, cmat, mblk)
1279 ALLOCATE (temp(mblk,mblk), stat=k)
1280 IF (k .ne. 0) stop
'Allocation error in blk3d_factor!'
1283 IF (k .ne. nblocks)
THEN
1284 temp = transpose(bp1(:,:,k))
1288 temp = transpose(bm1(:,:,k))
1317 1000
FORMAT(2x,
'Error factoring matrix in blk3d: block = ',i4)
1318 1001
FORMAT(i4,
'th argument has illegal value')
1319 1002
FORMAT(i4,
'th diagonal factor exactly zero')
1320 1003
FORMAT(2/
' BLK3D: error detected: ier =',i4,2/)
1322 END SUBROUTINE blk3d_factor
1325 SUBROUTINE blk3d_factor_swp(ipiv, mblk, nblocks)
1332 REAL(sp),
PARAMETER :: zero = 0, one = 1
1336 INTEGER,
INTENT(in) :: nblocks, mblk
1337 INTEGER,
TARGET,
INTENT(out) :: ipiv(mblk,nblocks)
1341 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:) ::
1342 & amat, bmat, cmat, temp
1343 INTEGER :: k, k1, ier
1344 INTEGER,
POINTER :: ipivot(:)
1380 CALL changedafileparams(mblk**2, mblk**2, 3, scratchfile, nblocks)
1382 ALLOCATE(amat(mblk,mblk), bmat(mblk,mblk), cmat(mblk,mblk),
1383 & temp(mblk,mblk), stat=ier)
1384 IF (ier .ne. 0) stop
'Allocation error in blk3d_factor_swp!'
1386 CALL readdaitem2(temp, nblocks, bldia)
1388 blocks:
DO k = nblocks, 1, -1
1394 CALL dgetrf(mblk, mblk, amat, mblk, ipivot, ier)
1395 IF (ier .ne. 0)
GOTO 200
1397 CALL writedaitem_ra(amat, k, bldia, 1)
1401 CALL readdaitem2(bmat, k, blmin)
1402 CALL dgetrs(
'n', mblk, mblk, amat, mblk, ipivot, bmat, mblk,
1404 IF (ier .ne. 0)
GOTO 305
1409 temp = transpose(bmat)
1410 CALL writedaitem_ra(temp, k, blmin, 1)
1416 CALL readdaitem2(amat, k1, blpls)
1417 CALL readdaitem2(temp, k1, bldia)
1419 CALL dgemm(
'N',
'N',mblk,mblk,mblk,-one,amat,mblk, bmat, mblk,
1421 cmat = transpose(amat)
1422 CALL writedaitem_ra(cmat, k1, blpls, 1)
1447 DEALLOCATE(amat, bmat, cmat, temp, stat=ier)
1451 1000
FORMAT(2x,
'Error factoring matrix in blk3d: block = ',i4)
1452 1001
FORMAT(i4,
'th argument has illegal value')
1453 1002
FORMAT(i4,
'th diagonal factor exactly zero')
1454 1003
FORMAT(2/
' BLK3D: error detected: ier =',i4,2/)
1456 END SUBROUTINE blk3d_factor_swp
1458 SUBROUTINE blk3d_factor_swp2(ipiv, mblk, nblocks)
1465 REAL(sp),
PARAMETER :: zero = 0, one = 1
1469 INTEGER,
INTENT(in) :: nblocks, mblk
1470 INTEGER,
TARGET,
INTENT(out) :: ipiv(mblk,nblocks)
1474 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:) ::
1475 1 amat, bmat, cmat, temp
1476 INTEGER :: k, k1, ier
1477 INTEGER,
POINTER :: ipivot(:)
1514 CALL changedafileparams(mblk**2, mblk**2, 3, scratchfile, nblocks)
1516 ALLOCATE(amat(mblk,mblk), bmat(mblk,mblk), cmat(mblk,mblk),
1517 & temp(mblk,mblk), stat=ier)
1518 IF (ier .ne. 0) stop
'Allocation error in blk3d_factor_swp!'
1520 CALL readdaitem2(temp, 1, bldia)
1522 blocks:
DO k = 1,nblocks
1528 CALL dgetrf (mblk, mblk, amat, mblk, ipivot, ier)
1529 IF (ier .ne. 0)
GOTO 200
1531 CALL writedaitem_ra(amat, k, bldia, 1)
1533 IF (k .eq. nblocks)
EXIT
1535 CALL readdaitem2(bmat, k, blpls)
1536 CALL dgetrs(
'n', mblk, mblk, amat, mblk, ipivot, bmat, mblk,
1538 IF (ier .ne. 0)
GOTO 305
1543 temp = transpose(bmat)
1544 CALL writedaitem_ra(temp, k, blpls, 1)
1550 CALL readdaitem2(amat, k1, blmin)
1551 CALL readdaitem2(temp, k1, bldia)
1553 CALL dgemm(
'N',
'N',mblk,mblk,mblk,-one,amat,mblk, bmat, mblk,
1555 cmat = transpose(amat)
1556 CALL writedaitem_ra(cmat, k1, blmin, 1)
1581 DEALLOCATE(amat, bmat, cmat, temp, stat=ier)
1585 1000
FORMAT(2x,
'Error factoring matrix in blk3d: block = ',i4)
1586 1001
FORMAT(i4,
'th argument has illegal value')
1587 1002
FORMAT(i4,
'th diagonal factor exactly zero')
1588 1003
FORMAT(2/
' BLK3D: error detected: ier =',i4,2/)
1590 END SUBROUTINE blk3d_factor_swp2
1593 SUBROUTINE blk3d_slv(ablk, qblk, bp1, source,
1594 1 ipiv, mblk, nblocks)
1603 INTEGER,
INTENT(in) :: nblocks, mblk
1604 INTEGER,
TARGET,
INTENT(in) :: ipiv(mblk,nblocks)
1605 REAL(sp),
TARGET,
DIMENSION(mblk,mblk,nblocks),
INTENT(in) ::
1607 REAL(dp),
DIMENSION(mblk,nblocks),
INTENT(inout)
1612 INTEGER,
POINTER :: ipivot(:)
1614 REAL(sp),
POINTER :: amat(:,:)
1615 REAL(sp) :: source_sp(mblk)
1685 blocks:
DO k = nblocks, 1, -1
1687 source_sp = source(:,k)
1688 ipivot => ipiv(:,k); amat => ablk(:,:,k)
1689 CALL dgetrs(
'n', mblk, 1, amat, mblk,
1690 1 ipivot, source_sp, mblk, ier)
1691 source(:,k) = source_sp
1693 IF (ier .ne. 0)
GOTO 305
1700 amat => bp1(:,:,k-1)
1701 source(:,k-1) = source(:,k-1) - matmul(source(:,k),amat)
1714 source(:,k) = source(:,k) - matmul(source(:,k-1),amat)
1727 WRITE (6,
'(2/a,i4,2/)')
' BLK3D: error detected: ier =',
1733 END SUBROUTINE blk3d_slv
1736 SUBROUTINE blk3d_slv_swp(source, ipiv, mblk, nblocks)
1745 INTEGER,
INTENT(in) :: nblocks, mblk
1746 INTEGER,
TARGET,
INTENT(in) :: ipiv(mblk,nblocks)
1747 REAL(dp),
DIMENSION(mblk,nblocks),
INTENT(inout)
1752 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:) :: amat
1753 INTEGER,
POINTER :: ipivot(:)
1754 INTEGER :: k, k1, ier
1755 REAL(sp) :: source_sp(mblk)
1788 CALL opendafile(mblk**2, mblk**2, 3, scratchfile, iunit_dacess, 1)
1790 ALLOCATE (amat(mblk,mblk), stat=ier)
1791 IF (ier .ne. 0) stop
'Allocation error in blk3d_slv_swp!'
1795 blocks:
DO k = nblocks, 1, -1
1797 source_sp = source(:,k)
1799 CALL readdaitem2(amat, k, bldia)
1800 CALL dgetrs(
'n', mblk, 1, amat, mblk,
1801 1 ipivot, source_sp, mblk, ier)
1802 source(:,k) = source_sp
1804 IF (ier .ne. 0)
GOTO 305
1812 CALL readdaitem2(amat, k1, blpls)
1813 source(:,k1) = source(:,k1) - matmul(source(:,k),amat)
1826 CALL readdaitem2(amat, k, blmin)
1827 source(:,k) = source(:,k) - matmul(source(:,k-1),amat)
1835 WRITE(100,
'(1p,6e14.4)') source(:,ns/2)
1842 WRITE (6,
'(2/a,i4,2/)')
' BLK3D: error detected: ier =',
1850 END SUBROUTINE blk3d_slv_swp
1853 SUBROUTINE blk3d_slv_swp2(source, ipiv, mblk, nblocks)
1862 INTEGER,
INTENT(in) :: nblocks, mblk
1863 INTEGER,
TARGET,
INTENT(in) :: ipiv(mblk,nblocks)
1864 REAL(dp),
DIMENSION(mblk,nblocks),
INTENT(inout)
1869 REAL(sp),
ALLOCATABLE,
DIMENSION(:,:) :: amat
1870 INTEGER,
POINTER :: ipivot(:)
1871 INTEGER :: k, k1, ier
1872 REAL(sp) :: source_sp(mblk)
1909 CALL opendafile(mblk**2, mblk**2, 3, scratchfile, iunit_dacess, 1)
1911 ALLOCATE (amat(mblk,mblk), stat=ier)
1912 IF (ier .ne. 0) stop
'Allocation error in blk3d_slv_swp!'
1916 blocks:
DO k = 1, nblocks
1918 source_sp = source(:,k)
1920 CALL readdaitem2(amat, k, bldia)
1921 CALL dgetrs(
'n', mblk, 1, amat, mblk,
1922 1 ipivot, source_sp, mblk, ier)
1923 source(:,k) = source_sp
1925 IF (ier .ne. 0)
GOTO 305
1926 IF (k .eq. nblocks)
EXIT
1933 CALL readdaitem2(amat, k1, blmin)
1934 source(:,k1) = source(:,k1) - matmul(source(:,k),amat)
1941 DO k = nblocks-1, 1, -1
1943 CALL readdaitem2(amat, k, blpls)
1945 source(:,k) = source(:,k) - matmul(source(:,k1),amat)
1949 WRITE(100,
'(1p,6e14.4)') source(:,ns/2)
1956 WRITE (6,
'(2/a,i4,2/)')
' BLK3D: error detected: ier =',
1964 END SUBROUTINE blk3d_slv_swp2
1967 SUBROUTINE compute_col_scaling_par
1968 USE xstuff,
ONLY: pcol_scale
1974 INTEGER :: nsmin, nsmax
1975 REAL(dp),
ALLOCATABLE :: tmp(:)
1976 REAL(dp),
ALLOCATABLE :: colsum(:,:)
1977 REAL(dp),
PARAMETER :: levmarq_param = 1.e-6_dp
1985 nsmin = tlglob; nsmax = trglob
1987 ALLOCATE (colsum(mblk_size,nsmin:nsmax))
1988 CALL getcolsum(colsum)
1989 CALL vectorcopypar (colsum, pcol_scale)
1990 CALL parallelscaling(levmarq_param,colsum)
1995 ALLOCATE (tmp(ntmaxblocksize*ns))
1996 CALL tolastntype(pcol_scale,tmp)
2000 END SUBROUTINE compute_col_scaling_par
2002 SUBROUTINE compute_col_scaling
2003 USE xstuff,
ONLY: col_scale
2011 END SUBROUTINE compute_col_scaling
2013 SUBROUTINE vectorcopypar (colsum, colscale)
2018 REAL(dp),
INTENT(IN) :: colsum(mblk_size,tlglob:trglob)
2019 REAL(dp),
INTENT(OUT) :: colscale(mblk_size,ns)
2025 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
2028 DO js = tlglob, trglob
2029 colscale(:,js) = colsum(:,js)
2035 IF (rank.LT.nranks-1)
THEN
2036 CALL mpi_send(colsum(:,trglob),m1,mpi_real8,
2037 1 rank+1,1,ns_comm,mpi_err)
2040 CALL mpi_recv(colscale(:,tlglob-1),m1,
2041 1 mpi_real8,rank-1,1,ns_comm,mpi_stat,mpi_err)
2046 CALL mpi_send(colsum(:,tlglob),m1,mpi_real8,
2047 1 rank-1,1,ns_comm,mpi_err)
2049 IF (rank.LT.nranks-1)
THEN
2050 CALL mpi_recv(colscale(:,trglob+1),m1,mpi_real8,
2051 1 rank+1,1,ns_comm,mpi_stat,mpi_err)
2054 END SUBROUTINE vectorcopypar
2057 SUBROUTINE free_mem_precon
2061 IF (
ALLOCATED(block_diag))
2062 1
DEALLOCATE (block_diag, block_plus, block_mins, stat=istat)
2063 IF (istat .ne. 0) stop
'Deallocation error-1 in free_mem_precon'
2066 IF (
ALLOCATED(block_diag_sw))
2067 1
DEALLOCATE (block_diag_sw, block_plus_sw, block_mins_sw,
2069 IF (istat .ne. 0) stop
'Deallocation error-2 in free_mem_precon'
2072 IF (
ALLOCATED(ipiv_blk))
DEALLOCATE (ipiv_blk, stat=istat)
2073 IF (istat .ne. 0) stop
'Deallocation error-3 in free_mem_precon'
2075 END SUBROUTINE free_mem_precon