5 USE stel_constants,
ONLY: pi
14 USE nscalingtools,
ONLY: parsolver, mpi_err, parfunctisl, &
15 rcounts, disp, startglobrow, endglobrow, nranks, rank, &
16 sksdbg, tofu, upper, lower, diag, savediag, nrecd, &
17 symmetrycheck, totrecvs, packsize, writetime, send, receive, &
18 setblocktridatastruct, getfullsolution
20 forwardsolve, setmatrixrhs, backwardsolve, getsolutionvector, &
21 checksymmetry, dmin_tri, maxeigen_tri, findminmax_tri, &
32 MODULE PROCEDURE gather_array1, gather_array2, gather_array3
38 INTEGER,
PARAMETER :: jstart(3) = (/1,2,3/)
39 INTEGER,
ALLOCATABLE :: ipiv_blk(:,:)
41 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:,:) :: ublk, dblk, lblk
42 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:,:) :: ublk_s, dblk_s, lblk_s
43 LOGICAL,
PUBLIC :: l_backslv = .false.
45 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: DataItem
47 INTEGER :: myStartBlock, myEndBlock, mynumBlockRows
48 INTEGER :: mystart(3), myend(3)
50 REAL(dp) :: eps, ton, toff, rcond, anorm
52 REAL(dp),
DIMENSION(:,:),
ALLOCATABLE :: colsum
53 REAL(dp),
PARAMETER,
PUBLIC :: eps_factor = 1._dp
54 REAL(dp),
ALLOCATABLE,
PUBLIC :: mupar_norm(:)
55 REAL(dp),
PUBLIC :: levmarq_param, levmarq_param0, asym_index, &
56 mupar, mupar0, maxeigen, mindiag
57 LOGICAL,
PUBLIC :: l_Compute_Hessian, l_Diagonal_Only
58 LOGICAL,
PUBLIC,
PARAMETER :: l_Hess_sym=.false.
59 INTEGER,
PUBLIC :: HESSPASS=0
60 PUBLIC :: apply_precond, dealloc_hessian, inithess,
61 compute_hessian_blocks, apply_colscale
70 SUBROUTINE gather_array1(buffer)
75 REAL(dp),
DIMENSION(:),
INTENT(inout) :: buffer
79 CALL mpi_allgatherv(mpi_in_place, rcounts(iam + 1), mpi_real8,
80 buffer, rcounts, disp, mpi_real8, siesta_comm
91 SUBROUTINE gather_array2(buffer)
96 REAL(dp),
DIMENSION(:,:),
INTENT(inout) :: buffer
100 CALL mpi_allgatherv(mpi_in_place, rcounts(iam + 1), mpi_real8,
101 buffer, rcounts, disp, mpi_real8, siesta_comm
112 SUBROUTINE gather_array3(buffer)
113 USE nscalingtools,
ONLY: mnspcounts, mnspdisps
118 REAL (dp),
DIMENSION(:,:,:),
INTENT(inout) :: buffer
122 CALL mpi_allgatherv(mpi_in_place, mnspcounts(iam + 1), mpi_real8
123 buffer, mnspcounts, mnspdisps, mpi_real8,
124 siesta_comm, mpi_err)
135 SUBROUTINE gathercols(colgath, coltmp)
140 REAL(dp),
DIMENSION(mblk_size),
INTENT(out) :: colgath
141 REAL(dp),
DIMENSION(mblk_size2),
INTENT(in) :: coltmp
151 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: unordered
157 CALL mpi_allgatherv(coltmp, mblk_size2, mpi_real8,
158 unordered, scalcounts, scaldisps,
159 mpi_real8, siesta_comm, mpi_err)
160 CALL assert(mpi_err.EQ.0,
'MPI ERR IN GatherCols')
166 icol_mpi = np - nprocs
167 mbloc = scalcounts(np)
169 icol_mpi = icol_mpi + nprocs
170 imax = max(imax, icol_mpi)
172 colgath(icol_mpi) = unordered(iorder)
179 DEALLOCATE(unordered)
184 INTEGER :: icol, mesh, nsmin, nsmax
186 IF (.NOT.parsolver)
THEN
187 startglobrow = 1; endglobrow = ns
189 mystartblock=startglobrow
190 myendblock=endglobrow
191 mynumblockrows=myendblock-mystartblock+1
193 nsmin = max(1, startglobrow-1); nsmax = min(ns, endglobrow+1)
196 icol = mod(jstart(mesh)-nsmin, 3)
197 IF (icol .LT. 0) icol = icol+3
198 mystart(mesh) = nsmin+icol
201 END SUBROUTINE inithess
211 SUBROUTINE apply_colscale(gc, colscale, nsmin, nsmax)
216 REAL (dp),
DIMENSION(mblk_size,ns),
INTENT(inout) :: gc
217 REAL (dp),
DIMENSION(mblk_size,ns),
INTENT(in) :: colscale
218 INTEGER,
INTENT(in) :: nsmin
219 INTEGER,
INTENT(in) :: nsmax
222 gc(:,nsmin:nsmax) = gc(:,nsmin:nsmax)*colscale(:,nsmin:nsmax)
226 SUBROUTINE compute_hessian_blocks (func, ldiagonal)
231 LOGICAL,
INTENT(IN) :: ldiagonal
237 CHARACTER(LEN=3) :: label
241 l_compute_hessian = .true.
242 l_diagonal_only = ldiagonal
244 IF (lcolscale) col_scale = 1
247 DO iunit = 6, unit_out, unit_out-6
248 IF (.NOT.lverbose .AND. iunit.EQ.6) cycle
249 IF (l_diagonal_only)
THEN
250 WRITE (iunit, 90) levmarq_param, mupar
252 WRITE (iunit, 100) levmarq_param, mupar, asym_index
258 90
FORMAT (/,
' Computing diagonal preconditioner - ',
259 ' LM parameter:',1pe9.2,
' mu||:',1pe9.2)
260 100
FORMAT (/,
' Computing block preconditioner - ',
261 ' LM parameter:',1pe9.2,
' mu||:',1pe9.2,
262 ' Asym index:',1pe9.2)
264 lprint = (iam.EQ.0 .AND. hesspass.EQ.0 .AND. lverbose)
267 IF (parfunctisl)
THEN
268 IF(sksdbg)
WRITE(tofu,*)
'Executing Compute_Hessian_Blocks_With_No_Col_Redist'IFCALL flush
269 IF (lprint) print *,
'HESSIAN AND INVERSE CALCULATION' //
270 ' USING BCYCLIC WITH NO COLUMN REDISTRIBUTION'
271 CALL compute_hessian_blocks_with_no_col_redist (
xc,
gc, func)
273 IF (sksdbg)
WRITE(tofu,*)
'Executing Compute_Hessian_Blocks_With_Col_Redist'IFCALL flush
274 IF (lprint) print *,
'HESSIAN AND INVERSE CALCULATION' //
275 ' USING BCYCLIC WITH COLUMN REDISTRIBUTION'
276 CALL compute_hessian_blocks_with_col_redist (
xc,
gc, func)
279 IF (sksdbg)
WRITE(tofu,*)
'Executing Compute_Hessian_Blocks_Thomas'CALL flush
280 IF (lprint) print *,
'HESSIAN AND INVERSE CALCULATION' //
281 ' USING SERIAL THOMAS ALGORITHM'
282 CALL compute_hessian_blocks_thomas (
xc,
gc, func)
285 IF (hesspass.EQ.0)
THEN
286 bsize = 3*ns*kind(dblk)
287 bsize = bsize*real(mblk_size*mblk_size,dp)
288 IF (bsize .LT. 1.e6_dp)
THEN
291 ELSE IF (bsize .lt. 1.e9_dp)
THEN
292 bsize = bsize/1.e6_dp
295 bsize = bsize/1.e9_dp
300 DO iunit = 6, unit_out, unit_out-6
301 IF (.NOT.lverbose .AND. iunit.EQ.6) cycle
302 WRITE (iunit,
'(1x,a,i4,a,f6.2,a)')
'Block dim: ',
303 mblk_size,
'^2 Preconditioner size: ', bsize,
310 l_compute_hessian = .false.
312 hesspass = hesspass+1
314 END SUBROUTINE compute_hessian_blocks
316 SUBROUTINE compute_hessian_blocks_with_no_col_redist (xc, gc, &
321 REAL(dp),
DIMENSION(mblk_size,ns) :: xc, gc
325 REAL(dp),
PARAMETER :: zero=0, one=1
326 INTEGER :: n, m, js, mesh, ntype, istat, iunit, icol
330 REAL(dp) :: starttime, endtime, usedtime
331 REAL(dp) :: colstarttime, colendtime, colusedtime
332 INTEGER :: nsmin, nsmax
333 REAL(dp) :: skston, skstoff, temp
336 starttime=0; endtime=0; usedtime=0;
337 colstarttime=0; colendtime=0; colusedtime=0
344 eps = sqrt(epsilon(eps))*abs(eps_factor)
347 nsmin = max(1, startglobrow-1); nsmax = min(ns, endglobrow+1)
349 ALLOCATE (dataitem(
mblk_size), stat=istat)
350 CALL assert(istat.EQ.0,
'DataItem allocation failed:')
353 xc(:,nsmin:nsmax) = 0
358 CALL second0(colstarttime)
361 var_type_lg:
DO ntype = 1,
ndims
362 var_n_lg:
DO n = -ntor, ntor
363 var_m_lg:
DO m = 0, mpol
367 mesh_3pt_lg:
DO mesh = 1, 3
369 IF (.NOT.(m.EQ.0 .AND. n.LT.0))
THEN
370 DO js = mystart(mesh), myend(mesh), 3
379 CALL second0(skstoff)
380 hessian_funct_island_time=hessian_funct_island_time+(skstoff
384 skip3_mesh_lg:
DO js = mystart(mesh), myend(mesh), 3
390 IF (startglobrow.LE.js1 .AND. js1.LE.endglobrow)
THEN
391 dataitem = gc(:,js1)/eps
392 IF (l_diagonal_only)
THEN
397 CALL setblocktridatastruct(upper,js1,icol,dataitem)
401 IF (startglobrow.LE.js .AND. js.LE.endglobrow)
THEN
402 dataitem = gc(:,js)/eps
403 IF (all(dataitem .EQ. zero)) dataitem(icol) = one
405 IF (l_diagonal_only)
THEN
411 CALL setblocktridatastruct(savediag,js,icol,dataitem)
412 CALL setblocktridatastruct(diag,js,icol,dataitem)
417 IF (startglobrow.LE.js1 .AND. js1.LE.endglobrow)
THEN
418 dataitem = gc(:,js1)/eps
419 IF (l_diagonal_only)
THEN
424 CALL setblocktridatastruct(lower,js1,icol,dataitem)
432 CALL second0(colendtime)
433 construct_hessian_time=construct_hessian_time+(colendtime-colstarttime
436 DEALLOCATE (dataitem, stat=istat)
437 CALL assert(istat.EQ.0,
'DataItem deallocation error:')
443 CALL applyparallelscaling(levmarq_param,
col_scale)
445 CALL findminmax_tri (levmarq_param)
449 maxeigen = maxeigen_tri
453 IF (nprocs .EQ. 1)
THEN
455 CALL checkconditionnumber(ns,
mblk_size,anorm,rcond,info)
457 IF (info .EQ. 0)
THEN
458 print
'(1x,3(a,1p,e12.3))',
'RCOND = ', rcond,
459 ' ||A|| = ', anorm,
' TIME: ', toff-ton
466 IF (symmetrycheck)
THEN
468 CALL checksymmetry(asym_index)
469 CALL second0(skstoff)
470 asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
474 IF (l_diagonal_only)
THEN
475 time_diag_prec = time_diag_prec+(toff-ton)
477 time_block_prec = time_block_prec+(toff-ton)
487 IF (
ALLOCATED(ipiv_blk))
DEALLOCATE(ipiv_blk,stat=istat)
491 block_factorization_time=block_factorization_time+(skstoff-skston)
492 time_factor_blocks = time_factor_blocks + (toff-ton)
495 print
'(a,1p,e12.3)',
' BLOCK FACTORIZATION TIME: ', toff-ton
497 END SUBROUTINE compute_hessian_blocks_with_no_col_redist
500 SUBROUTINE compute_hessian_blocks_with_col_redist (xc, gc, func)
504 REAL(dp),
DIMENSION(mblk_size,ns) :: xc, gc
508 REAL(dp),
PARAMETER :: zero=0, one=1
509 INTEGER :: n, m, js, js1, mesh, ntype, istat, iunit, icol, icolmpi
511 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: SavedDiag
512 INTEGER,
ALLOCATABLE,
DIMENSION (:) :: sendCount, recvCount
513 REAL(dp),
ALLOCATABLE,
DIMENSION (:) :: recvBuf
515 REAL(dp) :: starttime, endtime, usedtime
516 REAL(dp) :: colstarttime, colendtime, colusedtime
519 REAL(dp) :: skston, skstoff, temp
534 eps = sqrt(epsilon(eps))*abs(eps_factor)
536 ALLOCATE (dataitem(
mblk_size), stat=istat)
537 CALL assert(istat.EQ.0,
'DataItem allocation failed!')
539 ALLOCATE (saveddiag(
mblk_size), stat=istat)
540 CALL assert(istat.EQ.0,
'SavedDiag allocation failed!')
547 IF(.NOT.
ALLOCATED(sendcount))
ALLOCATE(sendcount(nprocs))
548 IF(.NOT.
ALLOCATED(recvcount))
ALLOCATE(recvcount(nprocs))
549 IF(.NOT.
ALLOCATED(recvbuf))
ALLOCATE(recvbuf(packsize))
556 CALL second0(colstarttime)
557 var_type:
DO ntype = 1,
ndims
558 var_n:
DO n = -ntor, ntor
559 var_m:
DO m = 0, mpol
562 IF(mod(icol-1,nprocs)==iam)
THEN
565 mesh_3pt:
DO mesh = 1, 3
567 IF (.NOT.(m.EQ.0 .AND. n.LT.0))
THEN
568 DO js = jstart(mesh), ns, 3
576 CALL second0(skstoff)
577 hessian_funct_island_time=hessian_funct_island_time+(skstoff
580 skip3_mesh:
DO js = jstart(mesh), ns, 3
587 dataitem = gc(:,js1)/eps
589 IF (m.eq.0 .and. n.lt.0) dataitem = 0
590 IF (l_diagonal_only)
THEN
596 CALL receive(probeflag)
597 CALL send(dataitem,js1,istat,icol,procid)
598 IF(procid-1.NE.iam) sendcount(procid) = sendcount
599 CALL receive(probeflag)
603 dataitem = gc(:,js)/eps
604 IF (m.eq.0 .and. n.lt.0) dataitem = 0
605 IF (all(dataitem .EQ. zero)) dataitem(icol) = one
608 IF (l_diagonal_only)
THEN
613 saveddiag=dataitem; istat=4
614 CALL receive(probeflag)
615 CALL send(saveddiag,js,istat,icol,procid)
616 IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid
617 CALL receive(probeflag)
622 CALL receive(probeflag)
623 CALL send(dataitem,js1,istat,icol,procid)
624 IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid
625 CALL receive(probeflag)
630 dataitem = gc(:,js1)/eps
632 IF (m.eq.0 .and. n.lt.0) dataitem=0
633 IF (l_diagonal_only)
THEN
639 CALL receive(probeflag)
640 CALL send(dataitem,js1,istat,icol,procid)
641 IF(procid-1.NE.iam) sendcount(procid) = sendcount
642 CALL receive(probeflag)
652 CALL second0(colendtime)
653 colusedtime=colendtime-colstarttime
655 construct_hessian_time=construct_hessian_time+(colendtime-colstarttime
660 probeflag=.NOT.probeflag
662 CALL mpi_alltoall(sendcount,1,mpi_integer,recvcount,1,
663 mpi_integer,siesta_comm,mpi_err)
666 totrecvs=totrecvs+recvcount(js)
669 DO WHILE (nrecd.LT.totrecvs)
670 CALL receive(probeflag)
674 DEALLOCATE (dataitem, saveddiag, stat=istat)
675 CALL assert(istat.EQ.0,
'DataItem deallocation error!')
677 CALL second0(skstoff)
678 construct_hessian_time=construct_hessian_time+(skstoff-skston)
679 CALL mpi_barrier(siesta_comm, mpi_err)
684 IF (symmetrycheck)
THEN
687 CALL checksymmetry(asym_index)
688 CALL second0(skstoff)
689 asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
697 IF (l_diagonal_only)
THEN
698 time_diag_prec = time_diag_prec+(toff-ton)
700 time_block_prec = time_block_prec+(toff-ton)
711 block_factorization_time=block_factorization_time+(skstoff-skston)
712 time_factor_blocks = time_factor_blocks + (toff-ton)
713 usedtime=endtime-starttime
716 print
'(a,1p,e12.3)',
' BLOCK FACTORIZATION TIME: ', toff-ton
718 END SUBROUTINE compute_hessian_blocks_with_col_redist
721 SUBROUTINE compute_hessian_blocks_thomas (xc, gc, func)
725 REAL(dp),
DIMENSION(mblk_size,ns),
INTENT(INOUT) :: xc, gc
729 REAL(dp),
PARAMETER :: zero=0, one=1
730 INTEGER :: n, m, js, js1, mesh, ntype, istat, iunit, icol, icolmpi
731 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: gc1
732 REAL(dp) :: skston, skstoff, temp
735 INTEGER :: neqtotal, numroc
736 REAL(dp) :: starttime, endtime, usedtime
737 REAL(dp) :: colstarttime, colendtime, colusedtime
741 starttime=0; endtime=0; usedtime=0
742 colstarttime=0; colendtime=0; colusedtime=0
750 eps = sqrt(epsilon(eps))*abs(eps_factor)
752 IF (
ALLOCATED(ublk))
DEALLOCATE(ublk, dblk, lblk, stat=istat)
755 CALL blacs_gridinfo(icontxt_1xp,nprow,npcol,myrow,mycol)
761 locq = numroc(
mblk_size, nb, mycol, csrc, npcol )
762 locp = numroc(
mblk_size, mb, myrow, rsrc, nprow )
763 mblk_size2=max(1,locq)
766 icontxt_1xp,lld,info)
768 write(*,*)
'myrow,mycol,nprow,npcol,desc(LLD_),lld ',
769 myrow,mycol,nprow,npcol,desca_1xp(lld_),lld
770 write(*,*)
'Locp,m,mb ', locp,
mblk_size,mb
773 call assert(info.eq.0,
'descinit descA_1xp')
774 ineed = max(1,lld*locq)
778 locqr = numroc( nrhs1, nb, mycol,csrc,npcol)
779 locpr = numroc( mm, mb, myrow,rsrc,nprow)
781 call descinit(descr_all,mm,nrhs1, mb,nb,rsrc,csrc,icontxt,lld,info
782 call assert(info.eq.0,
'test_pdtrd:descinit return info != 0')
784 call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol)
791 locq = numroc(
mblk_size, nb, mycol, csrc, npcol )
792 locp = numroc(
mblk_size, mb, myrow, rsrc, nprow )
796 call assert(info.eq.0,
'descinit descA')
797 ineed = max(1,lld*locq)
798 IF (
ALLOCATED(ublkp))
DEALLOCATE(ublkp, dblkp, lblkp, stat=istat
799 IF (.NOT.
ALLOCATED(ublkp))
THEN
800 ALLOCATE(ublkp(ineed,ns-1),
804 CALL assert(istat.EQ.0,
'Not enough memory to store a single block!'
811 CALL assert(istat.EQ.0,
'Not enough memory to store a single block!'
813 CALL assert(.false.,
'LSCALAPACK=T BUT NOT MPI!')
825 IF (
ALLOCATED(ublk))
THEN
826 ublk = 0; dblk = 0; lblk = 0
829 ALLOCATE (dataitem(
mblk_size), stat=istat)
830 CALL assert(istat.EQ.0,
'DataItem allocation failed!')
841 CALL second0(skstoff)
842 hessian_funct_island_time=hessian_funct_island_time+(skstoff-skston
845 CALL second0(colstarttime)
847 var_type:
DO ntype = 1,
ndims
848 var_n:
DO n = -ntor, ntor
849 var_m:
DO m = 0, mpol
853 IF(mod(icol-1,nprocs)==iam .OR. .NOT.lscalapack)
THEN
862 mesh_3pt:
DO mesh = 1, 3
864 IF (.NOT.(m.EQ.0 .AND. n.LT.0))
THEN
865 DO js = jstart(mesh), ns, 3
873 CALL second0(skstoff)
874 hessian_funct_island_time=hessian_funct_island_time+(skstoff
891 skip3_mesh:
DO js = jstart(mesh), ns, 3
900 dataitem = gc(:,js1)/eps
902 IF (l_diagonal_only)
THEN
907 ublk(:,icolmpi,js1) = dataitem
911 dataitem = gc(:,js)/eps
912 IF (all(dataitem .EQ. zero)) dataitem(icol) = one
914 IF (l_diagonal_only)
THEN
920 dblk(:,icolmpi,js) = dataitem
925 dataitem = gc(:,js1)/eps
926 IF (l_diagonal_only)
THEN
931 lblk(:,icolmpi,js1) = dataitem
945 IF (l_diagonal_only)
THEN
946 time_diag_prec = time_diag_prec+(toff-ton)
948 time_block_prec = time_block_prec+(toff-ton)
955 CALL findminmax_tri_serial
960 colusedtime=colendtime-colstarttime
961 time_generate_blocks=time_generate_blocks+colusedtime
962 construct_hessian_time=construct_hessian_time+(colendtime-colstarttime
964 DEALLOCATE (dataitem, stat=istat)
965 CALL assert(istat.EQ.0,
'DataItem deallocation error!')
967 IF (nprocs .EQ. 1)
THEN
972 IF (.NOT.l_hess_sym .OR. nprocs.GT.1)
GOTO 1000
975 DO icol = 1, mblk_size2
976 gc1(:,1:nsh) = (ublk(icol,:,1:nsh) + lblk(:,icol,2:ns))/2
977 ublk(icol,:,1:nsh) = gc1(:,1:nsh)
978 lblk(:,icol,2:ns) = gc1(:,1:nsh)
979 dblk_s(:,icol,:) = (dblk(icol,:,:) + dblk(:,icol,:))/2
984 DEALLOCATE (gc1, dblk_s)
991 CALL checkeigenvalues_serial(ns,
mblk_size)
992 CALL checkconditionnumber_serial(ns,
mblk_size,anorm,rcond,info)
994 IF (info .EQ. 0)
THEN
995 print
'(1x,3(a,1p,e12.3))',
'RCOND = ', rcond,
996 ' ||A|| = ', anorm,
' TIME: ', toff-ton
1003 CALL second0(skston)
1004 IF (lscalapack)
THEN
1005 CALL check3d_symmetry(dblk,lblk,ublk,dblkp,lblkp,ublkp,
1008 CALL check3d_symmetry_serial(dblk,lblk,ublk,mpol,ntor,
1011 CALL second0(skstoff)
1012 asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
1022 IF (.NOT.
ALLOCATED(dblk_s))
1027 CALL assert(istat.EQ.0,
'Allocation error2 in Compute_Hessian_Blocks'
1028 ublk_s = ublk; dblk_s = dblk; lblk_s = lblk
1033 IF (
ALLOCATED(ipiv_blk))
DEALLOCATE(ipiv_blk,stat=istat)
1034 CALL second0(skston)
1035 IF (lscalapack)
THEN
1036 #if defined(MPI_OPT)
1037 ineed = numroc(desca(m_),desca(mb_),0,0,nprow) + desca(mb_)
1038 ALLOCATE( ipiv_blk(ineed, ns), stat=istat )
1039 CALL assert(istat.EQ.0,
'ipiv_blk allocation error2 in Compute_Hessian_Blocks'
1040 CALL blk3d_parallel(dblk,lblk,ublk,dblkp,lblkp,ublkp,
mblk_size,mblk_size2
1041 CALL blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol)
1043 locqr = numroc( nrhs1, nb, mycol,csrc,npcol)
1044 locpr = numroc( mm, mb, myrow,rsrc,nprow)
1046 CALL descinit(descx,mm,nrhs1, mb,nb,rsrc,csrc,icontxt,lld,info)
1047 CALL assert(info.eq.0,
'test_pdtrd:descinit return info!=0')
1050 ineedr = max(1,lld*locqr)
1052 CALL blacs_barrier(icontxt,
'All')
1053 CALL profstart(
'factor call:123')
1054 CALL pdtrdf(
mblk_size,ns,dblkp,lblkp,ublkp,ipiv_blk,desca)
1055 CALL blacs_barrier(icontxt,
'All')
1056 CALL profend(
'factor call:123')
1058 CALL assert(.false.,
'LSCALAPACK=T BUT NOT IN MPI!')
1061 ALLOCATE (ipiv_blk(
mblk_size,ns), stat=istat)
1062 CALL assert(istat.EQ.0,
'ipiv_blk allocation error2 in Compute_Hessian_Blocks'
1063 CALL blk3d_factor(dblk, lblk, ublk, ipiv_blk,
mblk_size, ns)
1066 CALL second0(skstoff)
1067 block_factorization_time=block_factorization_time+(skstoff-skston)
1069 time_factor_blocks = time_factor_blocks + (toff-ton)
1072 print
'(a,1p,e12.3)',
' BLOCK FACTORIZATION TIME: ', toff-ton
1074 END SUBROUTINE compute_hessian_blocks_thomas
1077 SUBROUTINE serialscaling
1082 REAL(dp),
PARAMETER :: zero=0, one=1
1083 INTEGER :: js, icol, icount
1084 REAL(dp) :: eps, minScale, ton, toff, colcnd
1085 REAL(dp),
ALLOCATABLE :: col2(:), coltmp(:), col0(:,:)
1087 CALL assert(all(col_scale.EQ.1),
'COL_SCALE != 1 INITIALLY IN SERIAL SCALING'
1098 ALLOCATE(colsum(mblk_size, ns), col2(mblk_size),
1099 coltmp(mblk_size2), col0(mblk_size2, ns))
1103 block_row1:
DO js = 1, ns
1104 cols1:
DO icol = 1, mblk_size2
1105 col2 = abs(dblk(:,icol,js))
1107 coltmp(icol) = sum(col2)
1109 coltmp(icol) = maxval(col2)
1112 col2 = abs(ublk(:,icol,js-1))
1114 coltmp(icol) = coltmp(icol) + sum(col2)
1116 coltmp(icol) = max(coltmp(icol),maxval(col2))
1119 IF (js .LT. ns)
THEN
1120 col2 = abs(lblk(:,icol,js+1))
1122 coltmp(icol) = coltmp(icol) + sum(col2)
1124 coltmp(icol) = max(coltmp(icol),maxval(col2))
1128 eps = minscale*maxval(coltmp)
1129 CALL assert(eps.GT.zero,
' coltmp == 0 in SerialScaling')
1130 coltmp = max(coltmp, eps)
1131 coltmp = one/sqrt(coltmp)
1136 IF (lscalapack)
THEN
1137 CALL gathercols(colsum(:,js), coltmp)
1139 colsum(:,js) = coltmp
1144 CALL vector_copy_ser (colsum, col_scale)
1147 block_row2:
DO js = 1, ns
1148 cols2:
DO icol = 1, mblk_size2
1149 dblk(:,icol,js) = colsum(:,js)*dblk(:,icol,js)*col0(icol,js)
1151 lblk(:,icol,js) = colsum(:,js)*lblk(:,icol,js)*col0(icol,js
1153 IF (js .LT. ns)
THEN
1154 ublk(:,icol,js) = colsum(:,js)*ublk(:,icol,js)*col0(icol,js
1160 colcnd = maxval(colsum)
1161 IF (colcnd .NE. zero)
THEN
1162 colcnd = abs(minval(colsum)/colcnd)
1171 DEALLOCATE(colsum, col0, col2, coltmp)
1173 CALL findminmax_tri_serial
1175 IF (iam.EQ.0 .AND. lverbose) print
'(1x,3(a,1p,e10.3))',
1176 'COLUMN-SCALING TIME: ', toff-ton,
' COLCND: ', colcnd,
1177 ' DMINL: ', dmin_tri*abs(levmarq_param)
1180 END SUBROUTINE serialscaling
1183 SUBROUTINE findminmax_tri_serial
1187 REAL(dp),
PARAMETER :: minThreshold=1.e-6, zero=0
1188 INTEGER :: js, icol, jrow
1189 REAL(dp) :: eps, DminL, temp(2), sign_diag=-1
1193 dmin_tri = huge(dmin_tri)
1195 block_row3:
DO js = 1, ns
1198 cols1:
DO icol = 1, mblk_size2
1199 eps = dblk(jrow,icol,js)
1200 IF (abs(eps) .GT. mindiag)
THEN
1202 sign_diag = eps/mindiag
1204 maxeigen = max(maxeigen, abs(lblk(jrow,icol,js))
1205 + abs(dblk(jrow,icol,js)) + abs(ublk(jrow,icol,js))
1207 jrow = jrow + nprocs
1210 #if defined(MPI_OPT)
1211 IF (lscalapack)
THEN
1212 temp(1) = mindiag; temp(2) = maxeigen
1213 CALL mpi_allreduce(mpi_in_place,temp,1,mpi_real8, mpi_max,
1214 siesta_comm, mpi_err)
1215 mindiag = temp(1); maxeigen = temp(2)
1218 mindiag = minthreshold*mindiag
1219 IF (mindiag .EQ. zero) mindiag = minthreshold
1222 cols2:
DO icol = 1, mblk_size2
1223 eps = dblk(jrow,icol,js)
1224 IF (abs(eps) .LE. mindiag)
THEN
1225 dblk(jrow,icol,js) = sign_diag*mindiag
1226 ELSE IF (js .LT. ns)
THEN
1227 dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
1229 jrow = jrow + nprocs
1233 #if defined(MPI_OPT)
1234 IF (lscalapack)
THEN
1235 dmin_tri = abs(dmin_tri)
1236 CALL mpi_allreduce(mpi_in_place,dmin_tri,1,mpi_real8, mpi_min,
1237 siesta_comm, mpi_err)
1240 mindiag = sign_diag*dmin_tri
1241 dminl = abs(levmarq_param)*mindiag
1246 cols3:
DO icol = 1, mblk_size2
1247 eps = dblk(jrow,icol,js)
1248 CALL assert(sign_diag*eps.GT.zero,
1249 ' EPS*SIGN_DIAG < 0 IN FindMinMax_TRI_Serial')
1251 dblk(jrow,icol,js) = eps + sign(dminl,eps)
1253 dblk(jrow,icol,js) = eps*(1+abs(levmarq_param))
1255 jrow = jrow + nprocs
1261 END SUBROUTINE findminmax_tri_serial
1264 SUBROUTINE vector_copy_ser(colsum, colscale)
1268 REAL(dp),
INTENT(IN) :: COLSUM(mblk_size,ns)
1269 REAL(dp),
INTENT(OUT) :: COLSCALE(mblk_size,ns)
1272 colscale = colsum * colscale
1274 END SUBROUTINE vector_copy_ser
1277 SUBROUTINE checkeigenvalues_serial(nblock, bsize)
1281 INTEGER,
INTENT(IN) :: nblock, bsize
1283 INTEGER,
SAVE :: nCount=0
1284 CHARACTER*1,
PARAMETER :: JOBZ=
'N', uplo=
'U'
1285 INTEGER :: KD, N, LWORK, LDZ, OFFK, OFFS, OFFSU, &
1286 rowmin, rowmax, i, j, icol, js, ldab
1287 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: W, ACOL, WORK
1288 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: AB, Z, BAND
1289 REAL(dp) :: ton, toff
1293 IF (ncount .NE. 17)
RETURN
1297 n = nblock*bsize; ldab = kd+1; ldz = 1
1298 lwork = max(1,3*n-2)
1303 ALLOCATE (w(n), acol(n), work(lwork), ab(ldab,n), stat=i)
1304 CALL assert(i.EQ.0,
'Allocation error in CheckEigenvalues_Serial')
1305 IF (jobz .NE.
'N')
ALLOCATE(z(ldz,n))
1312 rowmin = offs-bsize+1; rowmax = offsu+bsize
1314 acol(rowmin:offs) = ublk(:,icol,js-1)
1316 acol(offsu+1:rowmax) = lblk(:,icol,js+1)
1317 acol(offs+1:offsu) = dblk(:,icol,js)
1319 DO i = max(1,j-kd,rowmin), min(j,n,rowmax)
1320 ab(offk+i-j,j) = acol(i)
1326 offsu = offsu + bsize
1331 CALL dsbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work, info )
1333 DEALLOCATE (ab, acol)
1336 WRITE (4000+ncount,
'(i5,1p,2e12.4)') j, w(j), w(j)/w(1)
1338 CALL flush(4000+ncount)
1341 IF (jobz .NE.
'N')
DEALLOCATE(z)
1344 print 100,
' Eigenvalue TIME: ', (toff-ton),
' INFO: ', info,
1345 ' Eigenvalues written to FORT.',4000+ncount
1346 100
FORMAT(a,1p,e10.2,2(a,i4))
1348 END SUBROUTINE checkeigenvalues_serial
1351 SUBROUTINE checkconditionnumber_serial(nblock,bsize,anorm,rcond,info)
1355 INTEGER,
INTENT(IN) :: nblock, bsize
1356 INTEGER,
INTENT(OUT) :: info
1357 REAL(dp),
INTENT(OUT) :: anorm, rcond
1359 REAL(dp),
PARAMETER :: zero=0, one=1
1360 CHARACTER(LEN=1),
PARAMETER :: CHNORM=
'1'
1361 INTEGER :: N1, JS, ICOL, ISTAT, OFFS, OFFSU, I, J, KU, KL, &
1362 ldab, offk, rowmin, rowmax
1363 INTEGER,
ALLOCATABLE :: IWORK(:), IPIV(:)
1364 REAL(dp),
ALLOCATABLE :: WORK(:), ACOL(:), AB(:,:)
1365 REAL(dp) :: ROWCND, COLCND, AMAX
1369 ALLOCATE (acol(n1), stat=istat)
1370 CALL assert(istat.EQ.0,
'COND # ALLOCATION1 FAILED IN HESSIAN')
1377 ALLOCATE(ab(ldab, n1), ipiv(n1), iwork(n1), work(3*n1), &
1379 CALL assert(istat.EQ.0,
'COND # ALLOCATION2 FAILED IN HESSIAN')
1390 rowmin = offs-bsize+1; rowmax = offsu+bsize
1392 acol(rowmin:offs) = ublk(:,icol,js-1)
1394 acol(offsu+1:rowmax) = lblk(:,icol,js+1)
1395 acol(offs+1:offsu) = dblk(:,icol,js)
1399 ELSE IF (js.EQ.nblock)
THEN
1402 anorm = max(anorm, sum(abs(acol(rowmin:rowmax))))
1404 DO i = max(1,j-ku,rowmin), min(n1,j+kl,rowmax)
1405 ab(offk+i-j,j) = acol(i)
1411 offsu = offsu + bsize
1417 CALL dgbtrf( n1, n1, kl, ku, ab, ldab, ipiv, info )
1419 CALL dgbcon(
'1', n1, kl, ku, ab, ldab, ipiv, anorm, rcond,
1422 DEALLOCATE(ab, work, iwork, ipiv)
1424 IF (info.EQ.0 .AND. rcond.NE.zero)
THEN
1430 END SUBROUTINE checkconditionnumber_serial
1435 SUBROUTINE blk3d_factor(a, bm1, bp1, ipiv, mblk, nblocks)
1436 USE stel_constants,
ONLY: one, zero
1440 INTEGER,
INTENT(in) :: nblocks, mblk
1441 INTEGER,
TARGET,
INTENT(OUT) :: ipiv(mblk,nblocks)
1442 REAL(dp),
TARGET,
DIMENSION(mblk,mblk,nblocks), &
1443 INTENT(INOUT) :: a, bm1, bp1
1448 INTEGER :: k, k1, ier
1449 INTEGER,
POINTER :: ipivot(:)
1450 REAL(dp),
POINTER :: amat(:,:), bmat(:,:), cmat(:,:)
1451 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: temp
1453 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: w, work
1454 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: vt, u
1455 REAL(dp),
PARAMETER :: small = 1.e-300_dp
1458 CHARACTER(LEN=1),
PARAMETER :: jobu=
'A', jobvt=
'A'
1532 ALLOCATE(vt(mblk,mblk), w(mblk),
1533 u(mblk,mblk), work(lwork), stat=ier)
1534 CALL assert(ier.EQ.0,
'Allocation error in blk3d_factor!')
1537 ALLOCATE (temp(mblk,mblk), stat=ier)
1538 CALL assert(ier.EQ.0,
'Allocation error in blk3d_factor!')
1540 blocks:
DO k = nblocks, 1, -1
1548 CALL dgesvd (jobu, jobvt, mblk, mblk, amat, mblk, w, u, mblk,
1549 vt, mblk, work, lwork, ier)
1552 WRITE (35,
'(a,i4,a,1pe12.4)')
'Block: ',k,
' Condition #: ', sqrt
1553 print
'(a,i4,a,1pe12.4)',
' BLOCK: ',k,
' SVD COND #: ', sqrt(w(1
1555 IF (w(nw) .LE. small*w(1)) w(nw) = 0
1557 CALL assert(ier.EQ.0,
'SVD ERROR')
1560 CALL svdinv2 (amat, u, vt, w, mblk)
1563 CALL dgetrf (mblk, mblk, amat, mblk, ipivot, ier)
1564 CALL assert(ier.EQ.0,
'DGETRF ERROR')
1575 bmat = matmul(amat, temp)
1577 CALL dgetrs(
'n', mblk, mblk, amat, mblk, ipivot,
1579 CALL assert(ier.EQ.0,
'dgetrs INFO != 0')
1591 CALL dgemm(
'N',
'N',mblk,mblk,mblk,-one,amat,mblk,
1592 bmat, mblk, one, cmat, mblk)
1597 DEALLOCATE(vt, w, u, work)
1605 IF (k .NE. nblocks)
THEN
1606 temp = transpose(bp1(:,:,k))
1610 temp = transpose(bm1(:,:,k))
1617 END SUBROUTINE blk3d_factor
1620 SUBROUTINE block_precond(gc)
1625 REAL(dp),
DIMENSION(mblk_size,ns),
INTENT(INOUT) :: gc
1629 REAL(dp),
PARAMETER :: zero = 0
1630 INTEGER :: m, n, js, ntype, istat, irow
1631 REAL(dp) :: t1, error, error_sum, b_sum
1632 REAL(dp),
ALLOCATABLE :: gc_s(:,:)
1633 #if defined(MPI_OPT)
1634 REAL(dp) :: alpha0,beta0
1635 INTEGER :: i,j,k,nn,kk,ia,ja,ix,jx,ic,jc,ib,jb
1637 REAL(dp) :: starttime, endtime, usedtime
1638 INTEGER :: PRECONDPASS=0
1650 DO globrow = mystartblock, myendblock
1651 CALL setmatrixrhs(globrow, gc(1:
mblk_size,globrow))
1656 DO globrow = mystartblock, myendblock
1657 CALL getsolutionvector(globrow, gc(:,globrow))
1663 CALL second0(starttime)
1667 IF (.NOT.
ALLOCATED(gc_s))
ALLOCATE (gc_s(
mblk_size,ns), stat
1668 CALL assert(istat.EQ.0,
'Allocation error0 in block_precond')
1674 IF (lscalapack)
THEN
1675 #if defined(MPI_OPT)
1676 ALLOCATE (tempp(ineedr), stat=istat)
1685 call profstart(
'pdgeadd call:123')
1686 call pdgeadd(
'N', mm0, nrhs0, alpha0, gc, ia, ja, descr_all,
1687 beta0, tempp, ib, jb, descr)
1688 call profend(
'pdgeadd call:123')
1691 call blacs_barrier(icontxt,
'All')
1692 call profstart(
'solver call:123')
1693 call pdtrds(
mblk_size,ns,dblkp,lblkp,ublkp,ipiv_blk,desca,
1694 nrhs1,tempp,ir,jr,descr)
1695 call blacs_barrier(icontxt,
'All')
1696 call profend(
'solver call:123')
1708 call profstart(
'pdgeadd2 call:123')
1709 call pdgeadd(
'N', mm0,nrhs0, alpha0, tempp,ia,ja,descr,
1710 beta0,gc,ib,jb,descr_all)
1711 call dgsum2d( icontxt,
'A',
' ', mm,1,gc,mm,-1,-1)
1712 call profend(
'pdgeadd2 call:123')
1714 DEALLOCATE (tempp, stat=istat)
1716 CALL assert(.false.,
'MPI_OPT must be true for LSCALAPACK!')
1720 CALL second0(endtime)
1721 usedtime=endtime-starttime
1722 IF (sksdbg)
WRITE(tofu,*)
1723 'HessianTag-2 : Time to BackwardSolve:',usedtime,
1724 'PRECONDPASS',precondpass,
'in native run'
1725 IF (sksdbg)
CALL flush(tofu)
1728 CALL blk3d_slv(dblk, lblk, ublk, gc, ipiv_blk,
mblk_size, ns
1731 precondpass = precondpass + 1
1733 IF (l_backslv .AND.
ALLOCATED(dblk_s) .AND. .NOT.
l_linearize)
THEN
1734 error_sum = 0; b_sum = sqrt(sum(gc_s*gc_s)/
neqs)
1735 CALL assert(
ALLOCATED(gc_s),
'gc_s is not allocated')
1737 WRITE (34,
'(2(/,a))')
' BLK3D FACTORIZATION CHECK: Ax = b',
1738 ' IROW M N NTYPE FORCE DELTA-FORCE'
1740 DO js = startglobrow, endglobrow
1742 WRITE (34,*)
' JS: ', js
1747 t1 = sum(dblk_s(irow,:,js)*gc(:,js))
1748 IF (js .LT. ns)
THEN
1749 t1 = t1 + sum(ublk_s(irow,:,js)*gc(:,js+1))
1752 t1 = t1 + sum(lblk_s(irow,:,js)*gc(:,js-1))
1755 error = t1 - gc_s(irow,js)
1756 error_sum = error_sum + error**2
1757 WRITE(34,110) irow, m, n, ntype, gc_s(irow,js)/b_sum
1764 DEALLOCATE(dblk_s, lblk_s, ublk_s, gc_s, stat=istat)
1766 print
'(3(a,1pe12.4))',
' |b|: ', b_sum,
1767 ' |x| = ', sqrt(sum(gc*gc)/
neqs),
1768 ' Hessian Error: ', sqrt(error_sum/
neqs)/b_sum
1772 110
FORMAT(4i6,1p,2e16.4)
1774 100
FORMAT(i6,1p,5e14.4)
1775 101
FORMAT(i6,1p,5e14.4,
' *')
1777 END SUBROUTINE block_precond
1780 SUBROUTINE apply_precond(gc)
1784 REAL(dp),
DIMENSION(mblk_size,ns),
INTENT(INOUT) :: gc
1788 REAL(dp) :: ton, toff
1794 CALL block_precond(gc)
1797 time_apply_precon = time_apply_precon+(toff-ton)
1799 END SUBROUTINE apply_precond
1802 SUBROUTINE blk3d_slv(ablk, qblk, bp1, source, ipiv, mblk, nblocks)
1804 USE stel_constants,
ONLY: one
1808 INTEGER,
INTENT(in) :: nblocks, mblk
1809 INTEGER,
TARGET,
INTENT(in) :: ipiv(mblk,nblocks)
1810 REAL(dp),
TARGET,
DIMENSION(mblk,mblk,nblocks),
INTENT(IN) :: &
1812 REAL(dp),
DIMENSION(mblk,nblocks),
TARGET,
INTENT(INOUT) :: source
1816 INTEGER,
POINTER :: ipivot(:)
1819 REAL(dp),
POINTER :: amat(:,:), x1(:), source_ptr(:)
1888 blocks:
DO k = nblocks, 1, -1
1890 source_ptr => source(:,k)
1893 source_ptr = matmul(amat, source_ptr)
1895 ipivot => ipiv(:,k);
1896 CALL dgetrs(
'n', mblk, 1, amat, mblk, ipivot, source_ptr, mblk,
1897 CALL assert(ier.EQ.0,
'DGETRS INFO != 0')
1904 amat => bp1(:,:,k-1)
1905 x1 => source(:,k); source_ptr => source(:,k-1)
1906 CALL dgemv(
'T',mblk,mblk,-one,amat,mblk,x1,1,one,source_ptr,1)
1919 x1 => source(:,k-1); source_ptr => source(:,k)
1922 CALL dgemv(
'T',mblk,mblk,-one,amat,mblk,x1,1,one,source_ptr,1)
1926 END SUBROUTINE blk3d_slv
1929 SUBROUTINE dealloc_hessian
1932 IF (
ALLOCATED(ublk))
DEALLOCATE(ublk, dblk, lblk, stat=istat)
1933 IF (
ALLOCATED(ublkp))
DEALLOCATE(dblkp,lblkp,ublkp,stat=istat)
1934 IF (
ALLOCATED(mupar_norm))
DEALLOCATE(mupar_norm, stat=istat)
1935 IF (
ALLOCATED(ipiv_blk))
DEALLOCATE (ipiv_blk)
1937 END SUBROUTINE dealloc_hessian
1939 #if defined(MPI_OPT)
1940 SUBROUTINE blk3d_parallel(a,b,c,ap,bp,cp,mblk,mblkc,nblocks)
1944 INTEGER :: ipart,jpart,k
1945 INTEGER :: ia,ja,inc1,ib,jb,inc2,nsm
1949 INTEGER,
INTENT(IN) :: nblocks, mblk, mblkc
1950 REAL(dp) :: aij2,aij
1951 REAL(dp),
TARGET,
DIMENSION(mblk,mblkc,nblocks), &
1952 INTENT(INOUT) :: a, b, c
1953 REAL(dp),
TARGET,
DIMENSION(Locq,Locp,nblocks), &
1955 REAL(dp),
TARGET,
DIMENSION(Locq,Locp,nblocks-1), &
1956 INTENT(INOUT) :: bp, cp
1957 REAL(dp),
POINTER :: amat(:,:), bmat(:,:), cmat(:,:)
1958 REAL(dp),
POINTER :: amatp(:,:), bmatp(:,:), cmatp(:,:)
1961 call profstart(
'change context:123')
1962 call blacs_barrier(desca(ctxt_),
'All')
1965 call pdgemr2d(mblk,mblk,amat,1,1,desca_1xp,
1966 amatp,1,1,desca, icontxt_global)
1970 call pdgemr2d(mblk,mblk,bmat,1,1,desca_1xp,
1971 bmatp,1,1,desca, icontxt_global)
1974 call pdgemr2d(mblk,mblk,cmat,1,1,desca_1xp,
1975 cmatp,1,1,desca, icontxt_global)
1977 call blacs_barrier(desca(ctxt_),
'All')
1978 call profend(
'change context:123')
1985 END SUBROUTINE blk3d_parallel