8 USE v3_utilities,
ONLY:
assert
12 USE descriptor_mod,
ONLY: siesta_comm
19 INTEGER,
PARAMETER :: rprec = selected_real_kind(12,100)
20 INTEGER,
PARAMETER :: iprec = selected_int_kind(8)
21 INTEGER,
PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
22 INTEGER,
PARAMETER :: dp = rprec
31 REAL(dp),
ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
32 INTEGER,
ALLOCATABLE :: pivot(:)
40 REAL(dp),
ALLOCATABLE :: x(:)
75 INTEGER :: startglobrow
84 CHARACTER*100 :: kenvvar
85 CHARACTER*100 :: kenvval
86 LOGICAL :: lbcylic_init=.false.
87 LOGICAL :: lplb_init=.false.
91 LOGICAL :: writeproblemfile
92 LOGICAL :: writesolution
93 LOGICAL :: usebarriers
100 LOGICAL(dp) :: l_colscale=.false.
103 LOGICAL :: use_mpiwtime=.false.
104 DOUBLE PRECISION :: loctimer1, loctimer2
105 DOUBLE PRECISION :: mattimer1, mattimer2
106 DOUBLE PRECISION :: globtimer1, globtimer2
107 DOUBLE PRECISION :: timerfreq
108 DOUBLE PRECISION :: tottime
110 DOUBLE PRECISION :: totcommtime
111 INTEGER :: totcommcount
112 DOUBLE PRECISION :: totinvtime
113 INTEGER :: totinvcount
114 DOUBLE PRECISION :: totmatmultime
115 INTEGER :: totmatmulcount
116 DOUBLE PRECISION :: totmatsoltime
117 INTEGER :: totmatsolcount
124 REAL(dp),
ALLOCATABLE :: origdiagelement(:)
125 REAL(dp),
ALLOCATABLE :: topscalefac(:), botscalefac(:)
134 LOGICAL :: doblasonly
135 LOGICAL :: doblacscomm
143 INTEGER :: myrow, mycol
144 INTEGER :: nrows, ncols
145 INTEGER :: blockszrows, blockszcols
146 INTEGER,
ALLOCATABLE :: map(:,:)
157 INTEGER :: maincontext
158 INTEGER :: levelcontext
159 TYPE(BlacsProcessGrid) :: pgrid
171 INTEGER :: masterrank
173 INTEGER,
ALLOCATABLE :: slaveranks(:)
183 TYPE(MasterToSlaveMapping) :: msmap
185 INTEGER :: mpicomm, mpitag, mpierr
187 INTEGER :: mpistatus(MPI_STATUS_SIZE)
192 TYPE(PBLASLevelParameters) :: pblas
199 INTEGER,
PARAMETER :: op_none = 0
200 INTEGER,
PARAMETER :: op_done = 1
201 INTEGER,
PARAMETER :: op_dgemm = 2
202 INTEGER,
PARAMETER :: op_dgemv = 3
203 INTEGER,
PARAMETER :: op_dgetrf = 4
204 INTEGER,
PARAMETER :: op_dgetrs = 5
212 DOUBLE PRECISION :: tm
214 DOUBLE PRECISION :: t1, t2
217 TYPE(TimeCount) :: wait, comm, comp
218 TYPE(TimeCount) :: mm, trf, pmm, ptrf
219 TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
220 TYPE(TimeCount) :: extract, waitall
223 TYPE(PBLASStats) :: pstats
227 DOUBLE PRECISION,
ALLOCATABLE :: temparray(:)
232 PUBLIC :: setmatrixrowcoll, setmatrixrowcold, setmatrixrowcolu, &
233 getmatrixrowcoll, getmatrixrowcold, getmatrixrowcolu, &
234 getmatrixrhs, storediagonal, initialize, forwardsolve, &
235 setmatrixrhs, backwardsolve, getsolutionvector, padsides, &
236 checksymmetry, applyparallelscaling, findminmax_tri, getranks,&
237 refactorhessian, parmatvec, finalize, checkconditionnumber
238 REAL(dp),
PUBLIC :: maxeigen_tri
239 REAL(dp),
PUBLIC :: dmin_tri
248 SUBROUTINE bclockinit()
254 IF ( use_mpiwtime )
THEN
257 CALL system_clock(count_rate=tempint)
260 END SUBROUTINE bclockinit
267 SUBROUTINE bsystemclock( ts )
271 DOUBLE PRECISION,
INTENT(INOUT) :: ts
277 DOUBLE PRECISION :: tempdbl
279 IF ( use_mpiwtime )
THEN
281 tempdbl = mpi_wtime()
285 CALL system_clock( tempint )
288 END SUBROUTINE bsystemclock
299 INTEGER,
INTENT(IN) :: u
309 SUBROUTINE chargememory( bytes )
313 REAL,
INTENT(IN) :: bytes
316 membytes = membytes + bytes
317 END SUBROUTINE chargememory
324 SUBROUTINE chargetime( tot, t2, t1, cnt )
328 DOUBLE PRECISION,
INTENT(INOUT) :: tot
329 DOUBLE PRECISION,
INTENT(IN) :: t2
330 DOUBLE PRECISION,
INTENT(IN) :: t1
331 INTEGER,
INTENT(INOUT) :: cnt
334 tot = tot + (real(t2-t1))/timerfreq
336 END SUBROUTINE chargetime
344 SUBROUTINE timecountinit( tc )
353 END SUBROUTINE timecountinit
360 SUBROUTINE timecountprint( tc, msg )
365 CHARACTER(*),
INTENT(IN) :: msg
369 DOUBLE PRECISION :: avg
373 IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
374 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,
' * ',avg,
' sec = ',tc%tm,
' sec'
375 END SUBROUTINE timecountprint
382 SUBROUTINE plbinitstats()
383 CALL timecountinit( pstats%wait )
384 CALL timecountinit( pstats%comm )
385 CALL timecountinit( pstats%comp )
386 CALL timecountinit( pstats%mm )
387 CALL timecountinit( pstats%trf )
388 CALL timecountinit( pstats%pmm )
389 CALL timecountinit( pstats%ptrf )
391 CALL timecountinit( pstats%mma )
392 CALL timecountinit( pstats%mmb )
393 CALL timecountinit( pstats%mmc )
394 CALL timecountinit( pstats%mmalpha )
395 CALL timecountinit( pstats%mmbeta )
396 CALL timecountinit( pstats%mmrc )
397 CALL timecountinit( pstats%extract )
398 CALL timecountinit( pstats%waitall )
399 END SUBROUTINE plbinitstats
406 SUBROUTINE plbprintstats()
407 CALL timecountprint( pstats%wait,
'PBLAS Wait ' )
408 CALL timecountprint( pstats%comm,
'PBLAS Comm ' )
409 CALL timecountprint( pstats%comp,
'PBLAS Comp ' )
410 CALL timecountprint( pstats%mm,
'PBLAS MM ' )
411 CALL timecountprint( pstats%trf,
'PBLAS TRF ' )
412 CALL timecountprint( pstats%pmm,
'PBLAS PMM ' )
413 CALL timecountprint( pstats%ptrf,
'PBLAS PTRF ' )
415 CALL timecountprint( pstats%mma,
'PBLAS MMA ' )
416 CALL timecountprint( pstats%mmb,
'PBLAS MMB ' )
417 CALL timecountprint( pstats%mmc,
'PBLAS MMC ' )
418 CALL timecountprint( pstats%mmalpha,
'PBLAS MMalpha ' )
419 CALL timecountprint( pstats%mmbeta,
'PBLAS MMbeta ' )
420 CALL timecountprint( pstats%mmrc,
'PBLAS MMRC ' )
421 CALL timecountprint( pstats%extract,
'PBLAS Extract ' )
422 CALL timecountprint( pstats%waitall,
'PBLAS Waitall ' )
423 END SUBROUTINE plbprintstats
430 SUBROUTINE plbinitialize
434 CHARACTER*100 :: envvar
435 CHARACTER*100 :: envval
438 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Started';
CALL fl(ofu)
442 IF ( m .GE. 2048 )
THEN
444 envvar =
'BLOCKTRI_BLASONLY'
445 CALL getenv( envvar, envval )
446 IF ( envval .EQ.
'TRUE' )
THEN
448 IF(kpdbg)
WRITE(ofu,*)
'BLAS ONLY -- obeying env var ', envvar;
CALL fl(ofu)
451 IF(kpdbg)
WRITE(ofu,*)
'doblasonly = ', doblasonly;
CALL fl(ofu)
454 doblacscomm = .false.
455 envvar =
'BLOCKTRI_BLACSCOMM'
456 CALL getenv( envvar, envval )
457 IF ( envval .EQ.
'TRUE' )
THEN
459 IF(kpdbg)
WRITE(ofu,*)
'BLACS COMM -- obeying env var ', envvar;
CALL fl(ofu)
461 IF(kpdbg)
WRITE(ofu,*)
'doblacscomm = ', doblacscomm;
CALL fl(ofu)
465 envvar =
'BLOCKTRI_NBPP'
466 CALL getenv( envvar, envval )
467 IF ( envval .NE.
'' )
THEN
468 READ( envval, *) blacs%nbpp
469 IF(kpdbg)
WRITE(ofu,*)
'NBPP -- obeying env var ', envvar;
CALL fl(ofu)
471 IF(kpdbg)
WRITE(ofu,*)
'NBPP = ', blacs%nbpp;
CALL fl(ofu)
478 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
481 CALL blacs_pinfo( blacs%iam, blacs%nprocs )
482 IF(kpdbg)
WRITE(ofu,*)
'BLACS_PINFO ', blacs%iam,
' ', blacs%nprocs;
CALL fl(ofu)
483 CALL blacs_get( 0, 0, blacs%maincontext )
484 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GET ', blacs%maincontext;
CALL fl(ofu)
485 CALL blacs_gridinit( blacs%maincontext,
'R', 1, blacs%nprocs )
486 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GRIDINIT';
CALL fl(ofu)
487 CALL blacs_barrier( blacs%maincontext,
'All' )
491 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Done';
CALL fl(ofu)
492 END SUBROUTINE plbinitialize
499 SUBROUTINE plbfinalize
501 IF(.NOT.lplb_init)
RETURN
503 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Started';
CALL fl(ofu)
505 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
508 CALL blacs_barrier( blacs%maincontext,
'All' )
509 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
511 IF(kpdbg)
WRITE(ofu,*)
'BLACS_EXIT nprocs= ', blacs%nprocs;
CALL fl(ofu)
515 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Done';
CALL fl(ofu)
516 END SUBROUTINE plbfinalize
523 SUBROUTINE plbforwardinitialize
526 END SUBROUTINE plbforwardinitialize
533 SUBROUTINE plbforwardfinalize
536 END SUBROUTINE plbforwardfinalize
543 SUBROUTINE plbbackwardinitialize
546 END SUBROUTINE plbbackwardinitialize
553 SUBROUTINE plbbackwardfinalize
556 END SUBROUTINE plbbackwardfinalize
563 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
573 INTEGER :: maxslaves, actualslaves
577 IF ( doblasonly )
THEN
578 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel BLAS only';
CALL fl(ofu)
582 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Started', ammaster;
CALL fl(ofu)
586 pblas%ammaster = ammaster
587 pblas%msmap%masterrank = -1
588 pblas%msmap%nslaves = 0
589 CALL determinemasterslaveranks()
593 blacs%levelcontext = -1
594 CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
595 IF(kpdbg)
WRITE(ofu,*)
'Created new context';
CALL fl(ofu)
596 IF(kpdbg)
WRITE(ofu,*)
'Nslaves ',pblas%msmap%nslaves;
CALL fl(ofu)
598 blacs%pgrid%blockszrows = 64
599 IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
600 blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
601 IF(kpdbg)
WRITE(ofu,*)
'Block NR=',blacs%pgrid%blockszrows;
CALL fl(ofu)
602 IF(kpdbg)
WRITE(ofu,*)
'Block NC=',blacs%pgrid%blockszcols;
CALL fl(ofu)
604 maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
605 blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
606 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
607 IF ( maxslaves .LT. 1 ) maxslaves = 1
608 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
609 IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
610 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
611 actualslaves = maxslaves
612 IF(kpdbg)
WRITE(ofu,*)
' Actual slaves ', actualslaves;
CALL fl(ofu)
614 blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
615 IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
616 blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
617 IF(kpdbg)
WRITE(ofu,*)
'NR=',blacs%pgrid%nrows,
' NC=',blacs%pgrid%ncols;
CALL fl(ofu)
618 ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
620 DO i = 1, blacs%pgrid%nrows
621 DO j = 1, blacs%pgrid%ncols
622 blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
626 IF(kpdbg)
WRITE(ofu,*)
'NR*NC=',k;
CALL fl(ofu)
627 CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
628 blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
629 IF(kpdbg)
WRITE(ofu,*)
'GridMap done';
CALL fl(ofu)
630 CALL blacs_gridinfo( blacs%levelcontext, &
631 blacs%pgrid%nrows, blacs%pgrid%ncols, &
632 blacs%pgrid%myrow, blacs%pgrid%mycol )
633 IF(kpdbg)
WRITE(ofu,*)
'GridInfo done';
CALL fl(ofu)
634 IF(kpdbg)
WRITE(ofu,*)
'Myrowcol ',blacs%pgrid%myrow,
' ',blacs%pgrid%mycol;
CALL fl(ofu)
637 pblas%mpicomm = siesta_comm
641 CALL blacs_barrier( blacs%maincontext,
'All' )
642 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
646 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Master';
CALL fl(ofu)
648 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Slave';
CALL fl(ofu)
652 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Done', ammaster;
CALL fl(ofu)
655 END SUBROUTINE plbforwardinitializelevel
663 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
672 IF ( doblasonly )
THEN
673 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel BLAS only';
CALL fl(ofu)
678 IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) )
THEN
679 CALL masterbcastnextop( op_done )
682 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
683 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
684 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel pariah !level-barrier';
CALL fl(ofu)
686 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level-barrier';
CALL fl(ofu)
687 CALL blacs_barrier( blacs%levelcontext,
'All' )
688 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level grid exit';
CALL fl(ofu)
689 CALL blacs_gridexit( blacs%levelcontext )
692 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel main-barrier';
CALL fl(ofu)
693 CALL blacs_barrier( blacs%maincontext,
'All' )
696 pblas%msmap%masterrank = -1
697 pblas%msmap%nslaves = 0
698 DEALLOCATE( pblas%msmap%slaveranks )
699 DEALLOCATE( blacs%pgrid%map )
704 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel ', ammaster;
CALL fl(ofu)
706 END SUBROUTINE plbforwardfinalizelevel
714 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
723 END SUBROUTINE plbbackwardinitializelevel
730 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
739 END SUBROUTINE plbbackwardfinalizelevel
747 SUBROUTINE determinemasterslaveranks()
751 LOGICAL,
ALLOCATABLE :: recv_ammaster(:), assigned(:)
753 INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
754 INTEGER :: mpi_err, I, J, masterindex
756 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks started';
CALL fl(ofu)
760 ALLOCATE( recv_ammaster(1:nranks) )
761 ALLOCATE( assigned(1:nranks) )
763 assigned( i ) = .false.
765 ALLOCATE( allmsmaps(1:nranks) )
767 allmsmaps(i)%masterrank = -1
768 allmsmaps(i)%nslaves = 0
772 CALL mpi_allgather(pblas%ammaster, 1, mpi_logical, &
773 recv_ammaster, 1, mpi_logical, &
774 siesta_comm, mpi_err)
780 IF(kpdbg)
WRITE(ofu,*)
' recv_ammaster ',i,
' ',recv_ammaster(i);
CALL fl(ofu)
781 IF ( recv_ammaster(i) )
THEN
782 totmasters = totmasters + 1
786 IF ( totmasters .LE. 0 )
THEN
787 IF(kpdbg)
WRITE(ofu,*)
'Total masters must be greater than 0';
CALL fl(ofu)
788 CALL assert(.false.,
'blocktri #1')
791 nslavespermaster = (nranks / totmasters)
792 IF(kpdbg)
WRITE(ofu,*)
'Avg nslavespermaster',nslavespermaster;
CALL fl(ofu)
797 masterloop:
DO i = 1, nranks
798 IF ( recv_ammaster(i) )
THEN
800 tempmaster = tempmaster + 1
804 adjustednslavespermaster = nslavespermaster
805 IF ( tempmaster .LE. mod( nranks, totmasters ) )
THEN
806 adjustednslavespermaster = adjustednslavespermaster + 1
809 IF(kpdbg)
WRITE(ofu,*)
'Adjusted nslavespm',adjustednslavespermaster;
CALL fl(ofu)
810 allmsmaps(i)%masterrank = i-1
811 ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
816 allmsmaps(i)%nslaves = 1
817 allmsmaps(i)%slaveranks(1) = i-1
821 slaveloop:
DO j = 1, nranks
822 IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster )
THEN
825 IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) )
THEN
827 allmsmaps(j)%masterrank = i-1
828 allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
829 allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
834 IF(kpdbg)
WRITE(ofu,*)
'Computed all mappings';
CALL fl(ofu)
839 IF ( allmsmaps(masterindex)%masterrank .NE. rank )
THEN
840 masterindex = allmsmaps(masterindex)%masterrank+1
842 pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
843 pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
844 ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
845 pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
847 IF(kpdbg)
WRITE(ofu,*)
'Extracted my mappings';
CALL fl(ofu)
848 IF(kpdbg)
WRITE(ofu,*)
'My master rank',masterindex-1;
CALL fl(ofu)
849 IF(kpdbg)
WRITE(ofu,*)
'Nslaves in my set',allmsmaps(masterindex)%nslaves;
CALL fl(ofu)
850 DO j = 1, allmsmaps(masterindex)%nslaves
851 IF(kpdbg)
WRITE(ofu,*)
'Slave',j,
' ',allmsmaps(masterindex)%slaveranks(j);
CALL fl(ofu)
857 IF ( recv_ammaster(i) )
THEN
858 DEALLOCATE( allmsmaps(i)%slaveranks )
861 DEALLOCATE( assigned )
862 DEALLOCATE( allmsmaps )
863 DEALLOCATE( recv_ammaster )
865 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks done';
CALL fl(ofu)
866 END SUBROUTINE determinemasterslaveranks
874 SUBROUTINE masterbcastnextop( nextop )
878 INTEGER,
INTENT(IN) :: nextop
882 INTEGER :: K, destrank
883 REAL(dp) :: realnextop
885 realnextop = real(nextop)
886 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp started ', nextop;
CALL fl(ofu)
887 CALL masterbcastvalue( realnextop )
888 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp done ', nextop;
CALL fl(ofu)
889 END SUBROUTINE masterbcastnextop
896 SUBROUTINE slavegetnextop( nextop )
900 REAL(dp) :: realnextop
901 INTEGER,
INTENT(INOUT) :: nextop
903 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp started';
CALL fl(ofu)
904 CALL slavereceivevalue( realnextop )
905 nextop = int( realnextop )
906 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp done ', nextop;
CALL fl(ofu)
907 END SUBROUTINE slavegetnextop
914 SUBROUTINE slaveservice()
921 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
922 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
923 IF(kpdbg)
WRITE(ofu,*)
'SlaveService pariah falling out ';
CALL fl(ofu)
925 IF(kpdbg)
WRITE(ofu,*)
'SlaveService started ';
CALL fl(ofu)
926 oploop:
DO WHILE (.true.)
928 CALL slavegetnextop( nextop )
929 IF ( nextop .EQ. op_done )
THEN
931 ELSE IF ( nextop .EQ. op_dgemm )
THEN
933 ELSE IF ( nextop .EQ. op_dgetrf )
THEN
935 ELSE IF ( nextop .EQ. op_dgetrs )
THEN
938 IF(kpdbg)
WRITE(ofu,*)
'Bad Next Op', nextop;
CALL fl(ofu)
939 CALL assert(.false.,
'blocktri #2')
942 IF(kpdbg)
WRITE(ofu,*)
'SlaveService done ';
CALL fl(ofu)
944 END SUBROUTINE slaveservice
951 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
952 A, nrows, ncols, subA, subnrows, subncols )
956 INTEGER,
INTENT(IN) :: bszr, bszc
957 INTEGER,
INTENT(IN) :: pnr, pnc
958 INTEGER,
INTENT(IN) :: pi, pj
959 REAL(dp),
INTENT(IN) :: A(:,:)
960 INTEGER,
INTENT(IN) :: nrows, ncols
961 REAL(dp),
INTENT(OUT) :: subA(:)
962 INTEGER,
INTENT(IN) :: subnrows, subncols
966 INTEGER :: I, J, K, Q, R
967 INTEGER :: thisblkr, thisblkc
969 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
971 CALL bsystemclock(pstats%extract%t1)
974 DO j = 1, ncols, bszc
975 thisblkc = (j-1) / bszc
976 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
978 IF ( j+r-1 .LE. ncols )
THEN
979 DO i = 1, nrows, bszr
980 thisblkr = (i-1) / bszr
981 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
983 IF ( i+q-1 .LE. nrows )
THEN
985 suba(k) = a(i+q-1,j+r-1)
996 IF ( k .NE. subnrows*subncols )
THEN
997 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
998 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
999 CALL assert(.false.,
'blocktri #3')
1002 CALL bsystemclock(pstats%extract%t2)
1003 CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
1005 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix done K', k;
CALL fl(ofu)
1007 END SUBROUTINE extractsubmatrix
1016 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1020 REAL(dp),
INTENT(IN) :: A(:,:)
1021 INTEGER,
INTENT(IN) :: nrows, ncols
1022 REAL(dp),
INTENT(OUT) :: ssubA(:)
1023 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
1028 INTEGER :: aslaverank
1030 INTEGER :: subnrows, subncols
1031 INTEGER :: maxsubnrows, maxsubncols
1032 INTEGER :: bszr, bszc
1034 #if defined(MPI_OPT)
1035 INTEGER,
EXTERNAL :: NUMROC
1036 INTEGER :: mpistatus(MPI_STATUS_SIZE)
1040 INTEGER :: mpinisends
1041 INTEGER,
ALLOCATABLE :: mpireqs(:)
1043 INTEGER,
ALLOCATABLE :: mpistatuses(:,:)
1044 INTEGER :: waitmatchindex
1047 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix started';
CALL fl(ofu)
1048 CALL bsystemclock(pstats%comm%t1)
1050 #if defined(MPI_OPT)
1051 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1052 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1056 ALLOCATE( mpireqs( pnr * pnc ) )
1057 ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1060 maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1061 maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1062 ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1066 aslaverank = blacs%pgrid%map(i,j)
1068 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1069 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1071 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix to ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1073 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1075 IF ( aslaverank .NE. rank )
THEN
1076 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1077 CALL assert(.false.,
'blocktri #4')
1079 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1080 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1081 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1082 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1083 CALL assert(.false.,
'blocktri #5')
1087 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting self submatrix';
CALL fl(ofu)
1088 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1089 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1090 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix kept self submatrix';
CALL fl(ofu)
1093 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting submatrix',i,j;
CALL fl(ofu)
1094 ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1095 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1096 i, j, a, nrows, ncols, suba(i,j)%temparray, &
1098 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracted submatrix';
CALL fl(ofu)
1099 IF ( doblacscomm )
THEN
1100 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1101 suba(i,j)%temparray, subnrows, i-1, j-1 )
1103 mpinisends = mpinisends + 1
1104 CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1105 aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1107 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix sent slave submatrix';
CALL fl(ofu)
1108 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix deallocated temp submatrix';
CALL fl(ofu)
1113 CALL bsystemclock(pstats%waitall%t1)
1114 CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1115 CALL bsystemclock(pstats%waitall%t2)
1116 CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1120 IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) )
THEN
1121 DEALLOCATE( suba(i,j)%temparray )
1126 DEALLOCATE( mpireqs )
1127 DEALLOCATE( mpistatuses )
1129 CALL bsystemclock(pstats%comm%t2)
1130 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1132 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix done';
CALL fl(ofu)
1134 END SUBROUTINE mastersendmatrix
1141 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1145 #if defined(MPI_OPT)
1146 INTEGER :: mpistatus(MPI_STATUS_SIZE)
1148 INTEGER,
INTENT(IN) :: nrows, ncols
1149 REAL(dp),
INTENT(OUT) :: subA(:)
1150 INTEGER,
INTENT(IN) :: subnrows, subncols
1154 INTEGER :: masterrank
1157 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1159 #if defined(MPI_OPT)
1160 masterrank = blacs%pgrid%map(1,1)
1162 CALL bsystemclock(pstats%comm%t1)
1164 IF ( doblacscomm )
THEN
1165 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1167 CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1168 masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1171 CALL bsystemclock(pstats%comm%t2)
1172 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1175 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix done';
CALL fl(ofu)
1176 END SUBROUTINE slavereceivematrix
1183 SUBROUTINE masterbcastvalue( val )
1187 REAL(dp),
INTENT(IN) :: val
1189 #if defined(MPI_OPT)
1190 CALL dgebs2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1 );
1192 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastValue bcast to slaves';
CALL fl(ofu)
1194 END SUBROUTINE masterbcastvalue
1201 SUBROUTINE slavereceivevalue( val )
1205 REAL(dp),
INTENT(OUT) :: val
1207 #if defined(MPI_OPT)
1208 CALL dgebr2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1, 0, 0 )
1210 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveValue bcast from master'
1213 END SUBROUTINE slavereceivevalue
1220 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1221 A, nrows, ncols, subA, subnrows, subncols )
1225 INTEGER,
INTENT(IN) :: bszr, bszc
1226 INTEGER,
INTENT(IN) :: pnr, pnc
1227 INTEGER,
INTENT(IN) :: pi, pj
1228 REAL(dp),
INTENT(OUT) :: A(:,:)
1229 INTEGER,
INTENT(IN) :: nrows, ncols
1230 REAL(dp),
INTENT(IN) :: subA(:)
1231 INTEGER,
INTENT(IN) :: subnrows, subncols
1235 INTEGER :: I, J, K, Q, R
1236 INTEGER :: thisblkr, thisblkc
1238 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
1241 DO j = 1, ncols, bszc
1242 thisblkc = (j-1) / bszc
1243 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
1245 IF ( j+r-1 .LE. ncols )
THEN
1246 DO i = 1, nrows, bszr
1247 thisblkr = (i-1) / bszr
1248 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1250 IF ( i+q-1 .LE. nrows )
THEN
1252 a(i+q-1,j+r-1) = suba(k)
1263 IF ( k .NE. subnrows*subncols )
THEN
1264 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1265 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
1266 CALL assert(.false.,
'blocktri #6')
1269 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix done K', k;
CALL fl(ofu)
1271 END SUBROUTINE injectsubmatrix
1278 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1282 INTEGER,
INTENT(IN) :: bszr
1283 INTEGER,
INTENT(IN) :: pnr
1284 INTEGER,
INTENT(IN) :: pi
1285 INTEGER,
INTENT(OUT) :: V(:)
1286 INTEGER,
INTENT(IN) :: nrows
1287 INTEGER,
INTENT(IN) :: subV(:)
1288 INTEGER,
INTENT(IN) :: subnrows
1295 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector NR=', subnrows;
CALL fl(ofu)
1298 DO i = 1, nrows, bszr
1299 thisblkr = (i-1) / bszr
1300 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1302 IF ( i+q-1 .LE. nrows )
THEN
1311 IF ( k .NE. subnrows )
THEN
1312 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1313 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows;
CALL fl(ofu)
1314 CALL assert(.false.,
'blocktri #7')
1317 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector done K', k;
CALL fl(ofu)
1319 END SUBROUTINE injectsubvector
1328 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1332 REAL(dp),
INTENT(OUT) :: A(:,:)
1333 INTEGER,
INTENT(IN) :: nrows, ncols
1334 REAL(dp),
INTENT(IN) :: ssubA(:)
1335 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
1340 INTEGER :: aslaverank
1341 REAL(dp),
ALLOCATABLE :: subA(:)
1342 INTEGER :: subnrows, subncols
1343 INTEGER :: bszr, bszc
1345 #if defined(MPI_OPT)
1346 INTEGER,
EXTERNAL :: NUMROC
1349 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix started';
CALL fl(ofu)
1350 CALL bsystemclock(pstats%comm%t1)
1352 #if defined(MPI_OPT)
1353 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1354 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1358 aslaverank = blacs%pgrid%map(i,j)
1360 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1361 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1363 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1365 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1367 IF ( aslaverank .NE. rank )
THEN
1368 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1369 CALL assert(.false.,
'blocktri #8')
1371 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1372 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1373 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1374 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1375 CALL assert(.false.,
'blocktri #9')
1379 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting self submatrix';
CALL fl(ofu)
1380 CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1381 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1382 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix kept self submatrix';
CALL fl(ofu)
1385 ALLOCATE( suba( 1 : subnrows*subncols ) )
1386 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix receiving slave submatrix';
CALL fl(ofu)
1387 CALL bsystemclock(pstats%wait%t1)
1388 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1389 suba, subnrows, i-1, j-1 )
1390 CALL bsystemclock(pstats%wait%t2)
1391 CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1392 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting submatrix',i,j;
CALL fl(ofu)
1393 CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1394 i, j, a, nrows, ncols, suba, subnrows, subncols )
1395 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injected submatrix';
CALL fl(ofu)
1397 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix deallocated temp submatrix';
CALL fl(ofu)
1402 CALL bsystemclock(pstats%comm%t2)
1403 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1406 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix done';
CALL fl(ofu)
1408 END SUBROUTINE masterrecvmatrix
1415 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1419 INTEGER,
INTENT(IN) :: nrows, ncols
1420 REAL(dp),
INTENT(IN) :: subA(:)
1421 INTEGER,
INTENT(IN) :: subnrows, subncols
1423 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1425 CALL bsystemclock(pstats%comm%t1)
1426 #if defined(MPI_OPT)
1427 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1429 CALL bsystemclock(pstats%comm%t2)
1430 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1432 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix done';
CALL fl(ofu)
1433 END SUBROUTINE slavesendmatrix
1442 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1446 INTEGER,
INTENT(OUT) :: V(:)
1447 INTEGER,
INTENT(IN) :: nrows
1448 INTEGER,
INTENT(IN) :: ssubV(:)
1449 INTEGER,
INTENT(IN) :: ssubnrows
1454 INTEGER :: aslaverank
1455 INTEGER,
ALLOCATABLE :: subV(:)
1459 #if defined(MPI_OPT)
1460 INTEGER,
EXTERNAL :: NUMROC
1463 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector started';
CALL fl(ofu)
1464 CALL bsystemclock(pstats%comm%t1)
1466 #if defined(MPI_OPT)
1467 bszr = blacs%pgrid%blockszrows
1468 pnr = blacs%pgrid%nrows
1472 aslaverank = blacs%pgrid%map(i,j)
1474 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1476 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1478 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1480 IF ( aslaverank .NE. rank )
THEN
1481 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1482 CALL assert(.false.,
'blocktri #10')
1484 IF ( ssubnrows .NE. subnrows )
THEN
1485 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1486 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows;
CALL fl(ofu)
1487 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows;
CALL fl(ofu)
1488 CALL assert(.false.,
'blocktri #11')
1492 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting self subvector';
CALL fl(ofu)
1493 CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1494 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector kept self subvector';
CALL fl(ofu)
1497 ALLOCATE( subv( 1 : subnrows ) )
1498 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector receiving slave subvector';
CALL fl(ofu)
1499 CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1500 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting subvector',i,j;
CALL fl(ofu)
1501 CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1502 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injected subvector';
CALL fl(ofu)
1504 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector deallocated temp subvector';
CALL fl(ofu)
1509 CALL bsystemclock(pstats%comm%t2)
1510 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1513 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector done';
CALL fl(ofu)
1515 END SUBROUTINE masterrecvvector
1524 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1528 INTEGER,
INTENT(IN) :: nrows
1529 INTEGER,
INTENT(IN) :: subV(:)
1530 INTEGER,
INTENT(IN) :: subnrows
1533 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector started ', subnrows;
CALL fl(ofu)
1535 CALL bsystemclock(pstats%comm%t1)
1537 pj = blacs%pgrid%mycol
1538 IF ( pj .GT. 0 )
THEN
1539 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector skipping since mycol>0 ',pj;
CALL fl(ofu)
1541 #if defined(MPI_OPT)
1542 CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1546 CALL bsystemclock(pstats%comm%t2)
1547 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1549 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector done';
CALL fl(ofu)
1550 END SUBROUTINE slavesendvector
1557 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1561 REAL(dp),
INTENT(IN) :: alpha, beta
1562 REAL(dp),
INTENT(IN) :: A(:,:), B(:,:)
1563 REAL(dp),
INTENT(INOUT) :: C(:,:)
1567 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1568 INTEGER :: descA(9), descB(9), descC(9)
1569 INTEGER :: lldA, lldB, lldC
1570 INTEGER :: nrows, ncols, subnrows, subncols
1571 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1573 INTEGER :: ctxt, info
1574 #if defined(MPI_OPT)
1575 INTEGER,
EXTERNAL :: NUMROC
1578 IF(kpdbg)
WRITE(ofu,*)
'MasterGEMM started';
CALL fl(ofu)
1581 useblas = ( doblasonly ) .OR. &
1582 ( pblas%msmap%nslaves .EQ. 1) .OR. &
1583 ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1584 ( m .LE. blacs%pgrid%blockszcols )
1587 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGEMM only (not using PBLAS)';
CALL fl(ofu)
1588 CALL dgemm(
'N',
'N', m, m, m, alpha, a, m, b, m, beta, c, m )
1590 #if defined(MPI_OPT)
1591 CALL bsystemclock(pstats%mm%t1)
1592 nrows = m; ncols = m
1593 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1594 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1595 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1596 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1597 subncols = numroc( ncols, bszc, pj, 0, pnc )
1599 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM allocating subABC';
CALL fl(ofu)
1600 ALLOCATE( suba(1:subnrows*subncols) )
1601 ALLOCATE( subb(1:subnrows*subncols) )
1602 ALLOCATE( subc(1:subnrows*subncols) )
1603 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM allocated subABC';
CALL fl(ofu)
1605 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM desciniting subABC';
CALL fl(ofu)
1606 ctxt = blacs%levelcontext
1607 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1608 lldb = llda; lldc = llda
1609 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1610 CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1611 CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1612 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM desciniting subABC';
CALL fl(ofu)
1614 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending OP_DGEMM';
CALL fl(ofu)
1615 CALL masterbcastnextop( op_dgemm )
1617 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending A';
CALL fl(ofu)
1618 CALL bsystemclock(pstats%mma%t1)
1619 CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1620 CALL bsystemclock(pstats%mma%t2)
1621 CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1623 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending B';
CALL fl(ofu)
1624 CALL bsystemclock(pstats%mmb%t1)
1625 CALL mastersendmatrix( b, nrows, ncols, subb, subnrows, subncols )
1626 CALL bsystemclock(pstats%mmb%t2)
1627 CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1629 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending C';
CALL fl(ofu)
1630 CALL bsystemclock(pstats%mmc%t1)
1631 CALL mastersendmatrix( c, nrows, ncols, subc, subnrows, subncols )
1632 CALL bsystemclock(pstats%mmc%t2)
1633 CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1635 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending alpha', alpha;
CALL fl(ofu)
1636 CALL bsystemclock(pstats%mmalpha%t1)
1637 CALL masterbcastvalue( alpha )
1638 CALL bsystemclock(pstats%mmalpha%t2)
1639 CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1641 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending beta', beta;
CALL fl(ofu)
1642 CALL bsystemclock(pstats%mmbeta%t1)
1643 CALL masterbcastvalue( beta )
1644 CALL bsystemclock(pstats%mmbeta%t2)
1645 CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1647 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM invoking PDGEMM';
CALL fl(ofu)
1648 CALL bsystemclock(pstats%comp%t1)
1649 CALL pdgemm(
'N',
'N', m, m, m, alpha, &
1650 suba, 1, 1, desca, &
1651 subb, 1, 1, descb, &
1654 CALL bsystemclock(pstats%comp%t2)
1655 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1656 CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1657 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM done PDGEMM';
CALL fl(ofu)
1659 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM receiving slave matrices';
CALL fl(ofu)
1660 CALL bsystemclock(pstats%mmrc%t1)
1661 CALL masterrecvmatrix( c, nrows, ncols, subc, subnrows, subncols )
1662 CALL bsystemclock(pstats%mmrc%t2)
1663 CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1664 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM received slave matrices';
CALL fl(ofu)
1666 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM deallocating subABC';
CALL fl(ofu)
1670 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM deallocated subABC';
CALL fl(ofu)
1671 CALL bsystemclock(pstats%mm%t2)
1672 CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1676 IF(kpdbg)
WRITE(ofu,*)
'MasterGEMM done';
CALL fl(ofu)
1677 END SUBROUTINE plbdgemm
1684 SUBROUTINE slavedgemm()
1688 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1689 REAL(dp) :: alpha, beta
1690 INTEGER :: descA(9), descB(9), descC(9)
1691 INTEGER :: lldA, lldB, lldC
1692 INTEGER :: nrows, ncols, subnrows, subncols
1693 INTEGER :: bszr, bszc, pnr, pnc
1695 INTEGER :: ctxt, info
1696 #if defined(MPI_OPT)
1697 INTEGER,
EXTERNAL :: NUMROC
1700 CALL bsystemclock(pstats%mm%t1)
1702 #if defined(MPI_OPT)
1703 nrows = m; ncols = m
1704 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1705 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1706 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1708 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1709 subncols = numroc( ncols, bszc, pj, 0, pnc )
1711 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocating subABC';
CALL fl(ofu)
1712 ALLOCATE( suba(1:subnrows*subncols) )
1713 ALLOCATE( subb(1:subnrows*subncols) )
1714 ALLOCATE( subc(1:subnrows*subncols) )
1715 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocated subABC';
CALL fl(ofu)
1717 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1718 ctxt = blacs%levelcontext
1719 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1720 lldb = llda; lldc = llda
1721 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1722 CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1723 CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1724 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1726 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving A';
CALL fl(ofu)
1727 CALL bsystemclock(pstats%mma%t1)
1728 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1729 CALL bsystemclock(pstats%mma%t2)
1730 CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1732 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving B';
CALL fl(ofu)
1733 CALL bsystemclock(pstats%mmb%t1)
1734 CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1735 CALL bsystemclock(pstats%mmb%t2)
1736 CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1738 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving C';
CALL fl(ofu)
1739 CALL bsystemclock(pstats%mmc%t1)
1740 CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1741 CALL bsystemclock(pstats%mmc%t2)
1742 CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1744 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving alpha';
CALL fl(ofu)
1745 CALL bsystemclock(pstats%mmalpha%t1)
1746 CALL slavereceivevalue( alpha )
1747 CALL bsystemclock(pstats%mmalpha%t2)
1748 CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1750 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving beta';
CALL fl(ofu)
1751 CALL bsystemclock(pstats%mmbeta%t1)
1752 CALL slavereceivevalue( beta )
1753 CALL bsystemclock(pstats%mmbeta%t2)
1754 CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1756 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM invoking PDGEMM';
CALL fl(ofu)
1757 CALL bsystemclock(pstats%comp%t1)
1758 CALL pdgemm(
'N',
'N', m, m, m, alpha, &
1759 suba, 1, 1, desca, &
1760 subb, 1, 1, descb, &
1763 CALL bsystemclock(pstats%comp%t2)
1764 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1765 CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1766 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM done PDGEMM';
CALL fl(ofu)
1768 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sending result matrix to master';
CALL fl(ofu)
1769 CALL bsystemclock(pstats%mmrc%t1)
1770 CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1771 CALL bsystemclock(pstats%mmrc%t2)
1772 CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1773 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sent result matrix to master';
CALL fl(ofu)
1775 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocating subABC';
CALL fl(ofu)
1779 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocated subABC';
CALL fl(ofu)
1782 CALL bsystemclock(pstats%mm%t2)
1783 CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1785 END SUBROUTINE slavedgemm
1792 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1796 REAL(dp),
INTENT(IN) :: alpha, beta
1797 REAL(dp),
INTENT(IN) :: A(:,:)
1798 REAL(dp),
INTENT(IN) :: x(:)
1799 REAL(dp),
INTENT(INOUT) :: y(:)
1803 CALL dgemv(
'N', m, m, alpha, a, m, x, 1, beta, y, 1 )
1804 END SUBROUTINE plbdgemv
1811 SUBROUTINE plbdgetrf( A, piv, info )
1815 REAL(dp),
INTENT(INOUT) :: A(:,:)
1816 INTEGER,
INTENT(OUT) :: piv(:)
1817 INTEGER,
INTENT(INOUT) :: info
1821 REAL(dp),
ALLOCATABLE :: subA(:)
1822 INTEGER,
ALLOCATABLE :: subpiv(:)
1825 INTEGER :: nrows, ncols, subnrows, subncols
1826 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1829 #if defined(MPI_OPT)
1830 INTEGER,
EXTERNAL :: NUMROC
1833 IF(kpdbg)
WRITE(ofu,*)
'MasterGETRF started';
CALL fl(ofu)
1836 useblas = ( doblasonly ) .OR. &
1837 ( pblas%msmap%nslaves .EQ. 1) .OR. &
1838 ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1839 ( m .LE. blacs%pgrid%blockszcols )
1844 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGETRF only (not using PBLAS) with M=',m ;
CALL fl(ofu)
1845 CALL dgetrf( m, m, a, m, piv, info )
1847 #if defined(MPI_OPT)
1848 CALL bsystemclock(pstats%trf%t1)
1849 nrows = m; ncols = m
1850 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1851 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1852 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1853 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1854 subncols = numroc( ncols, bszc, pj, 0, pnc )
1856 ctxt = blacs%levelcontext
1857 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1858 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1860 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF allocating subAPiv';
CALL fl(ofu)
1861 ALLOCATE( suba(1:subnrows*subncols) )
1862 ALLOCATE( subpiv(1:subnrows+bszr) )
1863 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF allocated subAPiv';
CALL fl(ofu)
1865 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF sending OP_GETRF';
CALL fl(ofu)
1866 CALL masterbcastnextop( op_dgetrf )
1867 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF sending A';
CALL fl(ofu)
1868 CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1869 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF invoking PDGETRF';
CALL fl(ofu)
1870 CALL bsystemclock(pstats%comp%t1)
1871 CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1872 CALL bsystemclock(pstats%comp%t2)
1873 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1874 CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1875 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done PDGETRF';
CALL fl(ofu)
1877 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF receiving slave submatrices';
CALL fl(ofu)
1878 CALL masterrecvmatrix( a, nrows, ncols, suba, subnrows, subncols )
1879 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF received slave submatrices';
CALL fl(ofu)
1880 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF receiving slave vectors';
CALL fl(ofu)
1881 CALL masterrecvvector( piv, nrows, subpiv, subnrows )
1882 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF received slave vectors';
CALL fl(ofu)
1884 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF deallocating subAPiv';
CALL fl(ofu)
1885 DEALLOCATE( subpiv )
1887 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF deallocated subAPiv';
CALL fl(ofu)
1888 CALL bsystemclock(pstats%trf%t2)
1889 CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1893 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done';
CALL fl(ofu)
1894 END SUBROUTINE plbdgetrf
1901 SUBROUTINE slavedgetrf()
1905 REAL(dp),
ALLOCATABLE :: subA(:)
1906 INTEGER,
ALLOCATABLE :: subpiv(:)
1909 INTEGER :: nrows, ncols, subnrows, subncols
1910 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1911 INTEGER :: ctxt, info
1912 #if defined(MPI_OPT)
1913 INTEGER,
EXTERNAL :: NUMROC
1916 CALL bsystemclock(pstats%trf%t1)
1918 #if defined(MPI_OPT)
1919 nrows = m; ncols = m
1920 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1921 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1922 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1923 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1924 subncols = numroc( ncols, bszc, pj, 0, pnc )
1926 ctxt = blacs%levelcontext
1927 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1928 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1930 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocating subAPiv';
CALL fl(ofu)
1931 ALLOCATE( suba(1:subnrows*subncols) )
1932 ALLOCATE( subpiv(1:subnrows+bszr) )
1933 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocated subAPiv';
CALL fl(ofu)
1935 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF receiving A submatrix';
CALL fl(ofu)
1936 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1937 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF received A submatrix';
CALL fl(ofu)
1939 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF invoking PDGETRF';
CALL fl(ofu)
1940 CALL bsystemclock(pstats%comp%t1)
1941 CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1942 CALL bsystemclock(pstats%comp%t2)
1943 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1944 CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1945 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done PDGETRF';
CALL fl(ofu)
1947 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sending result matrix to master';
CALL fl(ofu)
1948 CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1949 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result matrix to master';
CALL fl(ofu)
1950 CALL slavesendvector( nrows, subpiv, subnrows )
1951 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result vector to master';
CALL fl(ofu)
1953 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocating subAPiv';
CALL fl(ofu)
1954 DEALLOCATE( subpiv )
1956 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocated subAPiv';
CALL fl(ofu)
1960 CALL bsystemclock(pstats%trf%t2)
1961 CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1963 END SUBROUTINE slavedgetrf
1970 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1974 INTEGER,
INTENT(IN) :: nrhs
1975 REAL(dp),
INTENT(IN) :: A(:,:)
1976 INTEGER,
INTENT(IN) :: piv(:)
1977 REAL(dp),
INTENT(INOUT) :: B(:,:)
1982 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGETRS only (not using PBLAS)';
CALL fl(ofu)
1983 CALL dgetrs(
'N', m, nrhs, a, m, piv, b, m, info )
1984 END SUBROUTINE plbdgetrs
1991 SUBROUTINE slavedgetrs()
1993 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRS not implemented';
CALL fl(ofu)
1994 CALL assert(.false.,
'blocktri #12')
1996 END SUBROUTINE slavedgetrs
2005 SUBROUTINE initialize( inN, inM )
2009 INTEGER,
INTENT(IN) :: inn
2010 INTEGER,
INTENT(IN) :: inm
2015 INTEGER :: nrowsperrank
2016 INTEGER :: nspillrows
2019 INTEGER :: globrowoff
2025 #if defined(MPI_OPT)
2026 CALL mpi_initialized (flag, mpierr)
2027 IF ( flag .EQ. 0 )
THEN
2028 CALL mpi_init( mpierr )
2030 use_mpiwtime = .true.
2031 CALL mpi_comm_rank( siesta_comm, rank, mpierr )
2032 CALL mpi_comm_size( siesta_comm, nranks, mpierr)
2037 IF (n.EQ.0 .OR. m.EQ.0)
return
2045 IF ( p .GT. n )
THEN
2046 IF(kpdbg)
WRITE(ofu,*)
'Detected extra ', p-n,
' ranks';
CALL fl(ofu)
2058 kpdbg=.false.; kenvvar=
'KPDBG'
2059 CALL getenv(kenvvar,kenvval)
2060 IF (kenvval.NE.
'')
THEN
2061 IF (kenvval.EQ.
'TRUE')
THEN
2069 IF(kpdbg)
WRITE(ofu,*)
'Rank ', rank,
' NRanks ', nranks;
CALL fl(ofu)
2070 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2071 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization start ------';
CALL fl(ofu)
2072 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2075 IF ( n .LT. 1 )
THEN
2076 IF(kpdbg)
WRITE(ofu,*)
'Bad N', n;
CALL fl(ofu)
2077 CALL assert(.false.,
'blocktri #13')
2085 nspillrows = mod(n, p)
2086 IF ( rank .LT. nspillrows )
THEN
2087 startglobrow = rank*nrowsperrank + rank + 1
2088 endglobrow = startglobrow + nrowsperrank
2089 ELSE IF ( rank .LT. p )
THEN
2090 startglobrow = rank*nrowsperrank + nspillrows + 1
2091 endglobrow = startglobrow + nrowsperrank - 1
2100 IF (.NOT.
ALLOCATED(origdiagelement))
ALLOCATE (origdiagelement((endglobrow-startglobrow+1)*m))
2105 lognbase2 = log(real(n))/log(2.0)
2106 nlevels = 1 + int(lognbase2)
2107 IF ( real(int(lognbase2)) .NE. lognbase2 )
THEN
2108 nlevels = nlevels + 1
2110 IF(kpdbg)
WRITE(ofu,*)
'NLEVELS=', nlevels;
CALL fl(ofu)
2120 usebarriers = .false.
2123 writeproblemfile = .false.
2124 writesolution = .false.
2150 IF (.NOT.
ALLOCATED(lelement))
ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2152 CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
2155 IF (.NOT.
ALLOCATED(selement))
ALLOCATE( selement(n) )
2157 CALL chargememory( n*ptrsz )
2162 DO globrow = startglobrow, endglobrow, 1
2163 globrowoff = globrow-startglobrow+1
2165 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%L))
ALLOCATE( lelement(1, globrowoff)%L(m,m) )
2166 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%D))
ALLOCATE( lelement(1, globrowoff)%D(m,m) )
2167 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%U))
ALLOCATE( lelement(1, globrowoff)%U(m,m) )
2168 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%b))
ALLOCATE( lelement(1, globrowoff)%b(m,1) )
2169 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%pivot))
ALLOCATE( lelement(1, globrowoff)%pivot(m) )
2176 CALL chargememory( (3*m*m+m)*dpsz + m*intsz )
2177 lelement(1, globrowoff)%L = 0
2178 lelement(1, globrowoff)%D = 0
2179 lelement(1, globrowoff)%U = 0
2180 lelement(1, globrowoff)%b = 0
2181 lelement(1, globrowoff)%pivot = 0
2188 IF (.NOT.
ALLOCATED(orig))
ALLOCATE( orig(endglobrow-startglobrow+1) )
2190 DO globrow = startglobrow, endglobrow, 1
2191 globrowoff = globrow-startglobrow+1
2193 IF(.NOT.
ALLOCATED(orig(globrowoff)%L))
ALLOCATE( orig(globrowoff)%L(m,m) )
2194 IF(.NOT.
ALLOCATED(orig(globrowoff)%D))
ALLOCATE( orig(globrowoff)%D(m,m) )
2195 IF(.NOT.
ALLOCATED(orig(globrowoff)%U))
ALLOCATE( orig(globrowoff)%U(m,m) )
2196 IF(.NOT.
ALLOCATED(orig(globrowoff)%b))
ALLOCATE( orig(globrowoff)%b(m,1) )
2203 orig(globrowoff)%L = 0
2204 orig(globrowoff)%D = 0
2205 orig(globrowoff)%U = 0
2206 orig(globrowoff)%b = 0
2210 CALL plbinitialize()
2213 IF(kpdbg)
WRITE(ofu,*)
' P=',p,
' N=',n,
' M=',m;
CALL fl(ofu)
2214 IF(kpdbg)
WRITE(ofu,*)
'SROW=',startglobrow,
' EROW=', endglobrow;
CALL fl(ofu)
2215 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization end ------';
CALL fl(ofu)
2216 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2217 END SUBROUTINE initialize
2222 SUBROUTINE getranks(irank, inranks)
2223 INTEGER,
INTENT(OUT) :: irank, inranks
2224 irank = rank; inranks = nranks
2225 END SUBROUTINE getranks
2232 SUBROUTINE setmatrixrow( globrow, L, D, U )
2236 INTEGER,
INTENT(IN) :: globrow
2237 REAL(dp),
INTENT(IN) :: L(:,:), D(:,:), U(:,:)
2242 INTEGER :: i, j, globrowoff
2247 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2248 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRow: Bad input globrow ',globrow;
CALL fl(ofu)
2249 CALL assert(.false.,
'blocktri #14')
2251 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2252 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRow: Non-local globrow ',globrow;
CALL fl(ofu)
2253 CALL assert(.false.,
'blocktri #15')
2256 globrowoff = globrow-startglobrow+1
2263 IF ( globrow .EQ. 1 )
THEN
2268 lelement(1, globrowoff)%L(i,j) = val
2271 lelement(1, globrowoff)%D(i,j) = val
2273 IF ( globrow .EQ. n )
THEN
2278 lelement(1, globrowoff)%U(i,j) = val
2285 orig(globrowoff)%L = lelement(1,globrowoff)%L
2286 orig(globrowoff)%D = lelement(1,globrowoff)%D
2287 orig(globrowoff)%U = lelement(1,globrowoff)%U
2292 END SUBROUTINE setmatrixrow
2299 SUBROUTINE setmatrixrowl( globrow, L )
2303 INTEGER,
INTENT(IN) :: globrow
2304 REAL(dp),
INTENT(IN) :: L(:,:)
2309 INTEGER :: i, j, globrowoff
2314 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2315 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowL: Bad input globrow ',globrow;
CALL fl(ofu)
2316 CALL assert(.false.,
'blocktri #16')
2318 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2319 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowL: Non-local globrow ',globrow;
CALL fl(ofu)
2320 CALL assert(.false.,
'blocktri #17')
2323 globrowoff = globrow-startglobrow+1
2330 IF ( globrow .EQ. 1 )
THEN
2335 lelement(1, globrowoff)%L(i,j) = val
2342 orig(globrowoff)%L = lelement(1,globrowoff)%L
2347 END SUBROUTINE setmatrixrowl
2354 SUBROUTINE setmatrixrowd( globrow, D )
2358 INTEGER,
INTENT(IN) :: globrow
2359 REAL(dp),
INTENT(IN) :: D(:,:)
2364 INTEGER :: i, j, globrowoff
2369 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2370 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowD: Bad input globrow ',globrow;
CALL fl(ofu)
2371 CALL assert(.false.,
'blocktri #18')
2373 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2374 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowD: Non-local globrow ',globrow;
CALL fl(ofu)
2375 CALL assert(.false.,
'blocktri #19')
2378 globrowoff = globrow-startglobrow+1
2386 lelement(1, globrowoff)%D(i,j) = val
2393 orig(globrowoff)%D = lelement(1,globrowoff)%D
2398 END SUBROUTINE setmatrixrowd
2405 SUBROUTINE setmatrixrowu( globrow, U )
2409 INTEGER,
INTENT(IN) :: globrow
2410 REAL(dp),
INTENT(IN) :: U(:,:)
2415 INTEGER :: i, j, globrowoff
2420 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2421 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowU: Bad input globrow ',globrow;
CALL fl(ofu)
2422 CALL assert(.false.,
'blocktri #20')
2424 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2425 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowU: Non-local globrow ',globrow;
CALL fl(ofu)
2426 CALL assert(.false.,
'blocktri #21')
2429 globrowoff = globrow-startglobrow+1
2436 IF ( globrow .EQ. n )
THEN
2441 lelement(1, globrowoff)%U(i,j) = val
2448 orig(globrowoff)%U = lelement(1,globrowoff)%U
2453 END SUBROUTINE setmatrixrowu
2460 SUBROUTINE setmatrixrowcoll( globrow, Lj, j )
2464 INTEGER,
INTENT(IN) :: globrow
2465 REAL(dp),
INTENT(IN) :: lj(:)
2466 INTEGER,
INTENT(IN) :: j
2471 INTEGER :: i, globrowoff
2476 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2477 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2478 CALL assert(.false.,
'blocktri #22')
2480 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2481 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2482 CALL assert(.false.,
'blocktri #23')
2484 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2485 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2486 CALL assert(.false.,
'blocktri #24')
2489 globrowoff = globrow-startglobrow+1
2495 IF ( globrow .EQ. 1 )
THEN
2500 lelement(1, globrowoff)%L(i,j) = val
2506 orig(globrowoff)%L(:,j) = lelement(1,globrowoff)%L(:,j)
2511 END SUBROUTINE setmatrixrowcoll
2518 SUBROUTINE setmatrixrowcold( globrow, Dj, j )
2522 INTEGER,
INTENT(IN) :: globrow
2523 REAL(dp),
INTENT(IN) :: dj(:)
2524 INTEGER,
INTENT(IN) :: j
2529 INTEGER :: i, globrowoff
2534 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2535 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2536 CALL assert(.false.,
'blocktri #25')
2538 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2539 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2540 CALL assert(.false.,
'blocktri #26')
2542 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2543 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Bad j column ',j;
CALL fl(ofu)
2544 CALL assert(.false.,
'blocktri #27')
2547 globrowoff = globrow-startglobrow+1
2554 lelement(1, globrowoff)%D(i,j) = val
2560 orig(globrowoff)%D(:,j) = lelement(1,globrowoff)%D(:,j)
2565 END SUBROUTINE setmatrixrowcold
2572 SUBROUTINE setmatrixrowcolu( globrow, Uj, j )
2576 INTEGER,
INTENT(IN) :: globrow
2577 REAL(dp),
INTENT(IN) :: uj(:)
2578 INTEGER,
INTENT(IN) :: j
2583 INTEGER :: i, globrowoff
2588 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2589 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2590 CALL assert(.false.,
'blocktri #28')
2592 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2593 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2594 CALL assert(.false.,
'blocktri #29')
2596 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2597 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2598 CALL assert(.false.,
'blocktri #30')
2601 globrowoff = globrow-startglobrow+1
2607 IF ( globrow .EQ. n )
THEN
2612 lelement(1, globrowoff)%U(i,j) = val
2618 orig(globrowoff)%U(:,j) = lelement(1,globrowoff)%U(:,j)
2623 END SUBROUTINE setmatrixrowcolu
2630 SUBROUTINE setmatrixrhs( globrow, b )
2634 INTEGER,
INTENT(IN) :: globrow
2635 REAL(dp),
INTENT(IN) :: b(:)
2640 INTEGER :: i, globrowoff
2645 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2646 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2647 CALL assert(.false.,
'blocktri #31')
2649 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2650 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2651 CALL assert(.false.,
'blocktri #32')
2654 globrowoff = globrow-startglobrow+1
2661 lelement(1, globrowoff)%b(i,1) = val
2667 orig(globrowoff)%b = lelement(1,globrowoff)%b
2672 END SUBROUTINE setmatrixrhs
2679 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2683 INTEGER,
INTENT(IN) :: globrow
2684 REAL(dp),
INTENT(OUT) :: lj(:)
2685 INTEGER,
INTENT(IN) :: j
2690 INTEGER :: i, globrowoff
2695 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2696 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2697 CALL assert(.false.,
'blocktri #33')
2699 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2700 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2701 CALL assert(.false.,
'blocktri #34')
2703 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2704 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2705 CALL assert(.false.,
'blocktri #35')
2708 globrowoff = globrow-startglobrow+1
2714 IF ( globrow .EQ. 1 )
THEN
2717 val = lelement(1, globrowoff)%L(i,j)
2722 END SUBROUTINE getmatrixrowcoll
2729 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2733 INTEGER,
INTENT(IN) :: globrow
2734 REAL(dp),
INTENT(OUT) :: dj(:)
2735 INTEGER,
INTENT(IN) :: j
2740 INTEGER :: i, globrowoff
2745 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2746 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2747 CALL assert(.false.,
'blocktri #36')
2749 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2750 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2751 CALL assert(.false.,
'blocktri #37')
2753 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2754 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad j column ',j;
CALL fl(ofu)
2755 CALL assert(.false.,
'blocktri #38')
2758 globrowoff = globrow-startglobrow+1
2764 val = lelement(1, globrowoff)%D(i,j)
2768 END SUBROUTINE getmatrixrowcold
2775 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2779 INTEGER,
INTENT(IN) :: globrow
2780 REAL(dp),
INTENT(OUT) :: uj(:)
2781 INTEGER,
INTENT(IN) :: j
2786 INTEGER :: i, globrowoff
2791 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2792 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2793 CALL assert(.false.,
'blocktri #39')
2795 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2796 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2797 CALL assert(.false.,
'blocktri #40')
2799 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2800 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2801 CALL assert(.false.,
'blocktri #41')
2804 globrowoff = globrow-startglobrow+1
2810 IF ( globrow .EQ. n )
THEN
2813 val = lelement(1, globrowoff)%U(i,j)
2818 END SUBROUTINE getmatrixrowcolu
2825 SUBROUTINE getmatrixrhs( globrow, b )
2829 INTEGER,
INTENT(IN) :: globrow
2830 REAL(dp),
INTENT(OUT) :: b(:)
2835 INTEGER :: i, globrowoff
2840 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2841 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2842 CALL assert(.false.,
'blocktri #42')
2844 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2845 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2846 CALL assert(.false.,
'blocktri #43')
2849 globrowoff = globrow-startglobrow+1
2855 val = lelement(1, globrowoff)%b(i,1)
2859 END SUBROUTINE getmatrixrhs
2866 SUBROUTINE getsolutionvector( globrow, x )
2870 INTEGER,
INTENT(IN) :: globrow
2871 REAL(dp),
INTENT(OUT) :: x(:)
2876 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2877 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Bad input globrow ',globrow;
CALL fl(ofu)
2878 CALL assert(.false.,
'blocktri #44')
2880 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2881 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Non-local globrow ',globrow;
CALL fl(ofu)
2882 CALL assert(.false.,
'blocktri #45')
2888 x = selement(globrow)%x
2890 END SUBROUTINE getsolutionvector
2897 SUBROUTINE setidentitytestcase
2901 INTEGER :: i, j, globrow, globrowoff
2903 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case ------';
CALL fl(ofu)
2908 DO globrow = startglobrow, endglobrow, 1
2909 globrowoff = globrow-startglobrow+1
2912 lelement(1, globrowoff)%L(i,j) = zero
2913 IF ( i .EQ. j )
THEN
2914 lelement(1, globrowoff)%D(i,j) = one
2916 lelement(1, globrowoff)%D(i,j) = zero
2918 lelement(1, globrowoff)%U(i,j) = zero
2920 lelement(1, globrowoff)%b(i,1) = one
2925 orig(globrowoff)%L = lelement(1,globrowoff)%L
2926 orig(globrowoff)%D = lelement(1,globrowoff)%D
2927 orig(globrowoff)%U = lelement(1,globrowoff)%U
2928 orig(globrowoff)%b = lelement(1,globrowoff)%b
2935 END SUBROUTINE setidentitytestcase
2943 SUBROUTINE setidentityrhs
2947 INTEGER :: i, j, globrow, globrowoff
2949 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case RHS ------';
CALL fl(ofu)
2954 DO globrow = startglobrow, endglobrow, 1
2955 globrowoff = globrow-startglobrow+1
2957 lelement(1, globrowoff)%b(i,1) = one
2962 orig(globrowoff)%b = lelement(1,globrowoff)%b
2968 END SUBROUTINE setidentityrhs
2975 SUBROUTINE setrandomtestcase
2979 REAL(dp) :: randval, rmin, rmax
2981 INTEGER,
ALLOCATABLE :: seeds(:)
2982 INTEGER :: i, j, globrow, globrowoff
2984 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case ------';
CALL fl(ofu)
2989 CALL random_seed( size=rngwidth)
2990 ALLOCATE( seeds(rngwidth) )
2992 seeds(i) = i*(rank+100)*p
2994 CALL random_seed( put=seeds(1:rngwidth) )
3000 IF( writeproblemfile )
THEN
3001 IF(kpdbg)
WRITE(pfu,*) n*m;
CALL fl(pfu)
3002 IF(kpdbg)
WRITE(pfu,*) (3*n-2)*m*m;
CALL fl(pfu)
3010 DO globrow = startglobrow, endglobrow, 1
3011 globrowoff = globrow-startglobrow+1
3014 IF ( globrow .EQ. 1 )
THEN
3017 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3018 IF( writeproblemfile )
THEN
3019 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
3022 lelement(1, globrowoff)%L(i,j) = randval
3024 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3025 lelement(1, globrowoff)%D(i,j) = randval
3026 IF( writeproblemfile )
THEN
3027 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
3030 IF ( globrow .EQ. n )
THEN
3033 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3034 IF( writeproblemfile )
THEN
3035 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
3038 lelement(1, globrowoff)%U(i,j) = randval
3040 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3041 lelement(1, globrowoff)%b(i,1) = randval
3047 orig(globrowoff)%L = lelement(1,globrowoff)%L
3048 orig(globrowoff)%D = lelement(1,globrowoff)%D
3049 orig(globrowoff)%U = lelement(1,globrowoff)%U
3050 orig(globrowoff)%b = lelement(1,globrowoff)%b
3053 IF (writeproblemfile)
THEN
3054 DO globrow = startglobrow, endglobrow, 1
3055 globrowoff = globrow-startglobrow+1
3057 IF(kpdbg)
WRITE(pfu,*) lelement(1,globrowoff)%b(i,1);
CALL fl(pfu)
3066 END SUBROUTINE setrandomtestcase
3074 SUBROUTINE setrandomrhs( randseedoff )
3078 INTEGER,
INTENT(IN) :: randseedoff
3082 REAL(dp) :: randval, rmin, rmax
3084 INTEGER,
ALLOCATABLE :: seeds(:)
3085 INTEGER :: i, j, globrow, globrowoff
3087 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case RHS ------';
CALL fl(ofu)
3092 CALL random_seed( size=rngwidth)
3093 ALLOCATE( seeds(rngwidth) )
3095 seeds(i) = i*(rank+100+34+randseedoff)*p
3097 CALL random_seed( put=seeds(1:rngwidth) )
3105 DO globrow = startglobrow, endglobrow, 1
3106 globrowoff = globrow-startglobrow+1
3108 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3109 lelement(1, globrowoff)%b(i,1) = randval
3115 orig(globrowoff)%b = lelement(1,globrowoff)%b
3121 END SUBROUTINE setrandomrhs
3128 SUBROUTINE finalize( do_mpifinalize )
3132 LOGICAL,
INTENT(IN) :: do_mpifinalize
3139 INTEGER :: tempinvcount, tempmulcount, tempsolcount
3142 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3143 IF(kpdbg)
WRITE(ofu,*)
'------ Finalizing start ------';
CALL fl(ofu)
3151 IF (
ALLOCATED(lelement) )
THEN
3152 DO level = lbound(lelement,1), ubound(lelement,1)
3153 DO globrow = lbound(lelement,2), ubound(lelement,2)
3154 IF (
ALLOCATED(lelement(level,globrow)%L) )
THEN
3155 DEALLOCATE( lelement(level, globrow)%L )
3157 IF (
ALLOCATED(lelement(level,globrow)%D) )
THEN
3158 DEALLOCATE( lelement(level, globrow)%D )
3160 IF (
ALLOCATED(lelement(level,globrow)%U) )
THEN
3161 DEALLOCATE( lelement(level, globrow)%U )
3163 IF (
ALLOCATED(lelement(level,globrow)%b) )
THEN
3164 DEALLOCATE( lelement(level, globrow)%b )
3166 IF (
ALLOCATED(lelement(level,globrow)%pivot) )
THEN
3167 DEALLOCATE( lelement(level, globrow)%pivot )
3171 DEALLOCATE(lelement)
3173 IF (
ALLOCATED(orig) )
THEN
3174 DO globrow = lbound(orig,1), ubound(orig,1)
3175 IF (
ALLOCATED(orig(globrow)%L) )
THEN
3176 DEALLOCATE( orig(globrow)%L )
3178 IF (
ALLOCATED(orig(globrow)%D) )
THEN
3179 DEALLOCATE( orig(globrow)%D )
3181 IF (
ALLOCATED(orig(globrow)%U) )
THEN
3182 DEALLOCATE( orig(globrow)%U )
3184 IF (
ALLOCATED(orig(globrow)%b) )
THEN
3185 DEALLOCATE( orig(globrow)%b )
3187 IF (
ALLOCATED(orig(globrow)%pivot) )
THEN
3188 DEALLOCATE( orig(globrow)%pivot )
3194 IF (
ALLOCATED(selement) )
THEN
3195 DO globrow = 1, n, 1
3196 IF (
ALLOCATED(selement(globrow)%x) )
THEN
3197 DEALLOCATE( selement(globrow)%x )
3200 DEALLOCATE(selement)
3204 #if defined(MPI_OPT)
3205 IF ( usebarriers )
THEN
3206 IF(kpdbg)
WRITE(ofu,*)
'Barrier in finalize';
CALL fl(ofu)
3207 CALL mpi_barrier( siesta_comm, mpierr )
3208 IF(kpdbg)
WRITE(ofu,*)
'Done barrier in finalize';
CALL fl(ofu)
3211 IF ( do_mpifinalize )
THEN
3212 CALL mpi_finalize( mpierr )
3213 use_mpiwtime = .false.
3218 tempmulcount=totmatmulcount;
IF ( tempmulcount .LE. 0 ) tempmulcount = 1
3219 tempinvcount=totinvcount;
IF ( tempinvcount .LE. 0 ) tempinvcount = 1
3220 tempsolcount=totmatsolcount;
IF ( tempsolcount .LE. 0 ) tempsolcount = 1
3221 IF(kpdbg)
WRITE(ofu,*)
'N=', n,
' M=', m,
' P=', p,
' rank=', rank
3222 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory ', membytes/1e6,
' MB'
3223 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Computation ', tottime-totcommtime,
' sec'
3224 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Communication ', totcommtime,
' sec'
3225 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix inv ',totinvcount,
' * ', &
3226 totinvtime/tempinvcount,
' sec = ', totinvtime,
' sec'
3227 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix mul ',totmatmulcount,
' * ', &
3228 totmatmultime/tempmulcount,
' sec = ', totmatmultime,
' sec'
3229 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix sol ',totmatsolcount,
' * ', &
3230 totmatsoltime/tempsolcount,
' sec = ', totmatsoltime,
' sec'
3231 IF(kpdbg)
WRITE(ofu,*)
'Finalized rank ', rank
3232 IF(kpdbg)
WRITE(ofu,*)
'------ Finalization end ------';
CALL fl(ofu)
3233 END SUBROUTINE finalize
3240 LOGICAL FUNCTION iseven( num )
3241 INTEGER,
INTENT(IN) :: num
3242 iseven = mod(num,2) .EQ. 0
3250 LOGICAL FUNCTION isodd( num )
3251 INTEGER,
INTENT(IN) :: num
3252 isodd = (.NOT. iseven(num))
3260 FUNCTION lr2gr( locrow, level )
result ( globrow )
3264 INTEGER,
INTENT(IN) :: locrow
3265 INTEGER,
INTENT(IN) :: level
3277 levelloop:
DO i = level-1, 1, -1
3278 globrow = 2*globrow - 1
3284 IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow )
THEN
3285 CALL assert(.false.,
'blocktri #46')
3298 FUNCTION gr2lr( globrow, level )
result ( locrow )
3302 INTEGER,
INTENT(IN) :: globrow
3303 INTEGER,
INTENT(IN) :: level
3316 levelloop:
DO i = 1, level-1, 1
3317 IF ( iseven(locrow) )
THEN
3321 locrow = (locrow+1) / 2
3332 FUNCTION gr2rank( globrow )
result ( outrank )
3336 INTEGER,
INTENT(IN) :: globrow
3345 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
3350 IF ( globrow .LE. (m+1)*spill )
THEN
3351 outrank = (globrow-1)/(m+1)
3353 outrank = (globrow-1 - (m+1)*spill)/m + spill
3358 END FUNCTION gr2rank
3365 FUNCTION lr2rank( locrow, level )
result ( outrank )
3369 INTEGER,
INTENT(IN) :: locrow
3370 INTEGER,
INTENT(IN) :: level
3378 globrow = lr2gr( locrow, level )
3379 outrank = gr2rank( globrow )
3382 END FUNCTION lr2rank
3391 #if defined(MPI_OPT)
3392 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
3396 INTEGER,
INTENT(IN) :: locrow
3397 INTEGER,
INTENT(IN) :: level
3398 INTEGER,
INTENT(IN) :: startlocrow
3399 INTEGER,
INTENT(IN) :: endlocrow
3400 LOGICAL,
INTENT(IN) :: bonly
3405 INTEGER :: globrowoff
3406 INTEGER :: prevglobrowoff
3407 INTEGER :: nextglobrowoff
3412 IF ( level .GE. nlevels )
THEN
3413 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: no op ';
CALL fl(ofu)
3414 IF ( rank .NE. 0 )
THEN
3415 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: impossible ',rank;
CALL fl(ofu)
3416 CALL assert(.false.,
'blocktri #47')
3424 IF ( iseven( locrow ) )
THEN
3425 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: odd only ',globrow,
' ',locrow;
CALL fl(ofu);
CALL assert(.false.,
'blocktri #48')
3427 IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) )
THEN
3428 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: locrow prob ',globrow,
' ',locrow;
CALL fl(ofu);
CALL assert(.false.,
'blocktri #49')
3432 globrow = lr2gr( locrow, level )
3433 globrowoff = globrow - startglobrow + 1
3434 IF(kpdbg)
WRITE(ofu,*)
' Compute mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3439 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D ) )
THEN
3440 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #50')
3442 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L ) )
THEN
3443 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #51')
3445 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U ) )
THEN
3446 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #52')
3448 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b ) )
THEN
3449 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #53')
3455 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%D ) )
THEN
3456 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D +1';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #54')
3458 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%L ) )
THEN
3459 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L +1';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #55')
3461 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%U ) )
THEN
3462 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U +1';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #56')
3464 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%b ) )
THEN
3465 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b +1';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #57')
3471 IF ( .NOT. bonly )
THEN
3472 lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3474 lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3479 IF ( lr2rank(locrow-1,level) .LT. 0 )
THEN
3481 IF ( .NOT. bonly )
THEN
3482 lelement(level+1,globrowoff)%U = 0
3483 IF(kpdbg)
WRITE(ofu,*)
' ZERO L';
CALL fl(ofu)
3486 IF ( locrow .EQ. startlocrow )
THEN
3488 ELSE IF ( locrow .GT. startlocrow )
THEN
3489 prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3491 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible prev';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #58')
3494 IF ( .NOT. bonly )
THEN
3498 CALL bsystemclock(mattimer1)
3499 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3500 lelement(level,prevglobrowoff)%U, &
3501 one, lelement(level+1,globrowoff)%D )
3502 CALL bsystemclock(mattimer2)
3503 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3504 IF(kpdbg)
WRITE(ofu,*)
' Dtop';
CALL fl(ofu)
3509 CALL bsystemclock(mattimer1)
3510 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3511 lelement(level,prevglobrowoff)%L, &
3512 zero, lelement(level+1,globrowoff)%L )
3513 CALL bsystemclock(mattimer2)
3514 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3515 IF(kpdbg)
WRITE(ofu,*)
' L';
CALL fl(ofu)
3521 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3522 lelement(level,prevglobrowoff)%b(:,1), &
3523 one, lelement(level+1,globrowoff)%b(:,1) )
3524 IF(kpdbg)
WRITE(ofu,*)
' btop';
CALL fl(ofu)
3530 IF ( lr2rank(locrow+1,level) .LT. 0 )
THEN
3532 IF ( .NOT. bonly )
THEN
3533 lelement(level+1,globrowoff)%U = 0
3534 IF(kpdbg)
WRITE(ofu,*)
' ZERO U';
CALL fl(ofu)
3537 IF ( locrow .EQ. endlocrow )
THEN
3538 nextglobrowoff = endglobrow - startglobrow + 2
3539 ELSE IF ( locrow .LT. endlocrow )
THEN
3540 nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3542 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible next';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #59')
3545 IF ( .NOT. bonly )
THEN
3549 CALL bsystemclock(mattimer1)
3550 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3551 lelement(level,nextglobrowoff)%L, &
3552 one, lelement(level+1,globrowoff)%D )
3553 CALL bsystemclock(mattimer2)
3554 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3555 IF(kpdbg)
WRITE(ofu,*)
' Dbot';
CALL fl(ofu)
3560 CALL bsystemclock(mattimer1)
3561 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3562 lelement(level,nextglobrowoff)%U, &
3563 zero, lelement(level+1,globrowoff)%U )
3564 CALL bsystemclock(mattimer2)
3565 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3566 IF(kpdbg)
WRITE(ofu,*)
' U';
CALL fl(ofu)
3572 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3573 lelement(level,nextglobrowoff)%b(:,1), &
3574 one, lelement(level+1,globrowoff)%b(:,1) )
3575 IF(kpdbg)
WRITE(ofu,*)
' bbot';
CALL fl(ofu)
3577 END SUBROUTINE computeforwardoddrowhats
3584 SUBROUTINE forwardsolve
3591 INTEGER :: globrowoff
3592 INTEGER :: startlocrow
3593 INTEGER :: endlocrow
3594 INTEGER :: rowoffset
3596 INTEGER :: nbrmpireqindx
3597 INTEGER :: nbrmpireqcnt
3598 INTEGER :: nbrmpinirecvs
3599 INTEGER :: nbrmpinisends
3600 INTEGER :: nbrmpireq(6)
3601 INTEGER :: nbrmpierr(6)
3602 INTEGER :: mpiwaiterr
3604 INTEGER :: nbrmpistatuses(mpi_status_size,6)
3605 INTEGER :: nbrmpistatus(mpi_status_size)
3606 INTEGER :: waitmatchindex
3611 CALL bsystemclock(globtimer1)
3614 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3615 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve start ------';
CALL fl(ofu)
3616 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3619 CALL plbforwardinitialize()
3625 forwardsolveloop:
DO level = 1, nlevels, 1
3627 IF ( usebarriers )
THEN
3628 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward level ', level;
CALL fl(ofu)
3629 CALL mpi_barrier( siesta_comm, mpierr )
3630 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward level ', level;
CALL fl(ofu)
3638 DO globrow = startglobrow, endglobrow, 1
3639 locrow = gr2lr( globrow, level )
3640 IF ( locrow .GT. 0 )
THEN
3641 IF ( locrow .LT. startlocrow )
THEN
3642 startlocrow = locrow
3644 IF ( locrow .GT. endlocrow )
THEN
3653 IF ( startlocrow .GT. endlocrow )
THEN
3654 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward level ', level;
CALL fl(ofu)
3655 CALL plbforwardinitializelevel( level, .false. )
3656 CALL plbforwardfinalizelevel( level, .false. )
3657 cycle forwardsolveloop
3659 CALL plbforwardinitializelevel( level, .true. )
3662 IF(kpdbg)
WRITE(ofu,*)
'**** Forward level ', level,
' ****';
CALL fl(ofu)
3667 IF ( isodd(startlocrow) .AND. &
3668 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
3670 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3671 ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3672 ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3673 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3674 CALL chargememory( (2*m*m+m)*dpsz )
3676 lelement(level, globrowoff)%L = 0
3677 lelement(level, globrowoff)%U = 0
3678 lelement(level, globrowoff)%b = 0
3680 IF ( isodd(endlocrow) .AND. &
3681 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
3682 globrowoff = endglobrow-startglobrow+2
3683 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3684 ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3685 ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3686 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3687 CALL chargememory( (2*m*m+m)*dpsz )
3689 lelement(level, globrowoff)%L = 0
3690 lelement(level, globrowoff)%U = 0
3691 lelement(level, globrowoff)%b = 0
3697 IF ( level .LT. nlevels )
THEN
3698 DO locrow = startlocrow, endlocrow, 1
3699 IF ( isodd(locrow) )
THEN
3700 globrow = lr2gr( locrow, level )
3701 globrowoff = globrow-startglobrow+1
3702 IF ( .NOT.
ALLOCATED( lelement(level+1, globrowoff)%L ) )
THEN
3703 ALLOCATE( lelement(level+1, globrowoff)%L(m,m) )
3704 ALLOCATE( lelement(level+1, globrowoff)%D(m,m) )
3705 ALLOCATE( lelement(level+1, globrowoff)%U(m,m) )
3706 ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3707 CALL chargememory( (2*m*m+m)*dpsz )
3709 lelement(level+1, globrowoff)%L = 0
3710 lelement(level+1, globrowoff)%D = 0
3711 lelement(level+1, globrowoff)%U = 0
3712 lelement(level+1, globrowoff)%b = 0
3720 DO nbrmpireqindx = 1, 6
3721 nbrmpireq(nbrmpireqindx) = mpi_request_null
3730 nbrrank = lr2rank(startlocrow-1,level)
3731 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
3737 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
3739 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3740 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #60')
3742 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3743 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #61')
3745 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3746 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #62')
3749 CALL bsystemclock(loctimer1)
3750 CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3751 nbrrank, 1, siesta_comm, nbrmpireq(nbrmpireqindx), &
3752 nbrmpierr(nbrmpireqindx) )
3753 nbrmpireqindx = nbrmpireqindx + 1
3754 nbrmpinirecvs = nbrmpinirecvs + 1
3755 IF(kpdbg)
WRITE(ofu,*)
' Irecv 1 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3757 CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3758 nbrrank, 2, siesta_comm, nbrmpireq(nbrmpireqindx), &
3759 nbrmpierr(nbrmpireqindx))
3760 nbrmpireqindx = nbrmpireqindx + 1
3761 nbrmpinirecvs = nbrmpinirecvs + 1
3762 IF(kpdbg)
WRITE(ofu,*)
' Irecv 2 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3764 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3765 nbrrank, 3, siesta_comm, nbrmpireq(nbrmpireqindx), &
3766 nbrmpierr(nbrmpireqindx))
3767 nbrmpireqindx = nbrmpireqindx + 1
3768 nbrmpinirecvs = nbrmpinirecvs + 1
3769 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3770 CALL bsystemclock(loctimer2)
3771 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3774 nbrrank = lr2rank(endlocrow+1,level)
3775 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
3780 globrowoff = endglobrow-startglobrow+2
3781 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
3783 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3784 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #63')
3786 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3787 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #64')
3789 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3790 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #65')
3793 CALL bsystemclock(loctimer1)
3794 CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3795 nbrrank, 4, siesta_comm, nbrmpireq(nbrmpireqindx), &
3796 nbrmpierr(nbrmpireqindx))
3797 nbrmpireqindx = nbrmpireqindx + 1
3798 nbrmpinirecvs = nbrmpinirecvs + 1
3799 IF(kpdbg)
WRITE(ofu,*)
' Irecv 4 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3801 CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3802 nbrrank, 5, siesta_comm, nbrmpireq(nbrmpireqindx), &
3803 nbrmpierr(nbrmpireqindx))
3804 nbrmpireqindx = nbrmpireqindx + 1
3805 nbrmpinirecvs = nbrmpinirecvs + 1
3806 IF(kpdbg)
WRITE(ofu,*)
' Irecv 5 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3808 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3809 nbrrank, 6, siesta_comm, nbrmpireq(nbrmpireqindx), &
3810 nbrmpierr(nbrmpireqindx))
3811 nbrmpireqindx = nbrmpireqindx + 1
3812 nbrmpinirecvs = nbrmpinirecvs + 1
3813 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3814 CALL bsystemclock(loctimer2)
3815 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3822 DO locrow = startlocrow, endlocrow, 1
3823 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
3824 globrow = lr2gr( locrow, level )
3825 globrowoff = globrow-startglobrow+1
3827 IF(kpdbg)
WRITE(ofu,*)
' Compute even hats ',globrow,
' ', locrow;
CALL fl(ofu)
3832 IF ( level .EQ. nlevels )
THEN
3833 IF ( (rank .NE. 0) .OR. &
3834 (startlocrow .NE. endlocrow) .OR. &
3835 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
3836 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
3837 CALL assert(.false.,
'blocktri #66')
3840 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
3841 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #67')
3843 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
3844 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #68')
3846 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3847 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #69')
3849 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3850 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #70')
3852 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3853 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #71')
3859 CALL bsystemclock(mattimer1)
3860 CALL plbdgetrf( lelement(level,globrowoff)%D, &
3861 lelement(1,globrowoff)%pivot, blaserr )
3862 CALL bsystemclock(mattimer2)
3863 CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3864 IF (blaserr .NE. 0)
THEN
3865 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRF failed ', blaserr;
CALL fl(ofu);
CALL assert(.false.,
'blocktri #72')
3871 IF ( level .LT. nlevels )
THEN
3872 CALL bsystemclock(mattimer1)
3873 CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3874 lelement(1,globrowoff)%pivot, &
3875 lelement(level,globrowoff)%L, blaserr )
3876 CALL bsystemclock(mattimer2)
3877 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3878 IF (blaserr .NE. 0)
THEN
3879 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS L failed';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #73')
3881 CALL bsystemclock(mattimer1)
3882 CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3883 lelement(1,globrowoff)%pivot, &
3884 lelement(level,globrowoff)%U, blaserr )
3885 CALL bsystemclock(mattimer2)
3886 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3887 IF (blaserr .NE. 0)
THEN
3888 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS U failed';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #74')
3895 CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
3896 lelement(1,globrowoff)%pivot, &
3897 lelement(level,globrowoff)%b, blaserr )
3898 IF (blaserr .NE. 0)
THEN
3899 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #75')
3906 DO rowoffset = -1, 1, 2
3907 nbrrank = lr2rank(locrow+rowoffset, level)
3908 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
3909 CALL bsystemclock(loctimer1)
3910 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3911 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #76')
3913 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3914 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #77')
3916 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3917 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #78')
3920 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
3921 globrowoff = globrow-startglobrow+1
3922 stag = (((1-rowoffset))/2)*3
3923 CALL mpi_isend( lelement(level,globrowoff)%L, m*m, mpi_real8, &
3924 nbrrank, stag+1, siesta_comm, &
3925 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3926 nbrmpireqindx = nbrmpireqindx + 1
3927 nbrmpinisends = nbrmpinisends + 1
3928 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+1,
' ',globrowoff;
CALL fl(ofu)
3930 CALL mpi_isend( lelement(level,globrowoff)%U, m*m, mpi_real8, &
3931 nbrrank, stag+2, siesta_comm, &
3932 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3933 nbrmpireqindx = nbrmpireqindx + 1
3934 nbrmpinisends = nbrmpinisends + 1
3935 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+2,
' ',globrowoff;
CALL fl(ofu)
3937 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3938 nbrrank, stag+3, siesta_comm, &
3939 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3940 nbrmpireqindx = nbrmpireqindx + 1
3941 nbrmpinisends = nbrmpinisends + 1
3942 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
3943 CALL bsystemclock(loctimer2)
3944 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3953 DO locrow = startlocrow, endlocrow, 1
3954 globrow = lr2gr( locrow, level )
3955 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
3956 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
3957 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3958 CALL computeforwardoddrowhats( locrow, level, &
3959 startlocrow, endlocrow, .false. )
3969 nbrmpireqcnt = nbrmpireqindx - 1
3970 IF ( nbrmpireqcnt .GT. 0 )
THEN
3971 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
3972 CALL bsystemclock(loctimer1)
3973 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3974 CALL bsystemclock(loctimer2)
3975 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3981 IF ( isodd(startlocrow) )
THEN
3982 globrow = lr2gr( startlocrow, level )
3983 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
3984 CALL computeforwardoddrowhats( startlocrow, level, &
3985 startlocrow, endlocrow, .false. )
3987 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
3988 globrow = lr2gr( endlocrow, level )
3989 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
3990 CALL computeforwardoddrowhats( endlocrow, level, &
3991 startlocrow, endlocrow, .false. )
3993 CALL plbforwardfinalizelevel( level, .true. )
3994 END DO forwardsolveloop
3997 CALL plbforwardfinalize()
4000 matdirtied = .false.
4001 rhsdirtied = .false.
4004 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4005 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve end ------';
CALL fl(ofu)
4007 CALL bsystemclock(globtimer2)
4008 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4010 END SUBROUTINE forwardsolve
4018 SUBROUTINE forwardupdateb
4025 INTEGER :: globrowoff
4026 INTEGER :: startlocrow
4027 INTEGER :: endlocrow
4028 INTEGER :: rowoffset
4030 INTEGER :: nbrmpireqindx
4031 INTEGER :: nbrmpireqcnt
4032 INTEGER :: nbrmpinirecvs
4033 INTEGER :: nbrmpinisends
4034 INTEGER :: nbrmpireq(6)
4035 INTEGER :: nbrmpierr(6)
4036 INTEGER :: mpiwaiterr
4038 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
4039 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4040 INTEGER :: waitmatchindex
4046 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4047 IF(kpdbg)
WRITE(ofu,*)
'------ Forward updateb start ------';
CALL fl(ofu)
4048 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4051 IF ( matdirtied )
THEN
4052 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4053 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix ------';
CALL fl(ofu)
4054 IF(kpdbg)
WRITE(ofu,*)
' ------ Forward solve, not updateb... ------';
CALL fl(ofu)
4059 CALL bsystemclock(globtimer1)
4062 CALL plbforwardinitialize()
4068 forwardsolveloop:
DO level = 1, nlevels, 1
4070 IF ( usebarriers )
THEN
4071 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward updateb level ', level;
CALL fl(ofu)
4072 CALL mpi_barrier( siesta_comm, mpierr )
4073 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward updateb level ', level;
CALL fl(ofu)
4081 DO globrow = startglobrow, endglobrow, 1
4082 locrow = gr2lr( globrow, level )
4083 IF ( locrow .GT. 0 )
THEN
4084 IF ( locrow .LT. startlocrow )
THEN
4085 startlocrow = locrow
4087 IF ( locrow .GT. endlocrow )
THEN
4096 IF ( startlocrow .GT. endlocrow )
THEN
4097 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward updateb level ',level;
CALL fl(ofu)
4098 CALL plbforwardinitializelevel( level, .false. )
4099 CALL plbforwardfinalizelevel( level, .false. )
4100 cycle forwardsolveloop
4102 CALL plbforwardinitializelevel( level, .true. )
4105 IF(kpdbg)
WRITE(ofu,*)
'**** Forward updateb level ', level,
' ****';
CALL fl(ofu)
4111 IF ( isodd(startlocrow) .AND. &
4112 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
4114 IF( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4115 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4116 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
4117 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +0 memory error';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #79')
4119 lelement(level, globrowoff)%b = 0
4121 IF ( isodd(endlocrow) .AND. &
4122 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
4123 globrowoff = endglobrow-startglobrow+2
4124 IF ( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4125 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4126 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
4127 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +2 memory error';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #80')
4129 lelement(level, globrowoff)%b = 0
4136 IF ( level .LT. nlevels )
THEN
4137 DO locrow = startlocrow, endlocrow, 1
4138 IF ( isodd(locrow) )
THEN
4139 globrow = lr2gr( locrow, level )
4140 globrowoff = globrow-startglobrow+1
4141 IF ( .NOT. (
ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
4142 ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
4143 ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
4144 ALLOCATED( lelement(level+1, globrowoff)%b ) ) )
THEN
4145 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +1 memory error';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #81')
4147 lelement(level+1, globrowoff)%b = 0
4155 DO nbrmpireqindx = 1, 6
4156 nbrmpireq(nbrmpireqindx) = mpi_request_null
4165 nbrrank = lr2rank(startlocrow-1,level)
4166 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
4172 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
4174 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4175 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #82')
4177 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4178 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #83')
4180 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4181 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #84')
4184 CALL bsystemclock(loctimer1)
4185 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4186 nbrrank, 3, siesta_comm, nbrmpireq(nbrmpireqindx), &
4187 nbrmpierr(nbrmpireqindx))
4188 nbrmpireqindx = nbrmpireqindx + 1
4189 nbrmpinirecvs = nbrmpinirecvs + 1
4190 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
4191 CALL bsystemclock(loctimer2)
4192 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4195 nbrrank = lr2rank(endlocrow+1,level)
4196 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
4201 globrowoff = endglobrow-startglobrow+2
4202 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
4204 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4205 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #85')
4207 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4208 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #86')
4210 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4211 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #87')
4214 CALL bsystemclock(loctimer1)
4215 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4216 nbrrank, 6, siesta_comm, nbrmpireq(nbrmpireqindx), &
4217 nbrmpierr(nbrmpireqindx))
4218 nbrmpireqindx = nbrmpireqindx + 1
4219 nbrmpinirecvs = nbrmpinirecvs + 1
4220 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
4221 CALL bsystemclock(loctimer2)
4222 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4229 DO locrow = startlocrow, endlocrow, 1
4230 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
4231 globrow = lr2gr( locrow, level )
4232 globrowoff = globrow-startglobrow+1
4234 IF(kpdbg)
WRITE(ofu,*)
' Computed even hats ',globrow,
' ', locrow;
CALL fl(ofu)
4239 IF ( level .EQ. nlevels )
THEN
4240 IF ( (rank .NE. 0) .OR. &
4241 (startlocrow .NE. endlocrow) .OR. &
4242 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
4243 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
4244 CALL assert(.false.,
'blocktri #88')
4247 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
4248 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #89')
4250 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
4251 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #90')
4253 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4254 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #91')
4256 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4257 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #92')
4259 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4260 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #93')
4270 CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
4271 lelement(1,globrowoff)%pivot, &
4272 lelement(level,globrowoff)%b, blaserr )
4273 IF (blaserr .NE. 0)
THEN
4274 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #94')
4281 DO rowoffset = -1, 1, 2
4282 nbrrank = lr2rank(locrow+rowoffset, level)
4283 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
4284 CALL bsystemclock(loctimer1)
4285 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4286 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #95')
4288 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4289 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #96')
4291 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4292 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu);
CALL assert(.false.,
'blocktri #97')
4295 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
4296 globrowoff = globrow-startglobrow+1
4297 stag = (((1-rowoffset))/2)*3
4298 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
4299 nbrrank, stag+3, siesta_comm, &
4300 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
4301 nbrmpireqindx = nbrmpireqindx + 1
4302 nbrmpinisends = nbrmpinisends + 1
4303 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
4304 CALL bsystemclock(loctimer2)
4305 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4314 DO locrow = startlocrow, endlocrow, 1
4315 globrow = lr2gr( locrow, level )
4316 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
4317 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
4318 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
4319 CALL computeforwardoddrowhats( locrow, level, &
4320 startlocrow, endlocrow, .true. )
4330 nbrmpireqcnt = nbrmpireqindx - 1
4331 IF ( nbrmpireqcnt .GT. 0 )
THEN
4332 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4333 CALL bsystemclock(loctimer1)
4334 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4335 CALL bsystemclock(loctimer2)
4336 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4342 IF ( isodd(startlocrow) )
THEN
4343 globrow = lr2gr( startlocrow, level )
4344 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
4345 CALL computeforwardoddrowhats( startlocrow, level, &
4346 startlocrow, endlocrow, .true. )
4348 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
4349 globrow = lr2gr( endlocrow, level )
4350 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
4351 CALL computeforwardoddrowhats( endlocrow, level, &
4352 startlocrow, endlocrow, .true. )
4354 CALL plbforwardfinalizelevel( level, .true. )
4355 END DO forwardsolveloop
4358 CALL plbforwardfinalize()
4361 rhsdirtied = .false.
4364 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4365 IF(kpdbg)
WRITE(ofu,*)
'------ updateb solve end ------';
CALL fl(ofu)
4367 CALL bsystemclock(globtimer2)
4368 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4370 END SUBROUTINE forwardupdateb
4377 SUBROUTINE backwardsolve
4384 INTEGER :: globrowoff
4385 INTEGER :: prevglobrow
4386 INTEGER :: nextglobrow
4387 INTEGER :: startlocrow
4388 INTEGER :: endlocrow
4389 INTEGER :: rowoffset
4391 INTEGER :: nbrmpireqindx
4392 INTEGER :: nbrmpireqcnt
4393 INTEGER :: nbrmpinirecvs
4394 INTEGER :: nbrmpinisends
4395 INTEGER :: nbrmpireq(2)
4396 INTEGER :: nbrmpierr(2)
4397 INTEGER :: mpiwaiterr
4399 INTEGER :: nbrmpistatuses(mpi_status_size,2)
4400 INTEGER :: nbrmpistatus(mpi_status_size)
4401 INTEGER :: waitmatchindex
4406 CALL bsystemclock(globtimer1)
4409 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4410 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve start ------';
CALL fl(ofu)
4413 IF ( matdirtied )
THEN
4414 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4415 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix; updating... ------';
CALL fl(ofu)
4418 IF ( rhsdirtied )
THEN
4419 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4420 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied RHS; updating... ------';
CALL fl(ofu)
4421 CALL forwardupdateb()
4425 CALL plbbackwardinitialize()
4430 IF ( (.NOT.
ALLOCATED(lelement)) .OR. (.NOT.
ALLOCATED(selement)) )
THEN
4431 IF(kpdbg)
WRITE(ofu,*)
'Forward not called before backward?';
CALL fl(ofu)
4439 backwardsolveloop:
DO level = nlevels, 1, -1
4441 IF ( usebarriers )
THEN
4442 IF(kpdbg)
WRITE(ofu,*)
'Barrier at backward level ', level;
CALL fl(ofu)
4443 CALL mpi_barrier( siesta_comm, mpierr )
4444 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at backward level ', level;
CALL fl(ofu)
4452 DO globrow = startglobrow, endglobrow, 1
4453 locrow = gr2lr( globrow, level )
4454 IF ( locrow .GT. 0 )
THEN
4455 IF ( locrow .LT. startlocrow )
THEN
4456 startlocrow = locrow
4458 IF ( locrow .GT. endlocrow )
THEN
4467 IF ( startlocrow .GT. endlocrow )
THEN
4468 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at backward level', level;
CALL fl(ofu)
4469 CALL plbbackwardinitializelevel( level, .false. )
4470 CALL plbbackwardfinalizelevel( level, .false. )
4471 cycle backwardsolveloop
4473 CALL plbbackwardinitializelevel( level, .true. )
4476 IF(kpdbg)
WRITE(ofu,*)
'**** Backward level ', level,
' ****';
CALL fl(ofu)
4481 DO nbrmpireqindx = 1, 2
4482 nbrmpireq(nbrmpireqindx) = mpi_request_null
4491 nbrrank = lr2rank(startlocrow-1,level)
4492 IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
4498 globrow = lr2gr(startlocrow-1,level)
4499 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4500 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4501 ALLOCATE( selement(globrow)%x(m) )
4502 CALL chargememory( m*dpsz )
4504 CALL bsystemclock(loctimer1)
4505 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4506 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4507 nbrmpireqindx = nbrmpireqindx + 1
4508 nbrmpinirecvs = nbrmpinirecvs + 1
4509 CALL bsystemclock(loctimer2)
4510 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4512 nbrrank = lr2rank(endlocrow+1,level)
4513 IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
4519 globrow = lr2gr(endlocrow+1,level)
4520 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4521 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4522 ALLOCATE( selement(globrow)%x(m) )
4523 CALL chargememory( m*dpsz )
4525 CALL bsystemclock(loctimer1)
4526 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4527 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4528 nbrmpireqindx = nbrmpireqindx + 1
4529 nbrmpinirecvs = nbrmpinirecvs + 1
4530 CALL bsystemclock(loctimer2)
4531 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4538 DO locrow = startlocrow, endlocrow, 1
4539 IF ( isodd( locrow ) )
THEN
4544 DO rowoffset = -1, 1, 2
4545 nbrrank = lr2rank(locrow+rowoffset, level)
4546 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
4547 IF(kpdbg)
WRITE(ofu,*)
' Isend ', locrow,
' ', nbrrank;
CALL fl(ofu)
4548 globrow = lr2gr(locrow,level)
4549 IF ( .NOT.
ALLOCATED(selement(globrow)%x) )
THEN
4550 IF(kpdbg)
WRITE(ofu,*)
'ERROR BISEND AT LEVEL', level;
CALL fl(ofu)
4551 IF(kpdbg)
WRITE(ofu,*) locrow,
' ', globrow,
' ', nbrrank;
CALL fl(ofu)
4552 CALL assert(.false.,
'blocktri #98')
4554 stag = (1-rowoffset)/2+1
4555 CALL bsystemclock(loctimer1)
4556 CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4557 nbrrank, stag, siesta_comm, nbrmpireq(nbrmpireqindx), &
4558 nbrmpierr(nbrmpireqindx) )
4559 nbrmpireqindx = nbrmpireqindx + 1
4560 nbrmpinisends = nbrmpinisends + 1
4561 CALL bsystemclock(loctimer2)
4562 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4571 nbrmpireqcnt = nbrmpireqindx - 1
4572 IF ( nbrmpireqcnt .GT. 0 )
THEN
4573 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4574 CALL bsystemclock(loctimer1)
4575 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4576 CALL bsystemclock(loctimer2)
4577 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4583 DO locrow = startlocrow, endlocrow, 1
4584 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
4585 globrow = lr2gr( locrow, level )
4586 globrowoff = globrow - startglobrow + 1
4587 IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) )
THEN
4588 IF(kpdbg)
WRITE(ofu,*)
'ERROR ',level,
' ',globrow,
' ', locrow;
CALL fl(ofu)
4589 CALL assert(.false.,
'blocktri #99')
4592 IF(kpdbg)
WRITE(ofu,*)
' Compute solution ', globrow,
' ', locrow;
CALL fl(ofu)
4594 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4595 ALLOCATE( selement(globrow)%x(m) )
4596 CALL chargememory( m*dpsz )
4602 selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4607 nbrrank = lr2rank(locrow-1,level)
4608 IF ( nbrrank .GE. 0 )
THEN
4609 prevglobrow = lr2gr(locrow-1,level)
4610 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4611 selement(prevglobrow)%x, &
4612 one, selement(globrow)%x )
4618 nbrrank = lr2rank(locrow+1,level)
4619 IF ( nbrrank .GE. 0 )
THEN
4620 nextglobrow = lr2gr(locrow+1,level)
4621 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4622 selement(nextglobrow)%x, &
4623 one, selement(globrow)%x )
4630 IF(kpdbg)
WRITE(ofu,*)
' x[',globrow,
']=',selement(globrow)%x;
CALL fl(ofu)
4634 CALL plbbackwardfinalizelevel( level, .false. )
4635 END DO backwardsolveloop
4638 CALL plbbackwardfinalize()
4641 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4642 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve end ------';
CALL fl(ofu)
4644 CALL bsystemclock(globtimer2)
4645 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4647 END SUBROUTINE backwardsolve
4654 SUBROUTINE verifysolution
4658 INTEGER :: i, k, globrow, globrowoff
4659 REAL(dp) :: term, totrmserr
4661 INTEGER :: nbrmpireqindx
4662 INTEGER :: nbrmpireqcnt
4663 INTEGER :: nbrmpinirecvs
4664 INTEGER :: nbrmpinisends
4665 INTEGER :: nbrmpireq(2)
4666 INTEGER :: nbrmpierr(2)
4667 INTEGER :: mpiwaiterr
4669 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4670 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4671 INTEGER :: waitmatchindex
4674 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4675 IF(kpdbg)
WRITE(ofu,*)
'------ Verifying solution ------';
CALL fl(ofu)
4683 nbrrank = gr2rank(startglobrow-1)
4684 IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4685 IF ( .NOT.
ALLOCATED( selement(startglobrow-1)%x ) )
THEN
4686 ALLOCATE( selement(startglobrow-1)%x(m) )
4688 CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4689 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4690 nbrmpireqindx = nbrmpireqindx + 1
4691 nbrmpinirecvs = nbrmpinirecvs + 1
4693 nbrrank = gr2rank(endglobrow+1)
4694 IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4695 IF ( .NOT.
ALLOCATED( selement(endglobrow+1)%x ) )
THEN
4696 ALLOCATE( selement(endglobrow+1)%x(m) )
4698 CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4699 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4700 nbrmpireqindx = nbrmpireqindx + 1
4701 nbrmpinirecvs = nbrmpinirecvs + 1
4703 nbrrank = gr2rank(startglobrow-1)
4704 IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4705 CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4706 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4707 nbrmpireqindx = nbrmpireqindx + 1
4708 nbrmpinisends = nbrmpinisends + 1
4710 nbrrank = gr2rank(endglobrow+1)
4711 IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4712 CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4713 siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4714 nbrmpireqindx = nbrmpireqindx + 1
4715 nbrmpinisends = nbrmpinisends + 1
4717 nbrmpireqcnt = nbrmpireqindx - 1
4718 IF ( nbrmpireqcnt .GT. 0 )
THEN
4719 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4720 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4725 DO globrow = startglobrow, endglobrow, 1
4726 globrowoff = globrow - startglobrow + 1
4729 IF ( globrow .GT. 1 )
THEN
4730 term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4733 term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4735 IF (writesolution)
THEN
4738 IF(kpdbg)
WRITE(ofu,*)
'X[',(globrow-1)*m+k,
']=', selement(globrow)%x(k);
CALL fl(ofu)
4743 IF ( globrow .LT. n )
THEN
4744 term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4747 totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4751 IF ( endglobrow .LT. startglobrow )
THEN
4754 totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4756 IF(kpdbg)
WRITE(ofu,
'(A,E15.8E3)')
'TOTAL RMS ERROR = ', totrmserr;
CALL fl(ofu)
4757 IF(kpdbg)
WRITE(ofu,*)
'------ Solution verified ------';
CALL fl(ofu)
4758 END SUBROUTINE verifysolution
4762 SUBROUTINE checkconditionnumber(nblkrow,bsize,anorm,rcond,info)
4764 INTEGER,
INTENT(IN) :: nblkrow, bsize
4765 INTEGER,
INTENT(OUT) :: info
4766 REAL(dp),
INTENT(OUT) :: rcond, anorm
4769 INTEGER :: iblkrow, n1, i, j, offs, offsu, offk, kl, ku, ldab, icol, &
4771 REAL(dp),
ALLOCATABLE :: Band(:,:), Acol(:), work(:)
4772 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: iwork, ipiv
4776 IF (nranks.NE.1)
THEN
4777 print *,
"Returning from CheckConditionNumber"
4788 ALLOCATE(band(ldab, n1), ipiv(n1), iwork(n1), work(3*n1))
4796 DO iblkrow = 1, nblkrow
4799 rowmin = offs-bsize+1; rowmax = offsu+bsize
4801 acol(rowmin:offs)=orig(iblkrow-1)%U(:,icol)
4802 IF (iblkrow.LT.nblkrow) &
4803 acol(offsu+1:rowmax)=orig(iblkrow+1)%L(:,icol)
4804 acol(offs+1:offsu)=orig(iblkrow)%D(:,icol)
4806 IF (iblkrow.EQ.1)
THEN
4808 ELSE IF (iblkrow.EQ.nblkrow)
THEN
4811 anorm = max(anorm, sum(abs(acol(rowmin:rowmax))))
4813 DO i = max(1,j-ku,rowmin), min(n1,j+kl,rowmax)
4814 band(offk+i-j,j) = acol(i)
4819 offsu = offsu + bsize
4825 CALL dgbtrf( n1, n1, kl, ku, band, ldab, ipiv, info )
4826 CALL dgbcon(
'1', n1, kl, ku, band, ldab, ipiv, anorm, rcond, &
4829 IF (info.eq.0 .and. rcond.ne.zero)
THEN
4835 DEALLOCATE(band,work,iwork,ipiv)
4837 END SUBROUTINE checkconditionnumber
4840 SUBROUTINE checksymmetry (asymIndx)
4842 REAL(dp),
INTENT (OUT) :: asymIndx
4843 REAL(dp) :: partdiff, eij, eji, summ
4844 REAL(dp),
DIMENSION(2) :: temp
4845 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: sendBuf, bottomLeftMatrix
4847 INTEGER :: topNeighbor, bottomNeighbor
4849 INTEGER :: mpistatus(MPI_STATUS_SIZE), mpi_err
4850 INTEGER :: blkrow,blktype,i,j
4852 ALLOCATE(bottomleftmatrix(m,m), sendbuf(m,m), stat=i)
4853 CALL assert(i.EQ.0,
'Allocation error in CheckSymmetry')
4857 IF(nranks.EQ.1)
THEN
4858 topneighbor=mpi_proc_null
4859 bottomneighbor=mpi_proc_null
4862 topneighbor=mpi_proc_null
4863 bottomneighbor=rank+1
4864 ELSE IF (rank.EQ.nranks-1)
THEN
4866 bottomneighbor=mpi_proc_null
4867 sendbuf=lelement(1,1)%L
4870 bottomneighbor=rank+1
4871 sendbuf=lelement(1,1)%L
4875 CALL mpi_sendrecv(sendbuf,m*m,mpi_real8,topneighbor,1, &
4876 bottomleftmatrix,m*m,mpi_real8,bottomneighbor,1,siesta_comm, &
4880 IF (rank.EQ.nranks-1) bottomleftmatrix=0
4885 DO blkrow=1,endglobrow-startglobrow+1
4890 eij=lelement(1,blkrow)%D(i,j)
4891 eji=lelement(1,blkrow)%D(j,i)
4892 partdiff = partdiff+(eij-eji)*(eij-eji)
4897 temp(2) = temp(2) + eij*eij + eji*eji
4901 temp(2) = temp(2) + eij*eij
4905 temp(1) = temp(1) + 2*partdiff
4910 IF (blkrow.NE.endglobrow-startglobrow+1)
THEN
4911 eij=lelement(1,blkrow)%U(i,j)
4912 eji=lelement(1,blkrow+1)%L(j,i)
4913 temp(1) = temp(1) + (eij - eji)*(eij - eji)
4914 temp(2) = temp(2) + eij*eij + eji*eji
4916 IF (rank.NE.nranks-1)
THEN
4917 eij=lelement(1,blkrow)%U(i,j)
4918 eji=bottomleftmatrix(j,i)
4919 temp(1) = temp(1) + (eij - eji)*(eij - eji)
4920 temp(2) = temp(2) + eij*eij + eji*eji
4928 CALL flush(360+rank)
4930 DEALLOCATE(bottomleftmatrix)
4932 IF (rank .eq. 0)
THEN
4933 CALL mpi_reduce(mpi_in_place,temp,2,mpi_real8,mpi_sum,0,siesta_comm,mpi_err)
4935 CALL mpi_reduce(temp,temp,2,mpi_real8,mpi_sum,0,siesta_comm,mpi_err)
4938 asymindx=sqrt(temp(1)/temp(2))
4942 END SUBROUTINE checksymmetry
4946 SUBROUTINE storediagonal(blkrow,colnum,buf)
4948 INTEGER,
INTENT(IN) :: blkrow, colnum
4949 REAL(dp),
DIMENSION(1:M),
INTENT(IN) :: buf
4952 IF(blkrow.LT.startglobrow)
THEN
4953 IF(kpdbg)
WRITE(ofu,*)
'Error in indexing global block row index';
CALL fl(ofu)
4956 indx=(blkrow-startglobrow)*m+colnum
4957 origdiagelement(indx)=buf(colnum)
4959 END SUBROUTINE storediagonal
4964 SUBROUTINE writeblocks(flag)
4966 INTEGER,
INTENT(IN) :: flag
4968 INTEGER :: row, col, localblkrow, globalrow
4970 CHARACTER(100) :: fname
4971 CHARACTER(50) :: ciam, cnprocs
4972 INTEGER :: WOFU, istat
4974 WRITE(ciam,*) rank;
WRITE(cnprocs,*) nranks
4975 ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
4976 wofu = 4*nranks+rank
4978 fname=
'block-'//trim(ciam)//
'-P-'//trim(cnprocs)//
'.txt'
4979 OPEN(unit=wofu, file=fname, status=
"REPLACE", action=
"WRITE",&
4980 &form=
"FORMATTED",position=
"APPEND", iostat=istat)
4985 DO row=1, (endglobrow-startglobrow+1)*m
4986 WRITE(wofu,*)
'SavedDiag:',row,origdiagelement(row), flag
4992 DO globalrow=startglobrow,endglobrow
4993 localblkrow=globalrow-startglobrow+1
4999 WRITE(wofu,*) globalrow,
'L', row, col, orig(localblkrow)%L(row,col), flag
5008 WRITE(wofu,*) globalrow,
'D', row, col, orig(localblkrow)%D(row,col), flag
5017 WRITE(wofu,*) globalrow,
'U', row, col, orig(localblkrow)%U(row,col), flag
5021 WRITE(wofu,*)
'--------------------------------------------------------'
5027 END SUBROUTINE writeblocks
5032 SUBROUTINE displayblocks(flag)
5034 INTEGER,
INTENT(IN) :: flag
5035 INTEGER :: row, localblkrow, globalrow
5037 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 1;
CALL fl(ofu)
5039 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5040 DO row=1, (endglobrow-startglobrow+1)*m
5041 IF(kpdbg)
WRITE(ofu,*)
'OrigDiagElement:',origdiagelement(row),
'-',flag
5044 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5046 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 2;
CALL fl(ofu)
5049 DO globalrow=startglobrow,endglobrow
5050 localblkrow=globalrow-startglobrow+1
5052 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5055 IF(kpdbg)
WRITE(ofu,*)
'orig-L:',orig(localblkrow)%L(row,1:m),
'-',flag
5059 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5061 IF(kpdbg)
WRITE(ofu,*)
'orig-D:',orig(localblkrow)%D(row,1:m),
'-',flag
5065 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5067 IF(kpdbg)
WRITE(ofu,*)
'orig-U:',orig(localblkrow)%U(row,1:m),
'-',flag
5070 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
5073 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 3;
CALL fl(ofu)
5076 END SUBROUTINE displayblocks
5081 SUBROUTINE parmatvec (invec,outvec,outveclength)
5083 INTEGER,
INTENT(IN) :: outveclength
5084 REAL(dp),
INTENT(IN) :: invec(1:N*M)
5085 REAL(dp),
INTENT(OUT) :: outvec(1:outveclength)
5087 INTEGER :: col, row, offset
5088 INTEGER :: globalrow, localblkrow, cnt, dcnt
5089 REAL(dp) :: melement, velement, total
5091 outvec=0; cnt=0; dcnt=0
5092 DO globalrow=startglobrow,endglobrow
5094 localblkrow=globalrow-startglobrow+1
5096 IF (globalrow.EQ.1)
THEN
5099 offset=(globalrow-2)*m
5102 IF (globalrow.EQ.1)
THEN
5106 velement=invec(offset+col)
5110 melement=origdiagelement(dcnt)
5112 melement=orig(localblkrow)%D(row,col)
5115 melement=orig(localblkrow)%U(row,col-m)
5117 total=total+melement*velement
5122 ELSE IF (globalrow.EQ.n)
THEN
5126 velement=invec(offset+col)
5128 melement=orig(localblkrow)%L(row,col)
5130 IF (row.EQ.col-m)
THEN
5132 melement=origdiagelement(dcnt)
5134 melement=orig(localblkrow)%D(row,col-m)
5137 total=total+melement*velement
5146 velement=invec(offset+col)
5148 melement=orig(localblkrow)%L(row,col)
5149 ELSE IF (col.GT.m.AND.col.LE.2*m)
THEN
5150 IF (row.EQ.col-m)
THEN
5152 melement=origdiagelement(dcnt)
5154 melement=orig(localblkrow)%D(row,col-m)
5157 melement=orig(localblkrow)%U(row,col-2*m)
5159 total=total+melement*velement
5160 IF (globalrow.EQ.2.AND.row.EQ.1)
THEN
5169 END SUBROUTINE parmatvec
5173 SUBROUTINE getcolsum(cs)
5175 real(dp),
DIMENSION(M, N),
INTENT(OUT) :: cs
5178 real(dp) :: minscale
5182 minscale = 1.0e-5_dp
5184 minscale = 1.0e-8_dp
5186 CALL initscalefactors
5187 DO js = startglobrow, endglobrow
5188 CALL getscalefactors(js,cs(:,js))
5189 eps = minscale*maxval(cs(:,js))
5190 CALL assert(eps .gt. zero,
' cs == 0 in GetColSum')
5191 cs(:,js) = one/sqrt(max(cs(:,js), eps))
5194 CALL finalizescalefactors
5196 END SUBROUTINE getcolsum
5200 SUBROUTINE initscalefactors
5202 INTEGER :: top, bot, MPI_ERR, ic, j, k
5203 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5204 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: tmp1, tmp2
5205 INTEGER :: numBlockRows
5207 numblockrows = endglobrow-startglobrow+1
5209 top=rank-1;
IF(rank.EQ.0) top=mpi_proc_null
5210 bot=rank+1;
IF(rank.EQ.nranks-1) bot=mpi_proc_null
5212 ALLOCATE (topscalefac(m), botscalefac(m), tmp1(m), tmp2(m), stat=ic)
5213 CALL assert(ic.eq.0,
'STAT != 0 IN InitScaleFactors')
5217 tmp1(ic)=sum(abs(orig(1)%L(:,ic)))
5218 tmp2(ic)=sum(abs(orig(numblockrows)%U(:,ic)))
5220 tmp1(ic)=maxval(abs(orig(1)%L(:,ic)))
5221 tmp2(ic)=maxval(abs(orig(numblockrows)%U(:,ic)))
5225 CALL mpi_sendrecv(tmp1,m,mpi_real8,top,1, &
5226 botscalefac,m,mpi_real8,bot,1,siesta_comm,mpi_stat,mpi_err)
5228 CALL mpi_sendrecv(tmp2,m,mpi_real8,bot,1, &
5229 topscalefac,m,mpi_real8,top,1,siesta_comm,mpi_stat,mpi_err)
5231 DEALLOCATE(tmp1, tmp2)
5233 END SUBROUTINE initscalefactors
5237 SUBROUTINE getscalefactors(js,scalevector)
5239 INTEGER,
INTENT(IN) :: js
5240 REAL(dp),
INTENT(OUT) :: scalevector(M)
5242 REAL(dp),
ALLOCATABLE :: coltmp(:)
5247 ALLOCATE (coltmp(m))
5249 ir = js-startglobrow+1
5254 coltmp = abs(orig(ir)%D(:,ic))
5256 scalevector(ic) = sum(coltmp)
5258 scalevector(ic) = maxval(coltmp)
5261 IF (js.GT.startglobrow)
THEN
5262 coltmp = abs(orig(ir-1)%U(:,ic))
5264 scalevector(ic) = scalevector(ic) + sum(coltmp)
5265 IF (js.EQ.endglobrow .AND. js.NE.n) &
5266 scalevector(ic) = scalevector(ic) + botscalefac(ic)
5268 scalevector(ic) = max(scalevector(ic),maxval(coltmp))
5269 IF (js.EQ.endglobrow .AND. js.NE.n) &
5270 scalevector(ic) = max(scalevector(ic),botscalefac(ic))
5274 IF (js.LT.endglobrow)
THEN
5275 coltmp = abs(orig(ir+1)%L(:,ic))
5277 scalevector(ic) = scalevector(ic) + sum(coltmp)
5278 IF (js.EQ.startglobrow .AND. js.NE.1) &
5279 scalevector(ic) = scalevector(ic) + topscalefac(ic)
5281 scalevector(ic) = max(scalevector(ic),maxval(coltmp))
5282 IF (js.EQ.startglobrow .AND. js.NE.1) &
5283 scalevector(ic) = max(scalevector(ic),topscalefac(ic))
5290 CALL assert(all(scalevector.GT.zero),
'SCALEVECTOR <= 0 IN GETSCALEFACTORS')
5292 END SUBROUTINE getscalefactors
5296 SUBROUTINE finalizescalefactors
5297 DEALLOCATE (topscalefac,botscalefac)
5298 END SUBROUTINE finalizescalefactors
5302 SUBROUTINE applyparallelscaling(lmp, colscale)
5304 REAL(dp),
INTENT(IN) :: lmp
5305 REAL(dp),
INTENT(INOUT) :: colscale(M,N)
5307 INTEGER :: icount, istat
5308 REAL(dp) :: ton, toff, colcnd, colcnd1, temp(2)
5309 REAL(dp),
ALLOCATABLE :: colpad(:,:)
5314 startrow1 = max(1, startglobrow-1); endrow1 = min(n, endglobrow+1)
5315 CALL assert(all(colscale(:,startrow1:endrow1).EQ.1), &
5316 'COLSCALE !=1 INITIALLY IN APPLYPARALLELSCALING')
5318 ALLOCATE (colpad(m,n), stat=istat)
5319 CALL assert(istat.eq.0,
' Allocation error in ApplyParallelScaling')
5321 CALL getcolsum(colpad)
5322 CALL padsides(colpad, m, 1, 1)
5323 CALL vectorcopypar(colpad, colscale)
5324 CALL parallelscaling(colpad)
5328 temp(1) = maxval( colpad(:,startglobrow:endglobrow))
5329 temp(2) = maxval(-colpad(:,startglobrow:endglobrow))
5330 CALL mpi_allreduce(mpi_in_place, temp, 2, mpi_real8, mpi_max, siesta_comm, &
5335 IF (colcnd1 .NE. zero)
THEN
5336 colcnd = abs(colcnd/colcnd1)
5346 CALL findminmax_tri(lmp)
5349 IF (rank.EQ.0) print
'(1x,3(a,1p,e10.3))', &
5350 'COLUMN-SCALING TIME: ', toff-ton,
' COLCND: ', colcnd, &
5351 ' DMINL: ', dmin_tri*lmp
5353 END SUBROUTINE applyparallelscaling
5357 SUBROUTINE parallelscaling(colsum)
5359 REAL(dp),
DIMENSION(M,N),
INTENT(IN) :: colsum
5361 INTEGER :: js, icol, ir, dcnt
5367 DO js = startglobrow, endglobrow
5368 ir=js-startglobrow+1
5370 lelement(1,ir)%D(:,icol) = colsum(:,js)*orig(ir)%D(:,icol)*colsum(icol,js)
5371 orig(ir)%D(:,icol) = lelement(1,ir)%D(:,icol)
5373 origdiagelement(dcnt) = orig(ir)%D(icol,icol)
5375 lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*colsum(icol,js-1)
5376 orig(ir)%L(:,icol) = lelement(1,ir)%L(:,icol)
5379 lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*colsum(icol,js+1)
5380 orig(ir)%U(:,icol) = lelement(1,ir)%U(:,icol)
5385 END SUBROUTINE parallelscaling
5389 SUBROUTINE findminmax_tri(lmp)
5391 REAL(dp),
PARAMETER :: minThreshold=1.e-6_dp
5392 REAL(dp),
INTENT(IN) :: lmp
5393 INTEGER :: js, icol, ir, dcnt, imax_diag(1)
5394 REAL(dp) :: eps, temp(2), minDiag, sign_diag = -1
5398 dmin_tri = huge(dmin_tri)
5399 DO js = startglobrow, endglobrow
5400 ir=js-startglobrow+1
5403 mindiag = minthreshold*maxval(abs(origdiagelement(dcnt+1:dcnt+m)))
5404 IF (mindiag .EQ. zero)
THEN
5405 mindiag = minthreshold
5407 imax_diag = maxloc(abs(origdiagelement(dcnt+1:dcnt+m)))
5408 sign_diag = sign(one, origdiagelement(imax_diag(1)))
5413 eps = origdiagelement(dcnt)
5415 IF (abs(eps) .LE. mindiag)
THEN
5416 eps = sign_diag*mindiag
5417 origdiagelement(dcnt) = eps
5418 orig(ir)%D(icol,icol) = eps
5419 lelement(1,ir)%D(icol,icol) = eps
5421 IF (js .LT. n) dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
5424 maxeigen_tri = max(maxeigen_tri, sum(abs(orig(ir)%L(icol,:)) + &
5425 abs(orig(ir)%D(icol,:)) + abs(orig(ir)%U(icol,:))))
5429 temp(1)=-dmin_tri; temp(2)=maxeigen_tri
5430 CALL mpi_allreduce(mpi_in_place,temp,2,mpi_real8, mpi_max, siesta_comm,mpi_err)
5431 dmin_tri=sign(-temp(1), eps)
5432 maxeigen_tri=temp(2)
5436 CALL resetdiagonal(lmp, .false.)
5438 END SUBROUTINE findminmax_tri
5443 SUBROUTINE resetdiagonal(lmp, bReset)
5444 REAL(dp),
INTENT(IN) :: lmp
5445 LOGICAL,
INTENT(IN) :: bReset
5446 REAL(dp) :: DminL, eps, eps0
5447 INTEGER :: dcnt, js, ir, icol
5450 eps0 = sign(one, origdiagelement(1))
5451 dmin_tri = sign(dmin_tri,eps0)
5452 dminl = dmin_tri*abs(lmp)
5456 DO js = startglobrow, endglobrow
5457 ir=js-startglobrow+1
5460 eps = origdiagelement(dcnt)
5461 CALL assert(sign(one,eps).EQ.eps0,
'WRONG SIGN IN RESET DIAGONAL')
5462 IF (l_colscale)
THEN
5463 lelement(1,ir)%D(icol,icol) = eps + dminl
5465 lelement(1,ir)%D(icol,icol) = eps*(1 + abs(lmp))
5467 orig(ir)%D(icol,icol) = lelement(1,ir)%D(icol,icol)
5471 lelement(1,ir)%L = orig(ir)%L
5472 lelement(1,ir)%D = orig(ir)%D
5473 lelement(1,ir)%U = orig(ir)%U
5474 lelement(1,ir)%pivot = 0
5478 END SUBROUTINE resetdiagonal
5482 SUBROUTINE refactorhessian(lmp)
5486 REAL(dp),
INTENT(IN) :: lmp
5488 CALL resetdiagonal(lmp, .true.)
5491 END SUBROUTINE refactorhessian
5495 SUBROUTINE computetranspose
5499 END SUBROUTINE computetranspose
5503 SUBROUTINE vectorcopypar (colsum, colscale)
5505 REAL(dp),
INTENT(IN) :: colsum(M,N)
5506 REAL(dp),
INTENT(OUT) :: colscale(M,N)
5510 DO js = startrow1, endrow1
5511 colscale(:,js) = colsum(:,js)*colscale(:,js)
5514 END SUBROUTINE vectorcopypar
5518 SUBROUTINE padsides(arrin, blksize, top, bot)
5520 INTEGER,
INTENT(IN) :: top, bot, blksize
5521 REAL(dp),
INTENT(INOUT) :: arrin(blksize, N)
5523 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5524 INTEGER :: M1, left, right, tag1=1, tag2=2, &
5527 left=rank-1;
IF(rank.EQ.0) left=mpi_proc_null
5528 right=rank+1;
IF(rank.EQ.nranks-1) right=mpi_proc_null
5530 tr = endglobrow; tl = startglobrow
5531 t1r= min(endglobrow+1, n); t1l= max(1, startglobrow-1)
5535 CALL mpi_sendrecv(arrin(:,tr-(top-1):tr), m1, mpi_real8, &
5536 right, tag1, arrin(:,t1l-(top-1):t1l), m1, mpi_real8, &
5537 left, tag1, siesta_comm, mpi_stat, mpi_err)
5541 CALL mpi_sendrecv(arrin(:,tl:tl+(bot-1)), m1, mpi_real8, &
5542 left, tag2, arrin(:,t1r:t1r+(bot-1)), m1, mpi_real8, &
5543 right, tag2, siesta_comm, mpi_stat, mpi_err)
5544 END SUBROUTINE padsides