10 USE parallel_include_module,
ONLY: stopmpi
11 USE parallel_include_module,
ONLY: tofu
12 USE parallel_include_module,
ONLY: ns_comm
13 USE parallel_include_module,
ONLY: grank, gnranks
14 USE parallel_include_module,
ONLY: rank, nranks
15 USE parallel_include_module,
ONLY: ns_resltn
21 INTEGER,
PARAMETER :: rprec = selected_real_kind(15,300)
22 INTEGER,
PARAMETER :: iprec = selected_int_kind(8)
23 INTEGER,
PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
24 INTEGER,
PARAMETER :: dp = rprec
32 REAL(dp),
ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
33 INTEGER,
ALLOCATABLE :: pivot(:)
42 REAL(dp),
ALLOCATABLE :: x(:)
77 INTEGER :: startglobrow
84 CHARACTER*100 :: kenvvar
85 CHARACTER*100 :: kenvval
90 LOGICAL :: writeproblemfile
91 LOGICAL :: writesolution
92 LOGICAL :: usebarriers
99 LOGICAL(dp) :: l_colscale=.false.
102 LOGICAL :: use_mpiwtime
103 DOUBLE PRECISION :: loctimer1, loctimer2
104 DOUBLE PRECISION :: mattimer1, mattimer2
105 DOUBLE PRECISION :: globtimer1, globtimer2
106 DOUBLE PRECISION :: timerfreq
107 DOUBLE PRECISION :: tottime
109 DOUBLE PRECISION :: totcommtime
110 INTEGER :: totcommcount
111 DOUBLE PRECISION :: totinvtime
112 INTEGER :: totinvcount
113 DOUBLE PRECISION :: totmatmultime
114 INTEGER :: totmatmulcount
115 DOUBLE PRECISION :: totmatsoltime
116 INTEGER :: totmatsolcount
118 REAL(dp),
PUBLIC :: maxeigen_tri
119 REAL(dp),
PUBLIC :: dmin_tri
126 REAL(dp),
ALLOCATABLE :: origdiagelement(:)
127 REAL(dp),
ALLOCATABLE :: topscalefac(:), botscalefac(:)
136 LOGICAL :: doblasonly
137 LOGICAL :: doblacscomm
145 INTEGER :: myrow, mycol
146 INTEGER :: nrows, ncols
147 INTEGER :: blockszrows, blockszcols
148 INTEGER,
ALLOCATABLE :: map(:,:)
159 INTEGER :: maincontext
160 INTEGER :: levelcontext
161 TYPE(BlacsProcessGrid) :: pgrid
173 INTEGER :: masterrank
175 INTEGER,
ALLOCATABLE :: slaveranks(:)
185 TYPE(MasterToSlaveMapping) :: msmap
187 INTEGER :: mpicomm, mpitag, mpierr
189 INTEGER :: mpistatus(MPI_STATUS_SIZE)
194 TYPE(PBLASLevelParameters) :: pblas
201 INTEGER,
PARAMETER :: op_none = 0
202 INTEGER,
PARAMETER :: op_done = 1
203 INTEGER,
PARAMETER :: op_dgemm = 2
204 INTEGER,
PARAMETER :: op_dgemv = 3
205 INTEGER,
PARAMETER :: op_dgetrf = 4
206 INTEGER,
PARAMETER :: op_dgetrs = 5
214 DOUBLE PRECISION :: tm
216 DOUBLE PRECISION :: t1, t2
219 TYPE(TimeCount) :: wait, comm, comp
220 TYPE(TimeCount) :: mm, trf, pmm, ptrf
221 TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
222 TYPE(TimeCount) :: extract, waitall
225 TYPE(PBLASStats) :: pstats
229 DOUBLE PRECISION,
ALLOCATABLE :: temparray(:)
241 SUBROUTINE bclockinit()
247 IF ( use_mpiwtime )
THEN
250 CALL system_clock(count_rate=tempint)
253 END SUBROUTINE bclockinit
260 SUBROUTINE bsystemclock( ts )
264 DOUBLE PRECISION,
INTENT(INOUT) :: ts
270 DOUBLE PRECISION :: tempdbl
272 IF ( use_mpiwtime )
THEN
274 tempdbl = mpi_wtime()
278 CALL system_clock( tempint )
281 END SUBROUTINE bsystemclock
292 INTEGER,
INTENT(IN) :: u
302 SUBROUTINE chargememory( bytes )
306 REAL,
INTENT(IN) :: bytes
309 membytes = membytes + bytes
310 END SUBROUTINE chargememory
317 SUBROUTINE chargetime( tot, t2, t1, cnt )
321 DOUBLE PRECISION,
INTENT(INOUT) :: tot
322 DOUBLE PRECISION,
INTENT(IN) :: t2
323 DOUBLE PRECISION,
INTENT(IN) :: t1
324 INTEGER,
INTENT(INOUT) :: cnt
327 tot = tot + (real(t2-t1))/timerfreq
329 END SUBROUTINE chargetime
337 SUBROUTINE timecountinit( tc )
346 END SUBROUTINE timecountinit
353 SUBROUTINE timecountprint( tc, msg )
358 CHARACTER(*),
INTENT(IN) :: msg
362 DOUBLE PRECISION :: avg
366 IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
367 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,
' * ',avg,
' sec = ',tc%tm,
' sec'
368 END SUBROUTINE timecountprint
375 SUBROUTINE plbinitstats()
376 CALL timecountinit( pstats%wait )
377 CALL timecountinit( pstats%comm )
378 CALL timecountinit( pstats%comp )
379 CALL timecountinit( pstats%mm )
380 CALL timecountinit( pstats%trf )
381 CALL timecountinit( pstats%pmm )
382 CALL timecountinit( pstats%ptrf )
384 CALL timecountinit( pstats%mma )
385 CALL timecountinit( pstats%mmb )
386 CALL timecountinit( pstats%mmc )
387 CALL timecountinit( pstats%mmalpha )
388 CALL timecountinit( pstats%mmbeta )
389 CALL timecountinit( pstats%mmrc )
390 CALL timecountinit( pstats%extract )
391 CALL timecountinit( pstats%waitall )
392 END SUBROUTINE plbinitstats
399 SUBROUTINE plbprintstats()
400 CALL timecountprint( pstats%wait,
'PBLAS Wait ' )
401 CALL timecountprint( pstats%comm,
'PBLAS Comm ' )
402 CALL timecountprint( pstats%comp,
'PBLAS Comp ' )
403 CALL timecountprint( pstats%mm,
'PBLAS MM ' )
404 CALL timecountprint( pstats%trf,
'PBLAS TRF ' )
405 CALL timecountprint( pstats%pmm,
'PBLAS PMM ' )
406 CALL timecountprint( pstats%ptrf,
'PBLAS PTRF ' )
408 CALL timecountprint( pstats%mma,
'PBLAS MMA ' )
409 CALL timecountprint( pstats%mmb,
'PBLAS MMB ' )
410 CALL timecountprint( pstats%mmc,
'PBLAS MMC ' )
411 CALL timecountprint( pstats%mmalpha,
'PBLAS MMalpha ' )
412 CALL timecountprint( pstats%mmbeta,
'PBLAS MMbeta ' )
413 CALL timecountprint( pstats%mmrc,
'PBLAS MMRC ' )
414 CALL timecountprint( pstats%extract,
'PBLAS Extract ' )
415 CALL timecountprint( pstats%waitall,
'PBLAS Waitall ' )
416 END SUBROUTINE plbprintstats
423 SUBROUTINE plbinitialize
427 CHARACTER*100 :: envvar
428 CHARACTER*100 :: envval
430 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Started';
CALL fl(ofu)
434 IF ( m .GE. 2048 )
THEN
436 envvar =
'BLOCKTRI_BLASONLY'
437 CALL getenv( envvar, envval )
438 IF ( envval .EQ.
'TRUE' )
THEN
440 IF(kpdbg)
WRITE(ofu,*)
'BLAS ONLY -- obeying env var ', envvar;
CALL fl(ofu)
443 IF(kpdbg)
WRITE(ofu,*)
'doblasonly = ', doblasonly;
CALL fl(ofu)
446 doblacscomm = .false.
447 envvar =
'BLOCKTRI_BLACSCOMM'
448 CALL getenv( envvar, envval )
449 IF ( envval .EQ.
'TRUE' )
THEN
451 IF(kpdbg)
WRITE(ofu,*)
'BLACS COMM -- obeying env var ', envvar;
CALL fl(ofu)
453 IF(kpdbg)
WRITE(ofu,*)
'doblacscomm = ', doblacscomm;
CALL fl(ofu)
457 envvar =
'BLOCKTRI_NBPP'
458 CALL getenv( envvar, envval )
459 IF ( envval .NE.
'' )
THEN
460 READ( envval, *) blacs%nbpp
461 IF(kpdbg)
WRITE(ofu,*)
'NBPP -- obeying env var ', envvar;
CALL fl(ofu)
463 IF(kpdbg)
WRITE(ofu,*)
'NBPP = ', blacs%nbpp;
CALL fl(ofu)
470 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
473 CALL blacs_pinfo( blacs%iam, blacs%nprocs )
474 IF(kpdbg)
WRITE(ofu,*)
'BLACS_PINFO ', blacs%iam,
' ', blacs%nprocs;
CALL fl(ofu)
475 CALL blacs_get( 0, 0, blacs%maincontext )
476 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GET ', blacs%maincontext;
CALL fl(ofu)
477 CALL blacs_gridinit( blacs%maincontext,
'R', 1, blacs%nprocs )
478 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GRIDINIT';
CALL fl(ofu)
479 CALL blacs_barrier( blacs%maincontext,
'All' )
483 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Done';
CALL fl(ofu)
484 END SUBROUTINE plbinitialize
491 SUBROUTINE plbfinalize
493 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Started';
CALL fl(ofu)
495 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
498 CALL blacs_barrier( blacs%maincontext,
'All' )
499 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
501 IF(kpdbg)
WRITE(ofu,*)
'BLACS_EXIT nprocs= ', blacs%nprocs;
CALL fl(ofu)
505 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Done';
CALL fl(ofu)
506 END SUBROUTINE plbfinalize
513 SUBROUTINE plbforwardinitialize
516 END SUBROUTINE plbforwardinitialize
523 SUBROUTINE plbforwardfinalize
526 END SUBROUTINE plbforwardfinalize
533 SUBROUTINE plbbackwardinitialize
536 END SUBROUTINE plbbackwardinitialize
543 SUBROUTINE plbbackwardfinalize
546 END SUBROUTINE plbbackwardfinalize
553 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
563 INTEGER :: maxslaves, actualslaves
567 IF ( doblasonly )
THEN
568 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel BLAS only';
CALL fl(ofu)
572 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Started', ammaster;
CALL fl(ofu)
576 pblas%ammaster = ammaster
577 pblas%msmap%masterrank = -1
578 pblas%msmap%nslaves = 0
579 CALL determinemasterslaveranks()
583 blacs%levelcontext = -1
584 CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
585 IF(kpdbg)
WRITE(ofu,*)
'Created new context';
CALL fl(ofu)
586 IF(kpdbg)
WRITE(ofu,*)
'Nslaves ',pblas%msmap%nslaves;
CALL fl(ofu)
588 blacs%pgrid%blockszrows = 64
589 IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
590 blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
591 IF(kpdbg)
WRITE(ofu,*)
'Block NR=',blacs%pgrid%blockszrows;
CALL fl(ofu)
592 IF(kpdbg)
WRITE(ofu,*)
'Block NC=',blacs%pgrid%blockszcols;
CALL fl(ofu)
594 maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
595 blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
596 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
597 IF ( maxslaves .LT. 1 ) maxslaves = 1
598 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
599 IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
600 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
601 actualslaves = maxslaves
602 IF(kpdbg)
WRITE(ofu,*)
' Actual slaves ', actualslaves;
CALL fl(ofu)
604 blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
605 IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
606 blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
607 IF(kpdbg)
WRITE(ofu,*)
'NR=',blacs%pgrid%nrows,
' NC=',blacs%pgrid%ncols;
CALL fl(ofu)
608 ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
610 DO i = 1, blacs%pgrid%nrows
611 DO j = 1, blacs%pgrid%ncols
612 blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
616 IF(kpdbg)
WRITE(ofu,*)
'NR*NC=',k;
CALL fl(ofu)
617 CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
618 blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
619 IF(kpdbg)
WRITE(ofu,*)
'GridMap done';
CALL fl(ofu)
620 CALL blacs_gridinfo( blacs%levelcontext, &
621 blacs%pgrid%nrows, blacs%pgrid%ncols, &
622 blacs%pgrid%myrow, blacs%pgrid%mycol )
623 IF(kpdbg)
WRITE(ofu,*)
'GridInfo done';
CALL fl(ofu)
624 IF(kpdbg)
WRITE(ofu,*)
'Myrowcol ',blacs%pgrid%myrow,
' ',blacs%pgrid%mycol;
CALL fl(ofu)
627 pblas%mpicomm = ns_comm
631 CALL blacs_barrier( blacs%maincontext,
'All' )
632 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
636 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Master';
CALL fl(ofu)
638 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Slave';
CALL fl(ofu)
642 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Done', ammaster;
CALL fl(ofu)
645 END SUBROUTINE plbforwardinitializelevel
653 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
662 IF ( doblasonly )
THEN
663 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel BLAS only';
CALL fl(ofu)
668 IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) )
THEN
669 CALL masterbcastnextop( op_done )
672 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
673 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
674 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel pariah !level-barrier';
CALL fl(ofu)
676 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level-barrier';
CALL fl(ofu)
677 CALL blacs_barrier( blacs%levelcontext,
'All' )
678 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level grid exit';
CALL fl(ofu)
679 CALL blacs_gridexit( blacs%levelcontext )
682 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel main-barrier';
CALL fl(ofu)
683 CALL blacs_barrier( blacs%maincontext,
'All' )
686 pblas%msmap%masterrank = -1
687 pblas%msmap%nslaves = 0
688 DEALLOCATE( pblas%msmap%slaveranks )
689 DEALLOCATE( blacs%pgrid%map )
694 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel ', ammaster;
CALL fl(ofu)
696 END SUBROUTINE plbforwardfinalizelevel
704 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
713 END SUBROUTINE plbbackwardinitializelevel
720 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
729 END SUBROUTINE plbbackwardfinalizelevel
737 SUBROUTINE determinemasterslaveranks()
741 LOGICAL,
ALLOCATABLE :: send_ammaster(:), recv_ammaster(:), assigned(:)
743 INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
744 INTEGER :: mpi_err, I, J, masterindex
746 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks started';
CALL fl(ofu)
750 ALLOCATE( send_ammaster(1:1) )
751 ALLOCATE( recv_ammaster(1:nranks) )
752 send_ammaster(1) = pblas%ammaster
753 ALLOCATE( assigned(1:nranks) )
755 assigned( i ) = .false.
757 ALLOCATE( allmsmaps(1:nranks) )
759 allmsmaps(i)%masterrank = -1
760 allmsmaps(i)%nslaves = 0
764 CALL mpi_allgather( send_ammaster, 1, mpi_logical, &
765 recv_ammaster, 1, mpi_logical, &
772 IF(kpdbg)
WRITE(ofu,*)
' recv_ammaster ',i,
' ',recv_ammaster(i);
CALL fl(ofu)
773 IF ( recv_ammaster(i) )
THEN
774 totmasters = totmasters + 1
778 IF ( totmasters .LE. 0 )
THEN
779 IF(kpdbg)
WRITE(ofu,*)
'Total masters must be greater than 0';
CALL fl(ofu)
783 nslavespermaster = (nranks / totmasters)
784 IF(kpdbg)
WRITE(ofu,*)
'Avg nslavespermaster',nslavespermaster;
CALL fl(ofu)
789 masterloop:
DO i = 1, nranks
790 IF ( recv_ammaster(i) )
THEN
792 tempmaster = tempmaster + 1
796 adjustednslavespermaster = nslavespermaster
797 IF ( tempmaster .LE. mod( nranks, totmasters ) )
THEN
798 adjustednslavespermaster = adjustednslavespermaster + 1
801 IF(kpdbg)
WRITE(ofu,*)
'Adjusted nslavespm',adjustednslavespermaster;
CALL fl(ofu)
802 allmsmaps(i)%masterrank = i-1
803 ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
808 allmsmaps(i)%nslaves = 1
809 allmsmaps(i)%slaveranks(1) = i-1
813 slaveloop:
DO j = 1, nranks
814 IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster )
THEN
817 IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) )
THEN
819 allmsmaps(j)%masterrank = i-1
820 allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
821 allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
826 IF(kpdbg)
WRITE(ofu,*)
'Computed all mappings';
CALL fl(ofu)
831 IF ( allmsmaps(masterindex)%masterrank .NE. rank )
THEN
832 masterindex = allmsmaps(masterindex)%masterrank+1
834 pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
835 pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
836 ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
837 pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
839 IF(kpdbg)
WRITE(ofu,*)
'Extracted my mappings';
CALL fl(ofu)
840 IF(kpdbg)
WRITE(ofu,*)
'My master rank',masterindex-1;
CALL fl(ofu)
841 IF(kpdbg)
WRITE(ofu,*)
'Nslaves in my set',allmsmaps(masterindex)%nslaves;
CALL fl(ofu)
842 DO j = 1, allmsmaps(masterindex)%nslaves
843 IF(kpdbg)
WRITE(ofu,*)
'Slave',j,
' ',allmsmaps(masterindex)%slaveranks(j);
CALL fl(ofu)
849 IF ( recv_ammaster(i) )
THEN
850 DEALLOCATE( allmsmaps(i)%slaveranks )
853 DEALLOCATE( assigned )
854 DEALLOCATE( allmsmaps )
855 DEALLOCATE( send_ammaster )
856 DEALLOCATE( recv_ammaster )
858 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks done';
CALL fl(ofu)
859 END SUBROUTINE determinemasterslaveranks
867 SUBROUTINE masterbcastnextop( nextop )
871 INTEGER,
INTENT(IN) :: nextop
875 INTEGER :: K, destrank
876 REAL(dp) :: realnextop
878 realnextop = real(nextop)
879 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp started ', nextop;
CALL fl(ofu)
880 CALL masterbcastvalue( realnextop )
881 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp done ', nextop;
CALL fl(ofu)
882 END SUBROUTINE masterbcastnextop
889 SUBROUTINE slavegetnextop( nextop )
893 REAL(dp) :: realnextop
894 INTEGER,
INTENT(INOUT) :: nextop
896 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp started';
CALL fl(ofu)
897 CALL slavereceivevalue( realnextop )
898 nextop = int( realnextop )
899 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp done ', nextop;
CALL fl(ofu)
900 END SUBROUTINE slavegetnextop
907 SUBROUTINE slaveservice()
914 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
915 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
916 IF(kpdbg)
WRITE(ofu,*)
'SlaveService pariah falling out ';
CALL fl(ofu)
918 IF(kpdbg)
WRITE(ofu,*)
'SlaveService started ';
CALL fl(ofu)
919 oploop:
DO WHILE (.true.)
921 CALL slavegetnextop( nextop )
922 IF ( nextop .EQ. op_done )
THEN
924 ELSE IF ( nextop .EQ. op_dgemm )
THEN
926 ELSE IF ( nextop .EQ. op_dgetrf )
THEN
928 ELSE IF ( nextop .EQ. op_dgetrs )
THEN
931 IF(kpdbg)
WRITE(ofu,*)
'Bad Next Op', nextop;
CALL fl(ofu)
935 IF(kpdbg)
WRITE(ofu,*)
'SlaveService done ';
CALL fl(ofu)
937 END SUBROUTINE slaveservice
944 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
945 A, nrows, ncols, subA, subnrows, subncols )
949 INTEGER,
INTENT(IN) :: bszr, bszc
950 INTEGER,
INTENT(IN) :: pnr, pnc
951 INTEGER,
INTENT(IN) :: pi, pj
952 REAL(dp),
INTENT(IN) :: A(:,:)
953 INTEGER,
INTENT(IN) :: nrows, ncols
954 REAL(dp),
INTENT(OUT) :: subA(:)
955 INTEGER,
INTENT(IN) :: subnrows, subncols
959 INTEGER :: I, J, K, Q, R
960 INTEGER :: thisblkr, thisblkc
962 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
964 CALL bsystemclock(pstats%extract%t1)
967 DO j = 1, ncols, bszc
968 thisblkc = (j-1) / bszc
969 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
971 IF ( j+r-1 .LE. ncols )
THEN
972 DO i = 1, nrows, bszr
973 thisblkr = (i-1) / bszr
974 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
976 IF ( i+q-1 .LE. nrows )
THEN
978 suba(k) = a(i+q-1,j+r-1)
989 IF ( k .NE. subnrows*subncols )
THEN
990 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
991 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
995 CALL bsystemclock(pstats%extract%t2)
996 CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
998 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix done K', k;
CALL fl(ofu)
1000 END SUBROUTINE extractsubmatrix
1009 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1013 REAL(dp),
INTENT(IN) :: A(:,:)
1014 INTEGER,
INTENT(IN) :: nrows, ncols
1015 REAL(dp),
INTENT(OUT) :: ssubA(:)
1016 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
1021 INTEGER :: aslaverank
1023 INTEGER :: subnrows, subncols
1024 INTEGER :: maxsubnrows, maxsubncols
1025 INTEGER :: bszr, bszc
1027 #if defined(MPI_OPT)
1028 INTEGER,
EXTERNAL :: NUMROC
1029 INTEGER :: mpistatus(MPI_STATUS_SIZE)
1033 INTEGER :: mpinisends
1034 INTEGER,
ALLOCATABLE :: mpireqs(:)
1036 INTEGER,
ALLOCATABLE :: mpistatuses(:,:)
1037 INTEGER :: waitmatchindex
1040 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix started';
CALL fl(ofu)
1041 CALL bsystemclock(pstats%comm%t1)
1043 #if defined(MPI_OPT)
1044 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1045 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1049 ALLOCATE( mpireqs( pnr * pnc ) )
1050 ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1053 maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1054 maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1055 ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1059 aslaverank = blacs%pgrid%map(i,j)
1061 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1062 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1064 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix to ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1066 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1068 IF ( aslaverank .NE. rank )
THEN
1069 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1072 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1073 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1074 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1075 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1080 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting self submatrix';
CALL fl(ofu)
1081 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1082 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1083 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix kept self submatrix';
CALL fl(ofu)
1086 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting submatrix',i,j;
CALL fl(ofu)
1087 ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1088 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1089 i, j, a, nrows, ncols, suba(i,j)%temparray, &
1091 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracted submatrix';
CALL fl(ofu)
1092 IF ( doblacscomm )
THEN
1093 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1094 suba(i,j)%temparray, subnrows, i-1, j-1 )
1096 mpinisends = mpinisends + 1
1097 CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1098 aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1100 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix sent slave submatrix';
CALL fl(ofu)
1101 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix deallocated temp submatrix';
CALL fl(ofu)
1106 CALL bsystemclock(pstats%waitall%t1)
1107 CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1108 CALL bsystemclock(pstats%waitall%t2)
1109 CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1113 IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) )
THEN
1114 DEALLOCATE( suba(i,j)%temparray )
1119 DEALLOCATE( mpireqs )
1120 DEALLOCATE( mpistatuses )
1122 CALL bsystemclock(pstats%comm%t2)
1123 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1125 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix done';
CALL fl(ofu)
1127 END SUBROUTINE mastersendmatrix
1134 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1138 #if defined(MPI_OPT)
1139 INTEGER :: mpistatus(MPI_STATUS_SIZE)
1141 INTEGER,
INTENT(IN) :: nrows, ncols
1142 REAL(dp),
INTENT(OUT) :: subA(:)
1143 INTEGER,
INTENT(IN) :: subnrows, subncols
1147 INTEGER :: masterrank
1150 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1152 #if defined(MPI_OPT)
1153 masterrank = blacs%pgrid%map(1,1)
1155 CALL bsystemclock(pstats%comm%t1)
1157 IF ( doblacscomm )
THEN
1158 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1160 CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1161 masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1164 CALL bsystemclock(pstats%comm%t2)
1165 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1168 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix done';
CALL fl(ofu)
1169 END SUBROUTINE slavereceivematrix
1176 SUBROUTINE masterbcastvalue( val )
1181 REAL(dp),
INTENT(IN) :: val
1183 #if defined(MPI_OPT)
1184 CALL dgebs2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1 );
1186 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastValue bcast to slaves';
CALL fl(ofu)
1188 END SUBROUTINE masterbcastvalue
1195 SUBROUTINE slavereceivevalue( val )
1200 REAL(dp),
INTENT(OUT) :: val
1202 #if defined(MPI_OPT)
1203 CALL dgebr2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1, 0, 0 )
1205 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveValue bcast from master'
1208 END SUBROUTINE slavereceivevalue
1215 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1216 A, nrows, ncols, subA, subnrows, subncols )
1220 INTEGER,
INTENT(IN) :: bszr, bszc
1221 INTEGER,
INTENT(IN) :: pnr, pnc
1222 INTEGER,
INTENT(IN) :: pi, pj
1223 REAL(dp),
INTENT(OUT) :: A(:,:)
1224 INTEGER,
INTENT(IN) :: nrows, ncols
1225 REAL(dp),
INTENT(IN) :: subA(:)
1226 INTEGER,
INTENT(IN) :: subnrows, subncols
1230 INTEGER :: I, J, K, Q, R
1231 INTEGER :: thisblkr, thisblkc
1233 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
1236 DO j = 1, ncols, bszc
1237 thisblkc = (j-1) / bszc
1238 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
1240 IF ( j+r-1 .LE. ncols )
THEN
1241 DO i = 1, nrows, bszr
1242 thisblkr = (i-1) / bszr
1243 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1245 IF ( i+q-1 .LE. nrows )
THEN
1247 a(i+q-1,j+r-1) = suba(k)
1258 IF ( k .NE. subnrows*subncols )
THEN
1259 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1260 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
1264 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix done K', k;
CALL fl(ofu)
1266 END SUBROUTINE injectsubmatrix
1273 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1277 INTEGER,
INTENT(IN) :: bszr
1278 INTEGER,
INTENT(IN) :: pnr
1279 INTEGER,
INTENT(IN) :: pi
1280 INTEGER,
INTENT(OUT) :: V(:)
1281 INTEGER,
INTENT(IN) :: nrows
1282 INTEGER,
INTENT(IN) :: subV(:)
1283 INTEGER,
INTENT(IN) :: subnrows
1290 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector NR=', subnrows;
CALL fl(ofu)
1293 DO i = 1, nrows, bszr
1294 thisblkr = (i-1) / bszr
1295 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1297 IF ( i+q-1 .LE. nrows )
THEN
1306 IF ( k .NE. subnrows )
THEN
1307 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1308 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows;
CALL fl(ofu)
1312 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector done K', k;
CALL fl(ofu)
1314 END SUBROUTINE injectsubvector
1323 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1327 REAL(dp),
INTENT(OUT) :: A(:,:)
1328 INTEGER,
INTENT(IN) :: nrows, ncols
1329 REAL(dp),
INTENT(IN) :: ssubA(:)
1330 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
1335 INTEGER :: aslaverank
1336 REAL(dp),
ALLOCATABLE :: subA(:)
1337 INTEGER :: subnrows, subncols
1338 INTEGER :: bszr, bszc
1340 #if defined(MPI_OPT)
1341 INTEGER,
EXTERNAL :: NUMROC
1344 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix started';
CALL fl(ofu)
1345 CALL bsystemclock(pstats%comm%t1)
1347 #if defined(MPI_OPT)
1348 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1349 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1353 aslaverank = blacs%pgrid%map(i,j)
1355 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1356 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1358 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1360 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1362 IF ( aslaverank .NE. rank )
THEN
1363 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1366 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1367 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1368 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1369 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1374 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting self submatrix';
CALL fl(ofu)
1375 CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1376 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1377 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix kept self submatrix';
CALL fl(ofu)
1380 ALLOCATE( suba( 1 : subnrows*subncols ) )
1381 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix receiving slave submatrix';
CALL fl(ofu)
1382 CALL bsystemclock(pstats%wait%t1)
1383 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1384 suba, subnrows, i-1, j-1 )
1385 CALL bsystemclock(pstats%wait%t2)
1386 CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1387 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting submatrix',i,j;
CALL fl(ofu)
1388 CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1389 i, j, a, nrows, ncols, suba, subnrows, subncols )
1390 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injected submatrix';
CALL fl(ofu)
1392 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix deallocated temp submatrix';
CALL fl(ofu)
1397 CALL bsystemclock(pstats%comm%t2)
1398 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1401 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix done';
CALL fl(ofu)
1403 END SUBROUTINE masterrecvmatrix
1410 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1414 INTEGER,
INTENT(IN) :: nrows, ncols
1415 REAL(dp),
INTENT(IN) :: subA(:)
1416 INTEGER,
INTENT(IN) :: subnrows, subncols
1418 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1420 CALL bsystemclock(pstats%comm%t1)
1421 #if defined(MPI_OPT)
1422 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1424 CALL bsystemclock(pstats%comm%t2)
1425 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1427 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix done';
CALL fl(ofu)
1428 END SUBROUTINE slavesendmatrix
1437 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1441 INTEGER,
INTENT(OUT) :: V(:)
1442 INTEGER,
INTENT(IN) :: nrows
1443 INTEGER,
INTENT(IN) :: ssubV(:)
1444 INTEGER,
INTENT(IN) :: ssubnrows
1449 INTEGER :: aslaverank
1450 INTEGER,
ALLOCATABLE :: subV(:)
1454 #if defined(MPI_OPT)
1455 INTEGER,
EXTERNAL :: NUMROC
1458 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector started';
CALL fl(ofu)
1459 CALL bsystemclock(pstats%comm%t1)
1461 #if defined(MPI_OPT)
1462 bszr = blacs%pgrid%blockszrows
1463 pnr = blacs%pgrid%nrows
1467 aslaverank = blacs%pgrid%map(i,j)
1469 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1471 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1473 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1475 IF ( aslaverank .NE. rank )
THEN
1476 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1479 IF ( ssubnrows .NE. subnrows )
THEN
1480 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1481 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows;
CALL fl(ofu)
1482 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows;
CALL fl(ofu)
1487 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting self subvector';
CALL fl(ofu)
1488 CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1489 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector kept self subvector';
CALL fl(ofu)
1492 ALLOCATE( subv( 1 : subnrows ) )
1493 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector receiving slave subvector';
CALL fl(ofu)
1494 CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1495 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting subvector',i,j;
CALL fl(ofu)
1496 CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1497 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injected subvector';
CALL fl(ofu)
1499 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector deallocated temp subvector';
CALL fl(ofu)
1504 CALL bsystemclock(pstats%comm%t2)
1505 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1508 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector done';
CALL fl(ofu)
1510 END SUBROUTINE masterrecvvector
1519 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1523 INTEGER,
INTENT(IN) :: nrows
1524 INTEGER,
INTENT(IN) :: subV(:)
1525 INTEGER,
INTENT(IN) :: subnrows
1528 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector started ', subnrows;
CALL fl(ofu)
1530 CALL bsystemclock(pstats%comm%t1)
1532 pj = blacs%pgrid%mycol
1533 IF ( pj .GT. 0 )
THEN
1534 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector skipping since mycol>0 ',pj;
CALL fl(ofu)
1536 #if defined(MPI_OPT)
1537 CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1541 CALL bsystemclock(pstats%comm%t2)
1542 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1544 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector done';
CALL fl(ofu)
1545 END SUBROUTINE slavesendvector
1552 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1556 REAL(dp),
INTENT(IN) :: alpha, beta
1557 REAL(dp),
INTENT(IN) :: A(:,:), B(:,:)
1558 REAL(dp),
INTENT(INOUT) :: C(:,:)
1562 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1563 INTEGER :: descA(9), descB(9), descC(9)
1564 INTEGER :: lldA, lldB, lldC
1565 INTEGER :: nrows, ncols, subnrows, subncols
1566 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1568 INTEGER :: ctxt, info
1569 #if defined(MPI_OPT)
1570 INTEGER,
EXTERNAL :: NUMROC
1573 IF(kpdbg)
WRITE(ofu,*)
'MasterGEMM started';
CALL fl(ofu)
1576 useblas = ( doblasonly ) .OR. &
1577 ( pblas%msmap%nslaves .EQ. 1) .OR. &
1578 ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1579 ( m .LE. blacs%pgrid%blockszcols )
1582 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGEMM only (not using PBLAS)';
CALL fl(ofu)
1583 CALL dgemm(
'N',
'N', m, m, m, alpha, a, m, b, m, beta, c, m )
1585 #if defined(MPI_OPT)
1586 CALL bsystemclock(pstats%mm%t1)
1587 nrows = m; ncols = m
1588 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1589 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1590 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1591 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1592 subncols = numroc( ncols, bszc, pj, 0, pnc )
1594 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM allocating subABC';
CALL fl(ofu)
1595 ALLOCATE( suba(1:subnrows*subncols) )
1596 ALLOCATE( subb(1:subnrows*subncols) )
1597 ALLOCATE( subc(1:subnrows*subncols) )
1598 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM allocated subABC';
CALL fl(ofu)
1600 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM desciniting subABC';
CALL fl(ofu)
1601 ctxt = blacs%levelcontext
1602 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1603 lldb = llda; lldc = llda
1604 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1605 CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1606 CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1607 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM desciniting subABC';
CALL fl(ofu)
1609 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending OP_DGEMM';
CALL fl(ofu)
1610 CALL masterbcastnextop( op_dgemm )
1612 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending A';
CALL fl(ofu)
1613 CALL bsystemclock(pstats%mma%t1)
1614 CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1615 CALL bsystemclock(pstats%mma%t2)
1616 CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1618 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending B';
CALL fl(ofu)
1619 CALL bsystemclock(pstats%mmb%t1)
1620 CALL mastersendmatrix( b, nrows, ncols, subb, subnrows, subncols )
1621 CALL bsystemclock(pstats%mmb%t2)
1622 CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1624 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending C';
CALL fl(ofu)
1625 CALL bsystemclock(pstats%mmc%t1)
1626 CALL mastersendmatrix( c, nrows, ncols, subc, subnrows, subncols )
1627 CALL bsystemclock(pstats%mmc%t2)
1628 CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1630 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending alpha', alpha;
CALL fl(ofu)
1631 CALL bsystemclock(pstats%mmalpha%t1)
1632 CALL masterbcastvalue( alpha )
1633 CALL bsystemclock(pstats%mmalpha%t2)
1634 CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1636 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM sending beta', beta;
CALL fl(ofu)
1637 CALL bsystemclock(pstats%mmbeta%t1)
1638 CALL masterbcastvalue( beta )
1639 CALL bsystemclock(pstats%mmbeta%t2)
1640 CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1642 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM invoking PDGEMM';
CALL fl(ofu)
1643 CALL bsystemclock(pstats%comp%t1)
1644 CALL pdgemm(
'N',
'N', m, m, m, alpha, &
1645 suba, 1, 1, desca, &
1646 subb, 1, 1, descb, &
1649 CALL bsystemclock(pstats%comp%t2)
1650 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1651 CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1652 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM done PDGEMM';
CALL fl(ofu)
1654 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM receiving slave matrices';
CALL fl(ofu)
1655 CALL bsystemclock(pstats%mmrc%t1)
1656 CALL masterrecvmatrix( c, nrows, ncols, subc, subnrows, subncols )
1657 CALL bsystemclock(pstats%mmrc%t2)
1658 CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1659 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM received slave matrices';
CALL fl(ofu)
1661 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM deallocating subABC';
CALL fl(ofu)
1665 IF(kpdbg)
WRITE(ofu,*)
'MasterDGEMM deallocated subABC';
CALL fl(ofu)
1666 CALL bsystemclock(pstats%mm%t2)
1667 CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1671 IF(kpdbg)
WRITE(ofu,*)
'MasterGEMM done';
CALL fl(ofu)
1672 END SUBROUTINE plbdgemm
1679 SUBROUTINE slavedgemm()
1683 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1684 REAL(dp) :: alpha, beta
1685 INTEGER :: descA(9), descB(9), descC(9)
1686 INTEGER :: lldA, lldB, lldC
1687 INTEGER :: nrows, ncols, subnrows, subncols
1688 INTEGER :: bszr, bszc, pnr, pnc
1690 INTEGER :: ctxt, info
1691 #if defined(MPI_OPT)
1692 INTEGER,
EXTERNAL :: NUMROC
1695 CALL bsystemclock(pstats%mm%t1)
1697 #if defined(MPI_OPT)
1698 nrows = m; ncols = m
1699 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1700 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1701 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1703 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1704 subncols = numroc( ncols, bszc, pj, 0, pnc )
1706 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocating subABC';
CALL fl(ofu)
1707 ALLOCATE( suba(1:subnrows*subncols) )
1708 ALLOCATE( subb(1:subnrows*subncols) )
1709 ALLOCATE( subc(1:subnrows*subncols) )
1710 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocated subABC';
CALL fl(ofu)
1712 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1713 ctxt = blacs%levelcontext
1714 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1715 lldb = llda; lldc = llda
1716 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1717 CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1718 CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1719 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1721 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving A';
CALL fl(ofu)
1722 CALL bsystemclock(pstats%mma%t1)
1723 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1724 CALL bsystemclock(pstats%mma%t2)
1725 CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1727 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving B';
CALL fl(ofu)
1728 CALL bsystemclock(pstats%mmb%t1)
1729 CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1730 CALL bsystemclock(pstats%mmb%t2)
1731 CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1733 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving C';
CALL fl(ofu)
1734 CALL bsystemclock(pstats%mmc%t1)
1735 CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1736 CALL bsystemclock(pstats%mmc%t2)
1737 CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1739 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving alpha';
CALL fl(ofu)
1740 CALL bsystemclock(pstats%mmalpha%t1)
1741 CALL slavereceivevalue( alpha )
1742 CALL bsystemclock(pstats%mmalpha%t2)
1743 CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1745 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving beta';
CALL fl(ofu)
1746 CALL bsystemclock(pstats%mmbeta%t1)
1747 CALL slavereceivevalue( beta )
1748 CALL bsystemclock(pstats%mmbeta%t2)
1749 CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1751 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM invoking PDGEMM';
CALL fl(ofu)
1752 CALL bsystemclock(pstats%comp%t1)
1753 CALL pdgemm(
'N',
'N', m, m, m, alpha, &
1754 suba, 1, 1, desca, &
1755 subb, 1, 1, descb, &
1758 CALL bsystemclock(pstats%comp%t2)
1759 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1760 CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1761 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM done PDGEMM';
CALL fl(ofu)
1763 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sending result matrix to master';
CALL fl(ofu)
1764 CALL bsystemclock(pstats%mmrc%t1)
1765 CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1766 CALL bsystemclock(pstats%mmrc%t2)
1767 CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1768 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sent result matrix to master';
CALL fl(ofu)
1770 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocating subABC';
CALL fl(ofu)
1774 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocated subABC';
CALL fl(ofu)
1777 CALL bsystemclock(pstats%mm%t2)
1778 CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1780 END SUBROUTINE slavedgemm
1787 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1791 REAL(dp),
INTENT(IN) :: alpha, beta
1792 REAL(dp),
INTENT(IN) :: A(:,:)
1793 REAL(dp),
INTENT(IN) :: x(:)
1794 REAL(dp),
INTENT(INOUT) :: y(:)
1798 CALL dgemv(
'N', m, m, alpha, a, m, x, 1, beta, y, 1 )
1799 END SUBROUTINE plbdgemv
1806 SUBROUTINE plbdgetrf( A, piv, info )
1810 REAL(dp),
INTENT(INOUT) :: A(:,:)
1811 INTEGER,
INTENT(OUT) :: piv(:)
1812 INTEGER,
INTENT(INOUT) :: info
1816 REAL(dp),
ALLOCATABLE :: subA(:)
1817 INTEGER,
ALLOCATABLE :: subpiv(:)
1820 INTEGER :: nrows, ncols, subnrows, subncols
1821 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1824 #if defined(MPI_OPT)
1825 INTEGER,
EXTERNAL :: NUMROC
1828 IF(kpdbg)
WRITE(ofu,*)
'MasterGETRF started';
CALL fl(ofu)
1831 useblas = ( doblasonly ) .OR. &
1832 ( pblas%msmap%nslaves .EQ. 1) .OR. &
1833 ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1834 ( m .LE. blacs%pgrid%blockszcols )
1839 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGETRF only (not using PBLAS) with M=',m ;
CALL fl(ofu)
1840 CALL dgetrf( m, m, a, m, piv, info )
1842 #if defined(MPI_OPT)
1843 CALL bsystemclock(pstats%trf%t1)
1844 nrows = m; ncols = m
1845 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1846 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1847 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1848 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1849 subncols = numroc( ncols, bszc, pj, 0, pnc )
1851 ctxt = blacs%levelcontext
1852 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1853 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1855 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF allocating subAPiv';
CALL fl(ofu)
1856 ALLOCATE( suba(1:subnrows*subncols) )
1857 ALLOCATE( subpiv(1:subnrows+bszr) )
1858 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF allocated subAPiv';
CALL fl(ofu)
1860 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF sending OP_GETRF';
CALL fl(ofu)
1861 CALL masterbcastnextop( op_dgetrf )
1862 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF sending A';
CALL fl(ofu)
1863 CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1864 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF invoking PDGETRF';
CALL fl(ofu)
1865 CALL bsystemclock(pstats%comp%t1)
1866 CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1867 CALL bsystemclock(pstats%comp%t2)
1868 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1869 CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1870 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done PDGETRF';
CALL fl(ofu)
1872 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF receiving slave submatrices';
CALL fl(ofu)
1873 CALL masterrecvmatrix( a, nrows, ncols, suba, subnrows, subncols )
1874 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF received slave submatrices';
CALL fl(ofu)
1875 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF receiving slave vectors';
CALL fl(ofu)
1876 CALL masterrecvvector( piv, nrows, subpiv, subnrows )
1877 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF received slave vectors';
CALL fl(ofu)
1879 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF deallocating subAPiv';
CALL fl(ofu)
1880 DEALLOCATE( subpiv )
1882 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF deallocated subAPiv';
CALL fl(ofu)
1883 CALL bsystemclock(pstats%trf%t2)
1884 CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1888 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done';
CALL fl(ofu)
1889 END SUBROUTINE plbdgetrf
1896 SUBROUTINE slavedgetrf()
1900 REAL(dp),
ALLOCATABLE :: subA(:)
1901 INTEGER,
ALLOCATABLE :: subpiv(:)
1904 INTEGER :: nrows, ncols, subnrows, subncols
1905 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1906 INTEGER :: ctxt, info
1907 #if defined(MPI_OPT)
1908 INTEGER,
EXTERNAL :: NUMROC
1911 CALL bsystemclock(pstats%trf%t1)
1913 #if defined(MPI_OPT)
1914 nrows = m; ncols = m
1915 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1916 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1917 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1918 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1919 subncols = numroc( ncols, bszc, pj, 0, pnc )
1921 ctxt = blacs%levelcontext
1922 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1923 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1925 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocating subAPiv';
CALL fl(ofu)
1926 ALLOCATE( suba(1:subnrows*subncols) )
1927 ALLOCATE( subpiv(1:subnrows+bszr) )
1928 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocated subAPiv';
CALL fl(ofu)
1930 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF receiving A submatrix';
CALL fl(ofu)
1931 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1932 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF received A submatrix';
CALL fl(ofu)
1934 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF invoking PDGETRF';
CALL fl(ofu)
1935 CALL bsystemclock(pstats%comp%t1)
1936 CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1937 CALL bsystemclock(pstats%comp%t2)
1938 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1939 CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1940 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done PDGETRF';
CALL fl(ofu)
1942 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sending result matrix to master';
CALL fl(ofu)
1943 CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1944 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result matrix to master';
CALL fl(ofu)
1945 CALL slavesendvector( nrows, subpiv, subnrows )
1946 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result vector to master';
CALL fl(ofu)
1948 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocating subAPiv';
CALL fl(ofu)
1949 DEALLOCATE( subpiv )
1951 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocated subAPiv';
CALL fl(ofu)
1955 CALL bsystemclock(pstats%trf%t2)
1956 CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1958 END SUBROUTINE slavedgetrf
1965 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1969 INTEGER,
INTENT(IN) :: nrhs
1970 REAL(dp),
INTENT(IN) :: A(:,:)
1971 INTEGER,
INTENT(IN) :: piv(:)
1972 REAL(dp),
INTENT(INOUT) :: B(:,:)
1977 IF(kpdbg)
WRITE(ofu,*)
'BLAS DGETRS only (not using PBLAS)';
CALL fl(ofu)
1978 CALL dgetrs(
'N', m, nrhs, a, m, piv, b, m, info )
1979 END SUBROUTINE plbdgetrs
1986 SUBROUTINE slavedgetrs()
1988 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRS not implemented';
CALL fl(ofu)
1991 END SUBROUTINE slavedgetrs
2000 SUBROUTINE initialize( do_mpiinit, inN, inM )
2004 LOGICAL,
INTENT(IN) :: do_mpiinit
2005 INTEGER,
INTENT(IN) :: inN
2006 INTEGER,
INTENT(IN) :: inM
2011 INTEGER :: nrowsperrank
2012 INTEGER :: nspillrows
2015 INTEGER :: globrowoff
2022 use_mpiwtime = .false.
2023 #if defined(MPI_OPT)
2024 IF ( do_mpiinit )
THEN
2025 CALL mpi_init( mpierr )
2027 CALL mpi_comm_rank( ns_comm, rank, mpierr )
2028 CALL mpi_comm_size( ns_comm, nranks, mpierr)
2029 use_mpiwtime = .true.
2038 IF ( p .GT. n )
THEN
2039 IF(kpdbg)
WRITE(ofu,*)
'Detected extra ', p-n,
' ranks';
CALL fl(ofu)
2051 kpdbg=.false.; kenvvar=
'KPDBG'
2052 CALL getenv(kenvvar,kenvval)
2053 IF (kenvval.NE.
'')
THEN
2054 IF (kenvval.EQ.
'TRUE')
THEN
2062 IF(kpdbg)
WRITE(ofu,*)
'Rank ', rank,
' NRanks ', nranks;
CALL fl(ofu)
2063 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2064 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization start ------';
CALL fl(ofu)
2065 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2068 IF ( n .LT. 1 )
THEN
2069 IF(kpdbg)
WRITE(ofu,*)
'Bad N', n;
CALL fl(ofu)
2078 nspillrows = mod(n, p)
2079 IF ( rank .LT. nspillrows )
THEN
2080 startglobrow = rank*nrowsperrank + rank + 1
2081 endglobrow = startglobrow + nrowsperrank
2082 ELSE IF ( rank .LT. p )
THEN
2083 startglobrow = rank*nrowsperrank + nspillrows + 1
2084 endglobrow = startglobrow + nrowsperrank - 1
2093 IF (.NOT.
ALLOCATED(origdiagelement))
ALLOCATE (origdiagelement((endglobrow-startglobrow+1)*m))
2098 lognbase2 = log(real(n))/log(2.0)
2099 nlevels = 1 + int(lognbase2)
2100 IF ( real(int(lognbase2)) .NE. lognbase2 )
THEN
2101 nlevels = nlevels + 1
2103 IF(kpdbg)
WRITE(ofu,*)
'NLEVELS=', nlevels;
CALL fl(ofu)
2113 usebarriers = .false.
2116 writeproblemfile = .false.
2117 writesolution = .false.
2143 IF (.NOT.
ALLOCATED(lelement))
ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2145 CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
2148 IF (.NOT.
ALLOCATED(selement))
ALLOCATE( selement(n) )
2150 CALL chargememory( n*ptrsz )
2155 DO globrow = startglobrow, endglobrow, 1
2156 globrowoff = globrow-startglobrow+1
2158 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%L))
ALLOCATE( lelement(1, globrowoff)%L(m,m) )
2159 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%D))
ALLOCATE( lelement(1, globrowoff)%D(m,m) )
2160 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%U))
ALLOCATE( lelement(1, globrowoff)%U(m,m) )
2161 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%b))
ALLOCATE( lelement(1, globrowoff)%b(m,1) )
2162 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%pivot))
ALLOCATE( lelement(1, globrowoff)%pivot(m) )
2169 CALL chargememory( (3*m*m+m)*dpsz + m*intsz )
2170 lelement(1, globrowoff)%L = 0
2171 lelement(1, globrowoff)%D = 0
2172 lelement(1, globrowoff)%U = 0
2173 lelement(1, globrowoff)%b = 0
2174 lelement(1, globrowoff)%pivot = 0
2181 IF (.NOT.
ALLOCATED(orig))
ALLOCATE( orig(endglobrow-startglobrow+1) )
2183 DO globrow = startglobrow, endglobrow, 1
2184 globrowoff = globrow-startglobrow+1
2186 IF(.NOT.
ALLOCATED(orig(globrowoff)%L))
ALLOCATE( orig(globrowoff)%L(m,m) )
2187 IF(.NOT.
ALLOCATED(orig(globrowoff)%D))
ALLOCATE( orig(globrowoff)%D(m,m) )
2188 IF(.NOT.
ALLOCATED(orig(globrowoff)%U))
ALLOCATE( orig(globrowoff)%U(m,m) )
2189 IF(.NOT.
ALLOCATED(orig(globrowoff)%b))
ALLOCATE( orig(globrowoff)%b(m,1) )
2196 orig(globrowoff)%L = 0
2197 orig(globrowoff)%D = 0
2198 orig(globrowoff)%U = 0
2199 orig(globrowoff)%b = 0
2203 CALL plbinitialize()
2206 IF(kpdbg)
WRITE(ofu,*)
' P=',p,
' N=',n,
' M=',m;
CALL fl(ofu)
2207 IF(kpdbg)
WRITE(ofu,*)
'SROW=',startglobrow,
' EROW=', endglobrow;
CALL fl(ofu)
2208 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization end ------';
CALL fl(ofu)
2209 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2210 END SUBROUTINE initialize
2217 SUBROUTINE setmatrixrow( globrow, L, D, U )
2221 INTEGER,
INTENT(IN) :: globrow
2222 REAL(dp),
INTENT(IN) :: L(:,:), D(:,:), U(:,:)
2227 INTEGER :: i, j, globrowoff
2232 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2233 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRow: Bad input globrow ',globrow;
CALL fl(ofu)
2236 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2237 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRow: Non-local globrow ',globrow;
CALL fl(ofu)
2241 globrowoff = globrow-startglobrow+1
2248 IF ( globrow .EQ. 1 )
THEN
2253 lelement(1, globrowoff)%L(i,j) = val
2256 lelement(1, globrowoff)%D(i,j) = val
2258 IF ( globrow .EQ. n )
THEN
2263 lelement(1, globrowoff)%U(i,j) = val
2270 orig(globrowoff)%L = lelement(1,globrowoff)%L
2271 orig(globrowoff)%D = lelement(1,globrowoff)%D
2272 orig(globrowoff)%U = lelement(1,globrowoff)%U
2277 END SUBROUTINE setmatrixrow
2284 SUBROUTINE setmatrixrowl( globrow, L )
2288 INTEGER,
INTENT(IN) :: globrow
2289 REAL(dp),
INTENT(IN) :: L(:,:)
2294 INTEGER :: i, j, globrowoff
2299 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2300 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowL: Bad input globrow ',globrow;
CALL fl(ofu)
2303 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2304 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowL: Non-local globrow ',globrow;
CALL fl(ofu)
2308 globrowoff = globrow-startglobrow+1
2315 IF ( globrow .EQ. 1 )
THEN
2320 lelement(1, globrowoff)%L(i,j) = val
2327 orig(globrowoff)%L = lelement(1,globrowoff)%L
2332 END SUBROUTINE setmatrixrowl
2339 SUBROUTINE setmatrixrowd( globrow, D )
2343 INTEGER,
INTENT(IN) :: globrow
2344 REAL(dp),
INTENT(IN) :: D(:,:)
2349 INTEGER :: i, j, globrowoff
2354 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2355 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowD: Bad input globrow ',globrow;
CALL fl(ofu)
2358 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2359 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowD: Non-local globrow ',globrow;
CALL fl(ofu)
2363 globrowoff = globrow-startglobrow+1
2371 lelement(1, globrowoff)%D(i,j) = val
2378 orig(globrowoff)%D = lelement(1,globrowoff)%D
2383 END SUBROUTINE setmatrixrowd
2390 SUBROUTINE setmatrixrowu( globrow, U )
2394 INTEGER,
INTENT(IN) :: globrow
2395 REAL(dp),
INTENT(IN) :: U(:,:)
2400 INTEGER :: i, j, globrowoff
2405 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2406 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowU: Bad input globrow ',globrow;
CALL fl(ofu)
2409 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2410 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowU: Non-local globrow ',globrow;
CALL fl(ofu)
2414 globrowoff = globrow-startglobrow+1
2421 IF ( globrow .EQ. n )
THEN
2426 lelement(1, globrowoff)%U(i,j) = val
2433 orig(globrowoff)%U = lelement(1,globrowoff)%U
2438 END SUBROUTINE setmatrixrowu
2445 SUBROUTINE setmatrixrowcoll( globrow, Lj, j )
2449 INTEGER,
INTENT(IN) :: globrow
2450 REAL(dp),
INTENT(IN) :: Lj(:)
2451 INTEGER,
INTENT(IN) :: j
2456 INTEGER :: i, globrowoff
2461 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2462 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2465 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2466 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2469 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2470 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2474 globrowoff = globrow-startglobrow+1
2480 IF ( globrow .EQ. 1 )
THEN
2485 lelement(1, globrowoff)%L(i,j) = val
2491 orig(globrowoff)%L(:,j) = lelement(1,globrowoff)%L(:,j)
2496 END SUBROUTINE setmatrixrowcoll
2503 SUBROUTINE setmatrixrowcold( globrow, Dj, j )
2507 INTEGER,
INTENT(IN) :: globrow
2508 REAL(dp),
INTENT(IN) :: Dj(:)
2509 INTEGER,
INTENT(IN) :: j
2514 INTEGER :: i, globrowoff, MPI_ERR
2519 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2520 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2524 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2525 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2529 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2530 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Bad j column ',j;
CALL fl(ofu)
2534 globrowoff = globrow-startglobrow+1
2541 lelement(1, globrowoff)%D(i,j) = val
2547 orig(globrowoff)%D(:,j) = lelement(1,globrowoff)%D(:,j)
2552 END SUBROUTINE setmatrixrowcold
2559 SUBROUTINE setmatrixrowcolu( globrow, Uj, j )
2563 INTEGER,
INTENT(IN) :: globrow
2564 REAL(dp),
INTENT(IN) :: Uj(:)
2565 INTEGER,
INTENT(IN) :: j
2570 INTEGER :: i, globrowoff
2575 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2576 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2579 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2580 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2583 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2584 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2588 globrowoff = globrow-startglobrow+1
2594 IF ( globrow .EQ. n )
THEN
2599 lelement(1, globrowoff)%U(i,j) = val
2605 orig(globrowoff)%U(:,j) = lelement(1,globrowoff)%U(:,j)
2610 END SUBROUTINE setmatrixrowcolu
2617 SUBROUTINE setmatrixrhs( globrow, b )
2621 INTEGER,
INTENT(IN) :: globrow
2622 REAL(dp),
INTENT(IN) :: b(:)
2627 INTEGER :: i, globrowoff
2632 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2633 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2636 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2637 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2641 globrowoff = globrow-startglobrow+1
2648 lelement(1, globrowoff)%b(i,1) = val
2654 orig(globrowoff)%b = lelement(1,globrowoff)%b
2659 END SUBROUTINE setmatrixrhs
2666 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2670 INTEGER,
INTENT(IN) :: globrow
2671 REAL(dp),
INTENT(OUT) :: Lj(:)
2672 INTEGER,
INTENT(IN) :: j
2677 INTEGER :: i, globrowoff
2682 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2683 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2686 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2687 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2690 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2691 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2695 globrowoff = globrow-startglobrow+1
2701 IF ( globrow .EQ. 1 )
THEN
2704 val = lelement(1, globrowoff)%L(i,j)
2709 END SUBROUTINE getmatrixrowcoll
2716 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2720 INTEGER,
INTENT(IN) :: globrow
2721 REAL(dp),
INTENT(OUT) :: Dj(:)
2722 INTEGER,
INTENT(IN) :: j
2727 INTEGER :: i, globrowoff
2732 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2733 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2736 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2737 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2740 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2741 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad j column ',j;
CALL fl(ofu)
2745 globrowoff = globrow-startglobrow+1
2751 val = lelement(1, globrowoff)%D(i,j)
2755 END SUBROUTINE getmatrixrowcold
2762 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2766 INTEGER,
INTENT(IN) :: globrow
2767 REAL(dp),
INTENT(OUT) :: Uj(:)
2768 INTEGER,
INTENT(IN) :: j
2773 INTEGER :: i, globrowoff
2778 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2779 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2782 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2783 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2786 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2787 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2791 globrowoff = globrow-startglobrow+1
2797 IF ( globrow .EQ. n )
THEN
2800 val = lelement(1, globrowoff)%U(i,j)
2805 END SUBROUTINE getmatrixrowcolu
2812 SUBROUTINE getmatrixrhs( globrow, b )
2816 INTEGER,
INTENT(IN) :: globrow
2817 REAL(dp),
INTENT(OUT) :: b(:)
2822 INTEGER :: i, globrowoff
2827 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2828 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2831 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2832 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2836 globrowoff = globrow-startglobrow+1
2842 val = lelement(1, globrowoff)%b(i,1)
2846 END SUBROUTINE getmatrixrhs
2853 SUBROUTINE getsolutionvector( globrow, x )
2857 INTEGER,
INTENT(IN) :: globrow
2858 REAL(dp),
INTENT(OUT) :: x(:)
2868 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2869 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Bad input globrow ',globrow;
CALL fl(ofu)
2872 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2873 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Non-local globrow ',globrow;
CALL fl(ofu)
2881 val = selement(globrow)%x(i)
2885 END SUBROUTINE getsolutionvector
2892 SUBROUTINE setidentitytestcase
2896 INTEGER :: i, j, globrow, globrowoff
2898 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case ------';
CALL fl(ofu)
2903 DO globrow = startglobrow, endglobrow, 1
2904 globrowoff = globrow-startglobrow+1
2907 lelement(1, globrowoff)%L(i,j) = zero
2908 IF ( i .EQ. j )
THEN
2909 lelement(1, globrowoff)%D(i,j) = one
2911 lelement(1, globrowoff)%D(i,j) = zero
2913 lelement(1, globrowoff)%U(i,j) = zero
2915 lelement(1, globrowoff)%b(i,1) = one
2920 orig(globrowoff)%L = lelement(1,globrowoff)%L
2921 orig(globrowoff)%D = lelement(1,globrowoff)%D
2922 orig(globrowoff)%U = lelement(1,globrowoff)%U
2923 orig(globrowoff)%b = lelement(1,globrowoff)%b
2930 END SUBROUTINE setidentitytestcase
2938 SUBROUTINE setidentityrhs
2942 INTEGER :: i, j, globrow, globrowoff
2944 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case RHS ------';
CALL fl(ofu)
2949 DO globrow = startglobrow, endglobrow, 1
2950 globrowoff = globrow-startglobrow+1
2952 lelement(1, globrowoff)%b(i,1) = one
2957 orig(globrowoff)%b = lelement(1,globrowoff)%b
2963 END SUBROUTINE setidentityrhs
2970 SUBROUTINE setrandomtestcase
2974 REAL(dp) :: randval, rmin, rmax
2976 INTEGER,
ALLOCATABLE :: seeds(:)
2977 INTEGER :: i, j, globrow, globrowoff
2979 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case ------';
CALL fl(ofu)
2984 CALL random_seed( size=rngwidth)
2985 ALLOCATE( seeds(rngwidth) )
2987 seeds(i) = i*(rank+100)*p
2989 CALL random_seed( put=seeds(1:rngwidth) )
2995 IF( writeproblemfile )
THEN
2996 IF(kpdbg)
WRITE(pfu,*) n*m;
CALL fl(pfu)
2997 IF(kpdbg)
WRITE(pfu,*) (3*n-2)*m*m;
CALL fl(pfu)
3005 DO globrow = startglobrow, endglobrow, 1
3006 globrowoff = globrow-startglobrow+1
3009 IF ( globrow .EQ. 1 )
THEN
3012 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3013 IF( writeproblemfile )
THEN
3014 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
3017 lelement(1, globrowoff)%L(i,j) = randval
3019 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3020 lelement(1, globrowoff)%D(i,j) = randval
3021 IF( writeproblemfile )
THEN
3022 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
3025 IF ( globrow .EQ. n )
THEN
3028 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3029 IF( writeproblemfile )
THEN
3030 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
3033 lelement(1, globrowoff)%U(i,j) = randval
3035 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3036 lelement(1, globrowoff)%b(i,1) = randval
3042 orig(globrowoff)%L = lelement(1,globrowoff)%L
3043 orig(globrowoff)%D = lelement(1,globrowoff)%D
3044 orig(globrowoff)%U = lelement(1,globrowoff)%U
3045 orig(globrowoff)%b = lelement(1,globrowoff)%b
3048 IF (writeproblemfile)
THEN
3049 DO globrow = startglobrow, endglobrow, 1
3050 globrowoff = globrow-startglobrow+1
3052 IF(kpdbg)
WRITE(pfu,*) lelement(1,globrowoff)%b(i,1);
CALL fl(pfu)
3061 END SUBROUTINE setrandomtestcase
3069 SUBROUTINE setrandomrhs( randseedoff )
3073 INTEGER,
INTENT(IN) :: randseedoff
3077 REAL(dp) :: randval, rmin, rmax
3079 INTEGER,
ALLOCATABLE :: seeds(:)
3080 INTEGER :: i, j, globrow, globrowoff
3082 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case RHS ------';
CALL fl(ofu)
3087 CALL random_seed( size=rngwidth)
3088 ALLOCATE( seeds(rngwidth) )
3090 seeds(i) = i*(rank+100+34+randseedoff)*p
3092 CALL random_seed( put=seeds(1:rngwidth) )
3100 DO globrow = startglobrow, endglobrow, 1
3101 globrowoff = globrow-startglobrow+1
3103 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3104 lelement(1, globrowoff)%b(i,1) = randval
3110 orig(globrowoff)%b = lelement(1,globrowoff)%b
3116 END SUBROUTINE setrandomrhs
3123 SUBROUTINE finalize( do_mpifinalize )
3127 LOGICAL,
INTENT(IN) :: do_mpifinalize
3134 INTEGER :: tempinvcount, tempmulcount, tempsolcount
3137 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3138 IF(kpdbg)
WRITE(ofu,*)
'------ Finalizing start ------';
CALL fl(ofu)
3146 IF (
ALLOCATED(lelement) )
THEN
3147 DO level = lbound(lelement,1), ubound(lelement,1)
3148 DO globrow = lbound(lelement,2), ubound(lelement,2)
3149 IF (
ALLOCATED(lelement(level,globrow)%L) )
THEN
3150 DEALLOCATE( lelement(level, globrow)%L )
3152 IF (
ALLOCATED(lelement(level,globrow)%D) )
THEN
3153 DEALLOCATE( lelement(level, globrow)%D )
3155 IF (
ALLOCATED(lelement(level,globrow)%U) )
THEN
3156 DEALLOCATE( lelement(level, globrow)%U )
3158 IF (
ALLOCATED(lelement(level,globrow)%b) )
THEN
3159 DEALLOCATE( lelement(level, globrow)%b )
3161 IF (
ALLOCATED(lelement(level,globrow)%pivot) )
THEN
3162 DEALLOCATE( lelement(level, globrow)%pivot )
3166 DEALLOCATE(lelement)
3168 IF (
ALLOCATED(orig) )
THEN
3169 DO globrow = lbound(orig,1), ubound(orig,1)
3170 IF (
ALLOCATED(orig(globrow)%L) )
THEN
3171 DEALLOCATE( orig(globrow)%L )
3173 IF (
ALLOCATED(orig(globrow)%D) )
THEN
3174 DEALLOCATE( orig(globrow)%D )
3176 IF (
ALLOCATED(orig(globrow)%U) )
THEN
3177 DEALLOCATE( orig(globrow)%U )
3179 IF (
ALLOCATED(orig(globrow)%b) )
THEN
3180 DEALLOCATE( orig(globrow)%b )
3182 IF (
ALLOCATED(orig(globrow)%pivot) )
THEN
3183 DEALLOCATE( orig(globrow)%pivot )
3189 IF (
ALLOCATED(selement) )
THEN
3190 DO globrow = 1, n, 1
3191 IF (
ALLOCATED(selement(globrow)%x) )
THEN
3192 DEALLOCATE( selement(globrow)%x )
3195 DEALLOCATE(selement)
3199 #if defined(MPI_OPT)
3200 IF ( usebarriers )
THEN
3201 IF(kpdbg)
WRITE(ofu,*)
'Barrier in finalize';
CALL fl(ofu)
3202 CALL mpi_barrier( ns_comm, mpierr )
3203 IF(kpdbg)
WRITE(ofu,*)
'Done barrier in finalize';
CALL fl(ofu)
3206 IF ( do_mpifinalize )
THEN
3207 CALL mpi_finalize( mpierr )
3212 tempmulcount=totmatmulcount;
IF ( tempmulcount .LE. 0 ) tempmulcount = 1
3213 tempinvcount=totinvcount;
IF ( tempinvcount .LE. 0 ) tempinvcount = 1
3214 tempsolcount=totmatsolcount;
IF ( tempsolcount .LE. 0 ) tempsolcount = 1
3215 IF(kpdbg)
WRITE(ofu,*)
'N=', n,
' M=', m,
' P=', p,
' rank=', rank
3216 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory ', membytes/1e6,
' MB'
3217 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Computation ', tottime-totcommtime,
' sec'
3218 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Communication ', totcommtime,
' sec'
3219 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix inv ',totinvcount,
' * ', &
3220 totinvtime/tempinvcount,
' sec = ', totinvtime,
' sec'
3221 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix mul ',totmatmulcount,
' * ', &
3222 totmatmultime/tempmulcount,
' sec = ', totmatmultime,
' sec'
3223 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix sol ',totmatsolcount,
' * ', &
3224 totmatsoltime/tempsolcount,
' sec = ', totmatsoltime,
' sec'
3225 IF(kpdbg)
WRITE(ofu,*)
'Finalized rank ', rank
3226 IF(kpdbg)
WRITE(ofu,*)
'------ Finalization end ------';
CALL fl(ofu)
3227 END SUBROUTINE finalize
3234 LOGICAL FUNCTION iseven( num )
3235 INTEGER,
INTENT(IN) :: num
3236 iseven = mod(num,2) .EQ. 0
3244 LOGICAL FUNCTION isodd( num )
3245 INTEGER,
INTENT(IN) :: num
3246 isodd = (.NOT. iseven(num))
3254 FUNCTION lr2gr( locrow, level )
result ( globrow )
3258 INTEGER,
INTENT(IN) :: locrow
3259 INTEGER,
INTENT(IN) :: level
3271 levelloop:
DO i = level-1, 1, -1
3272 globrow = 2*globrow - 1
3278 IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow )
THEN
3292 FUNCTION gr2lr( globrow, level )
result ( locrow )
3296 INTEGER,
INTENT(IN) :: globrow
3297 INTEGER,
INTENT(IN) :: level
3310 levelloop:
DO i = 1, level-1, 1
3311 IF ( iseven(locrow) )
THEN
3315 locrow = (locrow+1) / 2
3326 FUNCTION gr2rank( globrow )
result ( outrank )
3330 INTEGER,
INTENT(IN) :: globrow
3339 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
3344 IF ( globrow .LE. (m+1)*spill )
THEN
3345 outrank = (globrow-1)/(m+1)
3347 outrank = (globrow-1 - (m+1)*spill)/m + spill
3352 END FUNCTION gr2rank
3359 FUNCTION lr2rank( locrow, level )
result ( outrank )
3363 INTEGER,
INTENT(IN) :: locrow
3364 INTEGER,
INTENT(IN) :: level
3372 globrow = lr2gr( locrow, level )
3373 outrank = gr2rank( globrow )
3376 END FUNCTION lr2rank
3385 #if defined(MPI_OPT)
3386 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
3390 INTEGER,
INTENT(IN) :: locrow
3391 INTEGER,
INTENT(IN) :: level
3392 INTEGER,
INTENT(IN) :: startlocrow
3393 INTEGER,
INTENT(IN) :: endlocrow
3394 LOGICAL,
INTENT(IN) :: bonly
3399 INTEGER :: globrowoff
3400 INTEGER :: prevglobrowoff
3401 INTEGER :: nextglobrowoff
3406 IF ( level .GE. nlevels )
THEN
3407 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: no op ';
CALL fl(ofu)
3408 IF ( rank .NE. 0 )
THEN
3409 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: impossible ',rank;
CALL fl(ofu)
3418 IF ( iseven( locrow ) )
THEN
3419 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: odd only ',globrow,
' ',locrow;
CALL fl(ofu); stop
3421 IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) )
THEN
3422 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: locrow prob ',globrow,
' ',locrow;
CALL fl(ofu);stop
3426 globrow = lr2gr( locrow, level )
3427 globrowoff = globrow - startglobrow + 1
3428 IF(kpdbg)
WRITE(ofu,*)
' Compute mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3433 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D ) )
THEN
3434 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D';
CALL fl(ofu); stop
3436 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L ) )
THEN
3437 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L';
CALL fl(ofu); stop
3439 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U ) )
THEN
3440 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U';
CALL fl(ofu); stop
3442 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b ) )
THEN
3443 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b';
CALL fl(ofu); stop
3449 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%D ) )
THEN
3450 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D +1';
CALL fl(ofu); stop
3452 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%L ) )
THEN
3453 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L +1';
CALL fl(ofu); stop
3455 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%U ) )
THEN
3456 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U +1';
CALL fl(ofu); stop
3458 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%b ) )
THEN
3459 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b +1';
CALL fl(ofu); stop
3465 IF ( .NOT. bonly )
THEN
3466 lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3468 lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3473 IF ( lr2rank(locrow-1,level) .LT. 0 )
THEN
3475 IF ( .NOT. bonly )
THEN
3476 lelement(level+1,globrowoff)%U = 0
3477 IF(kpdbg)
WRITE(ofu,*)
' ZERO L';
CALL fl(ofu)
3480 IF ( locrow .EQ. startlocrow )
THEN
3482 ELSE IF ( locrow .GT. startlocrow )
THEN
3483 prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3485 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible prev';
CALL fl(ofu); stop
3488 IF ( .NOT. bonly )
THEN
3492 CALL bsystemclock(mattimer1)
3493 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3494 lelement(level,prevglobrowoff)%U, &
3495 one, lelement(level+1,globrowoff)%D )
3496 CALL bsystemclock(mattimer2)
3497 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3498 IF(kpdbg)
WRITE(ofu,*)
' Dtop';
CALL fl(ofu)
3503 CALL bsystemclock(mattimer1)
3504 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3505 lelement(level,prevglobrowoff)%L, &
3506 zero, lelement(level+1,globrowoff)%L )
3507 CALL bsystemclock(mattimer2)
3508 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3509 IF(kpdbg)
WRITE(ofu,*)
' L';
CALL fl(ofu)
3515 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3516 lelement(level,prevglobrowoff)%b(:,1), &
3517 one, lelement(level+1,globrowoff)%b(:,1) )
3518 IF(kpdbg)
WRITE(ofu,*)
' btop';
CALL fl(ofu)
3524 IF ( lr2rank(locrow+1,level) .LT. 0 )
THEN
3526 IF ( .NOT. bonly )
THEN
3527 lelement(level+1,globrowoff)%U = 0
3528 IF(kpdbg)
WRITE(ofu,*)
' ZERO U';
CALL fl(ofu)
3531 IF ( locrow .EQ. endlocrow )
THEN
3532 nextglobrowoff = endglobrow - startglobrow + 2
3533 ELSE IF ( locrow .LT. endlocrow )
THEN
3534 nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3536 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible next';
CALL fl(ofu); stop
3539 IF ( .NOT. bonly )
THEN
3543 CALL bsystemclock(mattimer1)
3544 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3545 lelement(level,nextglobrowoff)%L, &
3546 one, lelement(level+1,globrowoff)%D )
3547 CALL bsystemclock(mattimer2)
3548 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3549 IF(kpdbg)
WRITE(ofu,*)
' Dbot';
CALL fl(ofu)
3554 CALL bsystemclock(mattimer1)
3555 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3556 lelement(level,nextglobrowoff)%U, &
3557 zero, lelement(level+1,globrowoff)%U )
3558 CALL bsystemclock(mattimer2)
3559 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3560 IF(kpdbg)
WRITE(ofu,*)
' U';
CALL fl(ofu)
3566 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3567 lelement(level,nextglobrowoff)%b(:,1), &
3568 one, lelement(level+1,globrowoff)%b(:,1) )
3569 IF(kpdbg)
WRITE(ofu,*)
' bbot';
CALL fl(ofu)
3571 END SUBROUTINE computeforwardoddrowhats
3578 SUBROUTINE forwardsolve
3585 INTEGER :: globrowoff
3586 INTEGER :: startlocrow
3587 INTEGER :: endlocrow
3588 INTEGER :: rowoffset
3590 INTEGER :: nbrmpireqindx
3591 INTEGER :: nbrmpireqcnt
3592 INTEGER :: nbrmpinirecvs
3593 INTEGER :: nbrmpinisends
3594 INTEGER :: nbrmpireq(6)
3595 INTEGER :: nbrmpierr(6)
3596 INTEGER :: mpiwaiterr
3598 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3599 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3600 INTEGER :: waitmatchindex
3605 CALL bsystemclock(globtimer1)
3608 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3609 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve start ------';
CALL fl(ofu)
3610 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3613 CALL plbforwardinitialize()
3619 forwardsolveloop:
DO level = 1, nlevels, 1
3621 IF ( usebarriers )
THEN
3622 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward level ', level;
CALL fl(ofu)
3623 CALL mpi_barrier( ns_comm, mpierr )
3624 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward level ', level;
CALL fl(ofu)
3632 DO globrow = startglobrow, endglobrow, 1
3633 locrow = gr2lr( globrow, level )
3634 IF ( locrow .GT. 0 )
THEN
3635 IF ( locrow .LT. startlocrow )
THEN
3636 startlocrow = locrow
3638 IF ( locrow .GT. endlocrow )
THEN
3647 IF ( startlocrow .GT. endlocrow )
THEN
3648 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward level ', level;
CALL fl(ofu)
3649 CALL plbforwardinitializelevel( level, .false. )
3650 CALL plbforwardfinalizelevel( level, .false. )
3651 cycle forwardsolveloop
3653 CALL plbforwardinitializelevel( level, .true. )
3656 IF(kpdbg)
WRITE(ofu,*)
'**** Forward level ', level,
' ****';
CALL fl(ofu)
3661 IF ( isodd(startlocrow) .AND. &
3662 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
3664 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3665 ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3666 ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3667 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3668 CALL chargememory( (2*m*m+m)*dpsz )
3670 lelement(level, globrowoff)%L = 0
3671 lelement(level, globrowoff)%U = 0
3672 lelement(level, globrowoff)%b = 0
3674 IF ( isodd(endlocrow) .AND. &
3675 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
3676 globrowoff = endglobrow-startglobrow+2
3677 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3678 ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3679 ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3680 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3681 CALL chargememory( (2*m*m+m)*dpsz )
3683 lelement(level, globrowoff)%L = 0
3684 lelement(level, globrowoff)%U = 0
3685 lelement(level, globrowoff)%b = 0
3691 IF ( level .LT. nlevels )
THEN
3692 DO locrow = startlocrow, endlocrow, 1
3693 IF ( isodd(locrow) )
THEN
3694 globrow = lr2gr( locrow, level )
3695 globrowoff = globrow-startglobrow+1
3696 IF ( .NOT.
ALLOCATED( lelement(level+1, globrowoff)%L ) )
THEN
3697 ALLOCATE( lelement(level+1, globrowoff)%L(m,m) )
3698 ALLOCATE( lelement(level+1, globrowoff)%D(m,m) )
3699 ALLOCATE( lelement(level+1, globrowoff)%U(m,m) )
3700 ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3701 CALL chargememory( (2*m*m+m)*dpsz )
3703 lelement(level+1, globrowoff)%L = 0
3704 lelement(level+1, globrowoff)%D = 0
3705 lelement(level+1, globrowoff)%U = 0
3706 lelement(level+1, globrowoff)%b = 0
3714 DO nbrmpireqindx = 1, 6
3715 nbrmpireq(nbrmpireqindx) = mpi_request_null
3724 nbrrank = lr2rank(startlocrow-1,level)
3725 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
3731 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
3733 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3734 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu); stop
3736 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3737 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu); stop
3739 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3740 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu); stop
3743 CALL bsystemclock(loctimer1)
3744 CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3745 nbrrank, 1, ns_comm, nbrmpireq(nbrmpireqindx), &
3746 nbrmpierr(nbrmpireqindx) )
3747 nbrmpireqindx = nbrmpireqindx + 1
3748 nbrmpinirecvs = nbrmpinirecvs + 1
3749 IF(kpdbg)
WRITE(ofu,*)
' Irecv 1 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3751 CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3752 nbrrank, 2, ns_comm, nbrmpireq(nbrmpireqindx), &
3753 nbrmpierr(nbrmpireqindx))
3754 nbrmpireqindx = nbrmpireqindx + 1
3755 nbrmpinirecvs = nbrmpinirecvs + 1
3756 IF(kpdbg)
WRITE(ofu,*)
' Irecv 2 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3758 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3759 nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3760 nbrmpierr(nbrmpireqindx))
3761 nbrmpireqindx = nbrmpireqindx + 1
3762 nbrmpinirecvs = nbrmpinirecvs + 1
3763 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3764 CALL bsystemclock(loctimer2)
3765 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3768 nbrrank = lr2rank(endlocrow+1,level)
3769 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
3774 globrowoff = endglobrow-startglobrow+2
3775 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
3777 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3778 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu); stop
3780 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3781 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu); stop
3783 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3784 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu); stop
3787 CALL bsystemclock(loctimer1)
3788 CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3789 nbrrank, 4, ns_comm, nbrmpireq(nbrmpireqindx), &
3790 nbrmpierr(nbrmpireqindx))
3791 nbrmpireqindx = nbrmpireqindx + 1
3792 nbrmpinirecvs = nbrmpinirecvs + 1
3793 IF(kpdbg)
WRITE(ofu,*)
' Irecv 4 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3795 CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3796 nbrrank, 5, ns_comm, nbrmpireq(nbrmpireqindx), &
3797 nbrmpierr(nbrmpireqindx))
3798 nbrmpireqindx = nbrmpireqindx + 1
3799 nbrmpinirecvs = nbrmpinirecvs + 1
3800 IF(kpdbg)
WRITE(ofu,*)
' Irecv 5 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3802 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3803 nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3804 nbrmpierr(nbrmpireqindx))
3805 nbrmpireqindx = nbrmpireqindx + 1
3806 nbrmpinirecvs = nbrmpinirecvs + 1
3807 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3808 CALL bsystemclock(loctimer2)
3809 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3816 DO locrow = startlocrow, endlocrow, 1
3817 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
3818 globrow = lr2gr( locrow, level )
3819 globrowoff = globrow-startglobrow+1
3821 IF(kpdbg)
WRITE(ofu,*)
' Compute even hats ',globrow,
' ', locrow;
CALL fl(ofu)
3826 IF ( level .EQ. nlevels )
THEN
3827 IF ( (rank .NE. 0) .OR. &
3828 (startlocrow .NE. endlocrow) .OR. &
3829 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
3830 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
3834 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
3835 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu); stop
3837 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
3838 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu); stop
3840 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3841 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu); stop
3843 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3844 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu); stop
3846 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3847 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu); stop
3853 CALL bsystemclock(mattimer1)
3854 CALL plbdgetrf( lelement(level,globrowoff)%D, &
3855 lelement(1,globrowoff)%pivot, blaserr )
3856 CALL bsystemclock(mattimer2)
3857 CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3858 IF (blaserr .NE. 0)
THEN
3859 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRF failed ', blaserr;
CALL fl(ofu); stop
3865 IF ( level .LT. nlevels )
THEN
3866 CALL bsystemclock(mattimer1)
3867 CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3868 lelement(1,globrowoff)%pivot, &
3869 lelement(level,globrowoff)%L, blaserr )
3870 CALL bsystemclock(mattimer2)
3871 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3872 IF (blaserr .NE. 0)
THEN
3873 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS L failed';
CALL fl(ofu); stop
3875 CALL bsystemclock(mattimer1)
3876 CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3877 lelement(1,globrowoff)%pivot, &
3878 lelement(level,globrowoff)%U, blaserr )
3879 CALL bsystemclock(mattimer2)
3880 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3881 IF (blaserr .NE. 0)
THEN
3882 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS U failed';
CALL fl(ofu); stop
3889 CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
3890 lelement(1,globrowoff)%pivot, &
3891 lelement(level,globrowoff)%b, blaserr )
3892 IF (blaserr .NE. 0)
THEN
3893 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu); stop
3900 DO rowoffset = -1, 1, 2
3901 nbrrank = lr2rank(locrow+rowoffset, level)
3902 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
3903 CALL bsystemclock(loctimer1)
3904 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3905 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu); stop
3907 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3908 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu); stop
3910 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3911 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu); stop
3914 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
3915 globrowoff = globrow-startglobrow+1
3916 stag = (((1-rowoffset))/2)*3
3917 CALL mpi_isend( lelement(level,globrowoff)%L, m*m, mpi_real8, &
3918 nbrrank, stag+1, ns_comm, &
3919 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3920 nbrmpireqindx = nbrmpireqindx + 1
3921 nbrmpinisends = nbrmpinisends + 1
3922 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+1,
' ',globrowoff;
CALL fl(ofu)
3924 CALL mpi_isend( lelement(level,globrowoff)%U, m*m, mpi_real8, &
3925 nbrrank, stag+2, ns_comm, &
3926 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3927 nbrmpireqindx = nbrmpireqindx + 1
3928 nbrmpinisends = nbrmpinisends + 1
3929 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+2,
' ',globrowoff;
CALL fl(ofu)
3931 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3932 nbrrank, stag+3, ns_comm, &
3933 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3934 nbrmpireqindx = nbrmpireqindx + 1
3935 nbrmpinisends = nbrmpinisends + 1
3936 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
3937 CALL bsystemclock(loctimer2)
3938 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3947 DO locrow = startlocrow, endlocrow, 1
3948 globrow = lr2gr( locrow, level )
3949 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
3950 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
3951 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3952 CALL computeforwardoddrowhats( locrow, level, &
3953 startlocrow, endlocrow, .false. )
3963 nbrmpireqcnt = nbrmpireqindx - 1
3964 IF ( nbrmpireqcnt .GT. 0 )
THEN
3965 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
3966 CALL bsystemclock(loctimer1)
3967 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3968 CALL bsystemclock(loctimer2)
3969 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3975 IF ( isodd(startlocrow) )
THEN
3976 globrow = lr2gr( startlocrow, level )
3977 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
3978 CALL computeforwardoddrowhats( startlocrow, level, &
3979 startlocrow, endlocrow, .false. )
3981 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
3982 globrow = lr2gr( endlocrow, level )
3983 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
3984 CALL computeforwardoddrowhats( endlocrow, level, &
3985 startlocrow, endlocrow, .false. )
3987 CALL plbforwardfinalizelevel( level, .true. )
3988 END DO forwardsolveloop
3991 CALL plbforwardfinalize()
3994 matdirtied = .false.
3995 rhsdirtied = .false.
3998 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
3999 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve end ------';
CALL fl(ofu)
4001 CALL bsystemclock(globtimer2)
4002 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4004 END SUBROUTINE forwardsolve
4012 SUBROUTINE forwardupdateb
4019 INTEGER :: globrowoff
4020 INTEGER :: startlocrow
4021 INTEGER :: endlocrow
4022 INTEGER :: rowoffset
4024 INTEGER :: nbrmpireqindx
4025 INTEGER :: nbrmpireqcnt
4026 INTEGER :: nbrmpinirecvs
4027 INTEGER :: nbrmpinisends
4028 INTEGER :: nbrmpireq(6)
4029 INTEGER :: nbrmpierr(6)
4030 INTEGER :: mpiwaiterr
4032 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
4033 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4034 INTEGER :: waitmatchindex
4040 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4041 IF(kpdbg)
WRITE(ofu,*)
'------ Forward updateb start ------';
CALL fl(ofu)
4042 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4045 IF ( matdirtied )
THEN
4046 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4047 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix ------';
CALL fl(ofu)
4048 IF(kpdbg)
WRITE(ofu,*)
' ------ Forward solve, not updateb... ------';
CALL fl(ofu)
4053 CALL bsystemclock(globtimer1)
4056 CALL plbforwardinitialize()
4062 forwardsolveloop:
DO level = 1, nlevels, 1
4064 IF ( usebarriers )
THEN
4065 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward updateb level ', level;
CALL fl(ofu)
4066 CALL mpi_barrier( ns_comm, mpierr )
4067 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward updateb level ', level;
CALL fl(ofu)
4075 DO globrow = startglobrow, endglobrow, 1
4076 locrow = gr2lr( globrow, level )
4077 IF ( locrow .GT. 0 )
THEN
4078 IF ( locrow .LT. startlocrow )
THEN
4079 startlocrow = locrow
4081 IF ( locrow .GT. endlocrow )
THEN
4090 IF ( startlocrow .GT. endlocrow )
THEN
4091 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward updateb level ',level;
CALL fl(ofu)
4092 CALL plbforwardinitializelevel( level, .false. )
4093 CALL plbforwardfinalizelevel( level, .false. )
4094 cycle forwardsolveloop
4096 CALL plbforwardinitializelevel( level, .true. )
4099 IF(kpdbg)
WRITE(ofu,*)
'**** Forward updateb level ', level,
' ****';
CALL fl(ofu)
4105 IF ( isodd(startlocrow) .AND. &
4106 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
4108 IF( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4109 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4110 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
4111 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +0 memory error';
CALL fl(ofu); stop
4113 lelement(level, globrowoff)%b = 0
4115 IF ( isodd(endlocrow) .AND. &
4116 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
4117 globrowoff = endglobrow-startglobrow+2
4118 IF ( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4119 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4120 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
4121 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +2 memory error';
CALL fl(ofu); stop
4123 lelement(level, globrowoff)%b = 0
4130 IF ( level .LT. nlevels )
THEN
4131 DO locrow = startlocrow, endlocrow, 1
4132 IF ( isodd(locrow) )
THEN
4133 globrow = lr2gr( locrow, level )
4134 globrowoff = globrow-startglobrow+1
4135 IF ( .NOT. (
ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
4136 ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
4137 ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
4138 ALLOCATED( lelement(level+1, globrowoff)%b ) ) )
THEN
4139 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +1 memory error';
CALL fl(ofu); stop
4141 lelement(level+1, globrowoff)%b = 0
4149 DO nbrmpireqindx = 1, 6
4150 nbrmpireq(nbrmpireqindx) = mpi_request_null
4159 nbrrank = lr2rank(startlocrow-1,level)
4160 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
4166 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
4168 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4169 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu); stop
4171 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4172 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu); stop
4174 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4175 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu); stop
4178 CALL bsystemclock(loctimer1)
4179 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4180 nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
4181 nbrmpierr(nbrmpireqindx))
4182 nbrmpireqindx = nbrmpireqindx + 1
4183 nbrmpinirecvs = nbrmpinirecvs + 1
4184 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
4185 CALL bsystemclock(loctimer2)
4186 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4189 nbrrank = lr2rank(endlocrow+1,level)
4190 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
4195 globrowoff = endglobrow-startglobrow+2
4196 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
4198 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4199 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu); stop
4201 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4202 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu); stop
4204 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4205 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu); stop
4208 CALL bsystemclock(loctimer1)
4209 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4210 nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
4211 nbrmpierr(nbrmpireqindx))
4212 nbrmpireqindx = nbrmpireqindx + 1
4213 nbrmpinirecvs = nbrmpinirecvs + 1
4214 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
4215 CALL bsystemclock(loctimer2)
4216 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4223 DO locrow = startlocrow, endlocrow, 1
4224 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
4225 globrow = lr2gr( locrow, level )
4226 globrowoff = globrow-startglobrow+1
4228 IF(kpdbg)
WRITE(ofu,*)
' Computed even hats ',globrow,
' ', locrow;
CALL fl(ofu)
4233 IF ( level .EQ. nlevels )
THEN
4234 IF ( (rank .NE. 0) .OR. &
4235 (startlocrow .NE. endlocrow) .OR. &
4236 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
4237 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
4241 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
4242 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu); stop
4244 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
4245 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu); stop
4247 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4248 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu); stop
4250 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4251 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu); stop
4253 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4254 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu); stop
4264 CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
4265 lelement(1,globrowoff)%pivot, &
4266 lelement(level,globrowoff)%b, blaserr )
4267 IF (blaserr .NE. 0)
THEN
4268 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu); stop
4275 DO rowoffset = -1, 1, 2
4276 nbrrank = lr2rank(locrow+rowoffset, level)
4277 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
4278 CALL bsystemclock(loctimer1)
4279 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
4280 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu); stop
4282 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
4283 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu); stop
4285 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
4286 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu); stop
4289 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
4290 globrowoff = globrow-startglobrow+1
4291 stag = (((1-rowoffset))/2)*3
4292 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
4293 nbrrank, stag+3, ns_comm, &
4294 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
4295 nbrmpireqindx = nbrmpireqindx + 1
4296 nbrmpinisends = nbrmpinisends + 1
4297 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
4298 CALL bsystemclock(loctimer2)
4299 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4308 DO locrow = startlocrow, endlocrow, 1
4309 globrow = lr2gr( locrow, level )
4310 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
4311 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
4312 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
4313 CALL computeforwardoddrowhats( locrow, level, &
4314 startlocrow, endlocrow, .true. )
4324 nbrmpireqcnt = nbrmpireqindx - 1
4325 IF ( nbrmpireqcnt .GT. 0 )
THEN
4326 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4327 CALL bsystemclock(loctimer1)
4328 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4329 CALL bsystemclock(loctimer2)
4330 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4336 IF ( isodd(startlocrow) )
THEN
4337 globrow = lr2gr( startlocrow, level )
4338 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
4339 CALL computeforwardoddrowhats( startlocrow, level, &
4340 startlocrow, endlocrow, .true. )
4342 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
4343 globrow = lr2gr( endlocrow, level )
4344 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
4345 CALL computeforwardoddrowhats( endlocrow, level, &
4346 startlocrow, endlocrow, .true. )
4348 CALL plbforwardfinalizelevel( level, .true. )
4349 END DO forwardsolveloop
4352 CALL plbforwardfinalize()
4355 rhsdirtied = .false.
4358 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4359 IF(kpdbg)
WRITE(ofu,*)
'------ updateb solve end ------';
CALL fl(ofu)
4361 CALL bsystemclock(globtimer2)
4362 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4364 END SUBROUTINE forwardupdateb
4371 SUBROUTINE backwardsolve
4378 INTEGER :: globrowoff
4379 INTEGER :: prevglobrow
4380 INTEGER :: nextglobrow
4381 INTEGER :: startlocrow
4382 INTEGER :: endlocrow
4383 INTEGER :: rowoffset
4385 INTEGER :: nbrmpireqindx
4386 INTEGER :: nbrmpireqcnt
4387 INTEGER :: nbrmpinirecvs
4388 INTEGER :: nbrmpinisends
4389 INTEGER :: nbrmpireq(2)
4390 INTEGER :: nbrmpierr(2)
4391 INTEGER :: mpiwaiterr
4393 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4394 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4395 INTEGER :: waitmatchindex
4400 CALL bsystemclock(globtimer1)
4403 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4404 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve start ------';
CALL fl(ofu)
4407 IF ( matdirtied )
THEN
4408 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4409 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix; updating... ------';
CALL fl(ofu)
4412 IF ( rhsdirtied )
THEN
4413 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4414 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied RHS; updating... ------';
CALL fl(ofu)
4415 CALL forwardupdateb()
4419 CALL plbbackwardinitialize()
4424 IF ( (.NOT.
ALLOCATED(lelement)) .OR. (.NOT.
ALLOCATED(selement)) )
THEN
4425 IF(kpdbg)
WRITE(ofu,*)
'Forward not called before backward?';
CALL fl(ofu)
4433 backwardsolveloop:
DO level = nlevels, 1, -1
4435 IF ( usebarriers )
THEN
4436 IF(kpdbg)
WRITE(ofu,*)
'Barrier at backward level ', level;
CALL fl(ofu)
4437 CALL mpi_barrier( ns_comm, mpierr )
4438 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at backward level ', level;
CALL fl(ofu)
4446 DO globrow = startglobrow, endglobrow, 1
4447 locrow = gr2lr( globrow, level )
4448 IF ( locrow .GT. 0 )
THEN
4449 IF ( locrow .LT. startlocrow )
THEN
4450 startlocrow = locrow
4452 IF ( locrow .GT. endlocrow )
THEN
4461 IF ( startlocrow .GT. endlocrow )
THEN
4462 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at backward level', level;
CALL fl(ofu)
4463 CALL plbbackwardinitializelevel( level, .false. )
4464 CALL plbbackwardfinalizelevel( level, .false. )
4465 cycle backwardsolveloop
4467 CALL plbbackwardinitializelevel( level, .true. )
4470 IF(kpdbg)
WRITE(ofu,*)
'**** Backward level ', level,
' ****';
CALL fl(ofu)
4475 DO nbrmpireqindx = 1, 2
4476 nbrmpireq(nbrmpireqindx) = mpi_request_null
4485 nbrrank = lr2rank(startlocrow-1,level)
4486 IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
4492 globrow = lr2gr(startlocrow-1,level)
4493 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4494 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4495 ALLOCATE( selement(globrow)%x(m) )
4496 CALL chargememory( m*dpsz )
4498 CALL bsystemclock(loctimer1)
4499 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4500 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4501 nbrmpireqindx = nbrmpireqindx + 1
4502 nbrmpinirecvs = nbrmpinirecvs + 1
4503 CALL bsystemclock(loctimer2)
4504 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4506 nbrrank = lr2rank(endlocrow+1,level)
4507 IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
4513 globrow = lr2gr(endlocrow+1,level)
4514 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4515 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4516 ALLOCATE( selement(globrow)%x(m) )
4517 CALL chargememory( m*dpsz )
4519 CALL bsystemclock(loctimer1)
4520 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4521 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4522 nbrmpireqindx = nbrmpireqindx + 1
4523 nbrmpinirecvs = nbrmpinirecvs + 1
4524 CALL bsystemclock(loctimer2)
4525 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4532 DO locrow = startlocrow, endlocrow, 1
4533 IF ( isodd( locrow ) )
THEN
4538 DO rowoffset = -1, 1, 2
4539 nbrrank = lr2rank(locrow+rowoffset, level)
4540 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
4541 IF(kpdbg)
WRITE(ofu,*)
' Isend ', locrow,
' ', nbrrank;
CALL fl(ofu)
4542 globrow = lr2gr(locrow,level)
4543 IF ( .NOT.
ALLOCATED(selement(globrow)%x) )
THEN
4544 IF(kpdbg)
WRITE(ofu,*)
'ERROR BISEND AT LEVEL', level;
CALL fl(ofu)
4545 IF(kpdbg)
WRITE(ofu,*) locrow,
' ', globrow,
' ', nbrrank;
CALL fl(ofu)
4548 stag = (1-rowoffset)/2+1
4549 CALL bsystemclock(loctimer1)
4550 CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4551 nbrrank, stag, ns_comm, nbrmpireq(nbrmpireqindx), &
4552 nbrmpierr(nbrmpireqindx) )
4553 nbrmpireqindx = nbrmpireqindx + 1
4554 nbrmpinisends = nbrmpinisends + 1
4555 CALL bsystemclock(loctimer2)
4556 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4565 nbrmpireqcnt = nbrmpireqindx - 1
4566 IF ( nbrmpireqcnt .GT. 0 )
THEN
4567 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4568 CALL bsystemclock(loctimer1)
4569 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4570 CALL bsystemclock(loctimer2)
4571 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4577 DO locrow = startlocrow, endlocrow, 1
4578 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
4579 globrow = lr2gr( locrow, level )
4580 globrowoff = globrow - startglobrow + 1
4581 IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) )
THEN
4582 IF(kpdbg)
WRITE(ofu,*)
'ERROR ',level,
' ',globrow,
' ', locrow;
CALL fl(ofu)
4586 IF(kpdbg)
WRITE(ofu,*)
' Compute solution ', globrow,
' ', locrow;
CALL fl(ofu)
4588 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4589 ALLOCATE( selement(globrow)%x(m) )
4590 CALL chargememory( m*dpsz )
4596 selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4601 nbrrank = lr2rank(locrow-1,level)
4602 IF ( nbrrank .GE. 0 )
THEN
4603 prevglobrow = lr2gr(locrow-1,level)
4604 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4605 selement(prevglobrow)%x, &
4606 one, selement(globrow)%x )
4612 nbrrank = lr2rank(locrow+1,level)
4613 IF ( nbrrank .GE. 0 )
THEN
4614 nextglobrow = lr2gr(locrow+1,level)
4615 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4616 selement(nextglobrow)%x, &
4617 one, selement(globrow)%x )
4624 IF(kpdbg)
WRITE(ofu,*)
' x[',globrow,
']=',selement(globrow)%x;
CALL fl(ofu)
4628 CALL plbbackwardfinalizelevel( level, .false. )
4629 END DO backwardsolveloop
4632 CALL plbbackwardfinalize()
4635 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4636 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve end ------';
CALL fl(ofu)
4638 CALL bsystemclock(globtimer2)
4639 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4641 END SUBROUTINE backwardsolve
4648 SUBROUTINE verifysolution
4652 INTEGER :: i, k, globrow, globrowoff
4653 REAL(dp) :: term, totrmserr
4655 INTEGER :: nbrmpireqindx
4656 INTEGER :: nbrmpireqcnt
4657 INTEGER :: nbrmpinirecvs
4658 INTEGER :: nbrmpinisends
4659 INTEGER :: nbrmpireq(2)
4660 INTEGER :: nbrmpierr(2)
4661 INTEGER :: mpiwaiterr
4663 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4664 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4665 INTEGER :: waitmatchindex
4668 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4669 IF(kpdbg)
WRITE(ofu,*)
'------ Verifying solution ------';
CALL fl(ofu)
4677 nbrrank = gr2rank(startglobrow-1)
4678 IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4679 IF ( .NOT.
ALLOCATED( selement(startglobrow-1)%x ) )
THEN
4680 ALLOCATE( selement(startglobrow-1)%x(m) )
4682 CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4683 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4684 nbrmpireqindx = nbrmpireqindx + 1
4685 nbrmpinirecvs = nbrmpinirecvs + 1
4687 nbrrank = gr2rank(endglobrow+1)
4688 IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4689 IF ( .NOT.
ALLOCATED( selement(endglobrow+1)%x ) )
THEN
4690 ALLOCATE( selement(endglobrow+1)%x(m) )
4692 CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4693 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4694 nbrmpireqindx = nbrmpireqindx + 1
4695 nbrmpinirecvs = nbrmpinirecvs + 1
4697 nbrrank = gr2rank(startglobrow-1)
4698 IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4699 CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4700 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4701 nbrmpireqindx = nbrmpireqindx + 1
4702 nbrmpinisends = nbrmpinisends + 1
4704 nbrrank = gr2rank(endglobrow+1)
4705 IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4706 CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4707 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4708 nbrmpireqindx = nbrmpireqindx + 1
4709 nbrmpinisends = nbrmpinisends + 1
4711 nbrmpireqcnt = nbrmpireqindx - 1
4712 IF ( nbrmpireqcnt .GT. 0 )
THEN
4713 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4714 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4719 DO globrow = startglobrow, endglobrow, 1
4720 globrowoff = globrow - startglobrow + 1
4723 IF ( globrow .GT. 1 )
THEN
4724 term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4727 term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4729 IF (writesolution)
THEN
4732 IF(kpdbg)
WRITE(ofu,*)
'X[',(globrow-1)*m+k,
']=', selement(globrow)%x(k);
CALL fl(ofu)
4737 IF ( globrow .LT. n )
THEN
4738 term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4741 totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4745 IF ( endglobrow .LT. startglobrow )
THEN
4748 totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4750 IF(kpdbg)
WRITE(ofu,
'(A,E15.8E3)')
'TOTAL RMS ERROR = ', totrmserr;
CALL fl(ofu)
4751 IF(kpdbg)
WRITE(ofu,*)
'------ Solution verified ------';
CALL fl(ofu)
4752 END SUBROUTINE verifysolution
4756 SUBROUTINE checksymmetry (asymIndx)
4759 REAL(dp),
INTENT (OUT) :: asymIndx
4760 REAL(dp) :: diff, partdiff, norm, eij, eji, tdiff, tnorm, summ
4761 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:) :: sendBuf, bottomLeftMatrix
4763 INTEGER :: topNeighbor, bottomNeighbor
4765 INTEGER :: mpistatus(MPI_STATUS_SIZE), mpi_err
4766 INTEGER :: blkrow,blktype,i,j,k
4768 IF (.NOT.
ALLOCATED(bottomleftmatrix))
THEN
4769 ALLOCATE(bottomleftmatrix(m,m))
4770 bottomleftmatrix=0.0
4773 IF (.NOT.
ALLOCATED(sendbuf))
THEN
4774 ALLOCATE(sendbuf(m,m))
4778 IF(nranks.EQ.1)
THEN
4779 topneighbor=mpi_proc_null
4780 bottomneighbor=mpi_proc_null
4783 topneighbor=mpi_proc_null
4784 bottomneighbor=rank+1
4785 ELSE IF (rank.EQ.nranks-1)
THEN
4787 bottomneighbor=mpi_proc_null
4788 sendbuf=lelement(1,1)%L
4791 bottomneighbor=rank+1
4792 sendbuf=lelement(1,1)%L
4796 CALL mpi_sendrecv(sendbuf,m*m,mpi_real8,topneighbor,1,&
4797 &bottomleftmatrix,m*m,mpi_real8,bottomneighbor,1,ns_comm,&
4800 IF (
ALLOCATED(sendbuf))
DEALLOCATE(sendbuf)
4801 IF (rank.EQ.nranks-1) bottomleftmatrix=0.0
4803 summ=0.0; diff=0.0; norm=0.0
4804 DO blkrow=1,endglobrow-startglobrow+1
4809 eij=lelement(1,blkrow)%D(i,j)
4810 eji=lelement(1,blkrow)%D(j,i)
4811 partdiff = partdiff+(eij-eji)*(eij-eji)
4815 norm = norm + eij*eij+eji*eji
4819 norm = norm + eij*eij
4823 diff=diff+2*partdiff
4828 IF (blkrow.NE.endglobrow-startglobrow+1)
THEN
4829 eij=lelement(1,blkrow)%U(i,j)
4830 eji=lelement(1,blkrow+1)%L(j,i)
4831 diff = diff+(eij-eji)*(eij-eji)
4832 norm = norm+eij*eij+eji*eji
4834 IF (rank.NE.nranks-1)
THEN
4835 eij=lelement(1,blkrow)%U(i,j)
4836 eji=bottomleftmatrix(j,i)
4837 diff = diff+(eij-eji)*(eij-eji)
4838 norm = norm+eij*eij+eji*eji
4845 IF (
ALLOCATED(bottomleftmatrix))
DEALLOCATE(bottomleftmatrix)
4847 CALL mpi_reduce(diff,tdiff,1,mpi_real8,mpi_sum,0,ns_comm,mpi_err)
4848 CALL mpi_reduce(norm,tnorm,1,mpi_real8,mpi_sum,0,ns_comm,mpi_err)
4850 asymindx=sqrt(tdiff/tnorm)
4854 END SUBROUTINE checksymmetry
4858 SUBROUTINE storediagonal(blkrow,colnum,buf)
4861 INTEGER,
INTENT(IN) :: blkrow, colnum
4862 REAL(dp),
DIMENSION(1:M),
INTENT(IN) :: buf
4865 IF(blkrow.LT.startglobrow)
THEN
4866 IF(kpdbg)
WRITE(ofu,*)
'Error in indexing global block row index';
CALL fl(ofu)
4869 indx=(blkrow-startglobrow)*m+colnum
4870 origdiagelement(indx)=buf(colnum)
4872 END SUBROUTINE storediagonal
4877 SUBROUTINE writeblocks(flag)
4880 INTEGER,
INTENT(IN) :: flag
4882 INTEGER :: row, col, localblkrow, globalrow
4884 CHARACTER(100) :: fname
4885 CHARACTER(50) :: ciam, cnprocs
4886 INTEGER :: WOFU, istat
4888 WRITE(ciam,*) rank;
WRITE(cnprocs,*) nranks
4889 ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
4890 wofu = 8*nranks+rank
4892 fname=
'block-'//trim(ciam)//
'-P-'//trim(cnprocs)//
'.txt'
4893 OPEN(unit=wofu, file=fname, status=
"REPLACE", action=
"WRITE",&
4894 &form=
"FORMATTED",position=
"APPEND", iostat=istat)
4899 DO row=1, (endglobrow-startglobrow+1)*m
4900 WRITE(wofu,*)
'SavedDiag:',row,origdiagelement(row), flag
4906 DO globalrow=startglobrow,endglobrow
4907 localblkrow=globalrow-startglobrow+1
4911 IF (globalrow.GT.1)
THEN
4914 WRITE(wofu,*) globalrow,
'L', row, col, orig(localblkrow)%L(row,col), flag
4925 WRITE(wofu,*) globalrow,
'D', row, col, orig(localblkrow)%D(row,col), flag
4934 WRITE(wofu,*) globalrow,
'U', row, col, orig(localblkrow)%U(row,col), flag
4938 WRITE(wofu,*)
'--------------------------------------------------------'
4945 END SUBROUTINE writeblocks
4950 SUBROUTINE displayblocks(flag)
4953 INTEGER,
INTENT(IN) :: flag
4954 INTEGER :: row, localblkrow, globalrow
4956 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 1;
CALL fl(ofu)
4958 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4959 DO row=1, (endglobrow-startglobrow+1)*m
4960 IF(kpdbg)
WRITE(ofu,*)
'OrigDiagElement:',origdiagelement(row),
'-',flag
4963 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4965 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 2;
CALL fl(ofu)
4968 DO globalrow=startglobrow,endglobrow
4969 localblkrow=globalrow-startglobrow+1
4971 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4974 IF(kpdbg)
WRITE(ofu,*)
'orig-L:',orig(localblkrow)%L(row,1:m),
'-',flag
4978 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4980 IF(kpdbg)
WRITE(ofu,*)
'orig-D:',orig(localblkrow)%D(row,1:m),
'-',flag
4984 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4986 IF(kpdbg)
WRITE(ofu,*)
'orig-U:',orig(localblkrow)%U(row,1:m),
'-',flag
4989 IF(kpdbg)
WRITE(ofu,*)
' ';
CALL fl(ofu)
4992 IF(kpdbg)
WRITE(ofu,*)
'------- Here ------- ', 3;
CALL fl(ofu)
4995 END SUBROUTINE displayblocks
5000 SUBROUTINE parmatvec (invec,outvec,outveclength)
5003 INTEGER,
INTENT(IN) :: outveclength
5004 REAL(dp),
INTENT(IN) :: invec(1:N*M)
5005 REAL(dp),
INTENT(OUT) :: outvec(1:outveclength)
5007 INTEGER :: col, row, offset
5008 INTEGER :: globalrow, localblkrow, cnt, dcnt
5009 REAL(dp) :: melement, velement, total
5011 outvec=0; cnt=0; dcnt=0
5012 DO globalrow=startglobrow,endglobrow
5014 localblkrow=globalrow-startglobrow+1
5016 IF (globalrow.EQ.1)
THEN
5019 offset=(globalrow-2)*m
5022 IF (globalrow.EQ.1)
THEN
5026 velement=invec(offset+col)
5030 melement=origdiagelement(dcnt)
5032 melement=orig(localblkrow)%D(row,col)
5035 melement=orig(localblkrow)%U(row,col-m)
5037 total=total+melement*velement
5042 ELSE IF (globalrow.EQ.n)
THEN
5046 velement=invec(offset+col)
5048 melement=orig(localblkrow)%L(row,col)
5050 IF (row.EQ.col-m)
THEN
5052 melement=origdiagelement(dcnt)
5054 melement=orig(localblkrow)%D(row,col-m)
5057 total=total+melement*velement
5066 velement=invec(offset+col)
5068 melement=orig(localblkrow)%L(row,col)
5069 ELSE IF (col.GT.m.AND.col.LE.2*m)
THEN
5070 IF (row.EQ.col-m)
THEN
5072 melement=origdiagelement(dcnt)
5074 melement=orig(localblkrow)%D(row,col-m)
5077 melement=orig(localblkrow)%U(row,col-2*m)
5079 total=total+melement*velement
5080 IF (globalrow.EQ.2.AND.row.EQ.1)
THEN
5089 END SUBROUTINE parmatvec
5093 SUBROUTINE setblockrowcol( globrow, colnum, buf, opt)
5095 REAL(dp),
INTENT(IN),
DIMENSION(M) :: buf
5096 INTEGER :: colnum, opt
5098 CALL setmatrixrowcolu( globrow, buf, colnum )
5099 ELSE IF (opt.EQ.2)
THEN
5100 CALL setmatrixrowcold( globrow, buf, colnum )
5101 ELSE IF (opt.EQ.3)
THEN
5102 CALL setmatrixrowcoll( globrow, buf, colnum )
5104 WRITE(*,*)
'Error in diagonal type option'
5106 END SUBROUTINE setblockrowcol
5114 SUBROUTINE getcolsum(cs)
5115 REAL(dp),
DIMENSION(M,startglobrow:endglobrow),
INTENT(OUT) :: cs
5116 REAL(dp),
PARAMETER :: minScale = 1.e-6_dp
5117 REAL(dp) :: eps, eps0, s1(1), r1(1)
5120 CALL initscalefactors
5121 DO js = startglobrow, endglobrow
5122 CALL getscalefactors(js,cs(:,js))
5129 CALL mpi_allreduce(s1,r1,1,mpi_real8, mpi_max, ns_comm,mpi_err)
5130 eps0 = minscale*r1(1)
5133 DO js = startglobrow, endglobrow
5134 eps = minscale*maxval(cs(:,js))
5135 IF (eps .EQ. zero) eps = eps0
5136 WHERE (cs(:,js) .LT. eps) cs(:,js) = eps
5141 CALL finalizescalefactors
5143 END SUBROUTINE getcolsum
5147 SUBROUTINE initscalefactors
5149 INTEGER :: top, bot, MPI_ERR, ic, j, k
5150 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5151 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: tmp1, tmp2
5152 INTEGER :: numBlockRows
5154 numblockrows = endglobrow-startglobrow+1
5156 top=rank-1;
IF(rank.EQ.0) top=mpi_proc_null
5157 bot=rank+1;
IF(rank.EQ.nranks-1) bot=mpi_proc_null
5159 ALLOCATE (topscalefac(m),botscalefac(m))
5160 ALLOCATE (tmp1(m),tmp2(m))
5163 tmp1(ic)=sum(orig(1)%L(:,ic)**2)
5164 tmp2(ic)=sum(orig(numblockrows)%U(:,ic)**2)
5167 CALL mpi_sendrecv(tmp1,m,mpi_real8,top,1,&
5168 botscalefac,m,mpi_real8,bot,1,ns_comm,mpi_stat,mpi_err)
5170 CALL mpi_sendrecv(tmp2,m,mpi_real8,bot,1,&
5171 topscalefac,m,mpi_real8,top,1,ns_comm,mpi_stat,mpi_err)
5173 DEALLOCATE(tmp1, tmp2)
5175 END SUBROUTINE initscalefactors
5179 SUBROUTINE getscalefactors(js,scalevector)
5182 INTEGER,
INTENT(IN) :: js
5183 REAL(dp),
INTENT(OUT) :: scalevector(M)
5185 INTEGER :: ic, ir, numBlockRows
5187 numblockrows = endglobrow-startglobrow+1
5189 IF (js.EQ.startglobrow)
THEN
5192 scalevector(ic)=sum(orig(1)%D(:,ic)*orig(1)%D(:,ic)) + &
5193 sum(orig(2)%L(:,ic)*orig(2)%L(:,ic))
5196 ir = js-startglobrow+1
5198 scalevector(ic)=sum(orig(ir)%D(:,ic)*orig(ir)%D(:,ic)) + &
5199 topscalefac(ic) + sum(orig(ir+1)%L(:,ic)*orig(ir+1)%L(:,ic))
5202 ELSE IF (js.EQ.endglobrow)
THEN
5205 scalevector(ic)=sum(orig(numblockrows)%D(:,ic)*orig(numblockrows)%D(:,ic)) + &
5206 sum(orig(numblockrows-1)%U(:,ic)*orig(numblockrows-1)%U(:,ic))
5210 scalevector(ic)=sum(orig(numblockrows)%D(:,ic)*orig(numblockrows)%D(:,ic)) + &
5211 sum(orig(numblockrows-1)%U(:,ic)*orig(numblockrows-1)%U(:,ic)) + botscalefac(ic)
5215 ir = js-startglobrow+1
5217 scalevector(ic)=sum(orig(ir)%D(:,ic)*orig(ir)%D(:,ic)) + &
5218 sum(orig(ir-1)%U(:,ic)*orig(ir-1)%U(:,ic)) + &
5219 sum(orig(ir+1)%L(:,ic)*orig(ir+1)%L(:,ic))
5223 IF (any(scalevector .LT. zero)) stop
'SCALEVECTOR < 0!'
5224 scalevector = sqrt(scalevector)
5226 END SUBROUTINE getscalefactors
5230 SUBROUTINE finalizescalefactors
5231 DEALLOCATE (topscalefac,botscalefac)
5232 END SUBROUTINE finalizescalefactors
5236 SUBROUTINE parallelscaling(lmp,colsum)
5238 REAL(dp),
INTENT(IN) :: lmp
5239 REAL(dp),
DIMENSION(M,startglobrow:endglobrow),
INTENT(IN) :: colsum
5241 INTEGER :: js, icol, ir, top, bot, MPI_ERR, i4, i3, dcnt
5242 REAL(dp) :: eps, s1(1), r1(1)
5243 REAL(dp),
DIMENSION(:),
ALLOCATABLE :: toprow, botrow
5244 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5246 top=rank-1;
IF(rank.EQ.0) top=mpi_proc_null
5247 bot=rank+1;
IF(rank.EQ.nranks-1) bot=mpi_proc_null
5248 ALLOCATE (toprow(m),botrow(m))
5252 CALL mpi_sendrecv(colsum(:,endglobrow),m,mpi_real8,bot,1,&
5253 toprow,m,mpi_real8,top,1,ns_comm,mpi_stat,mpi_err)
5255 CALL mpi_sendrecv(colsum(:,startglobrow),m,mpi_real8,top,1,&
5256 botrow,m,mpi_real8,bot,1,ns_comm,mpi_stat,mpi_err)
5259 DO js = startglobrow, endglobrow
5260 ir=js-startglobrow+1
5262 lelement(1,ir)%D(:,icol) = colsum(:,js)*orig(ir)%D(:,icol)*colsum(icol,js)
5263 orig(ir)%D(:,icol) = lelement(1,ir)%D(:,icol)
5265 origdiagelement(dcnt) = orig(ir)%D(icol,icol)
5268 lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*toprow(icol)
5270 lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*colsum(icol,js-1)
5272 orig(ir)%L(:,icol) = lelement(1,ir)%L(:,icol)
5275 IF (ir.EQ.(endglobrow-startglobrow+1))
THEN
5276 lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*botrow(icol)
5278 lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*colsum(icol,js+1)
5280 orig(ir)%U(:,icol) = lelement(1,ir)%U(:,icol)
5284 DEALLOCATE (toprow,botrow)
5287 CALL findminmax_tri(lmp)
5289 END SUBROUTINE parallelscaling
5294 SUBROUTINE findminmax_tri(lmp)
5296 REAL(dp),
PARAMETER :: minThreshold=1.e-6_dp
5297 REAL(dp),
INTENT(IN) :: lmp
5298 INTEGER :: js, icol, ir, dcnt
5299 REAL(dp) :: eps, s1(2), r1(2), minDiag
5303 dmin_tri = huge(dmin_tri)
5304 DO js = startglobrow, endglobrow
5305 ir=js-startglobrow+1
5308 mindiag = minthreshold*maxval(abs(origdiagelement(dcnt+1:dcnt+m)))
5309 IF (mindiag .EQ. 0) mindiag = minthreshold
5313 eps = origdiagelement(dcnt)
5315 IF (abs(eps) .LE. mindiag)
THEN
5317 origdiagelement(dcnt) = eps
5318 orig(ir)%D(icol,icol) = eps
5319 lelement(1,ir)%D(icol,icol) = eps
5321 IF (js .LT. n) dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
5324 maxeigen_tri = max(maxeigen_tri, sum(abs(orig(ir)%L(icol,:)) + abs(orig(ir)%D(icol,:)) &
5325 + abs(orig(ir)%U(icol,:))))
5329 s1(1)=-dmin_tri; s1(2)=maxeigen_tri
5330 CALL mpi_allreduce(s1,r1,2,mpi_real8, mpi_max, ns_comm,mpi_err)
5331 dmin_tri=sign(-r1(1), eps)
5335 CALL resetdiagonal(lmp, .false.)
5337 END SUBROUTINE findminmax_tri
5342 SUBROUTINE resetdiagonal(lmp, bReset)
5343 REAL(dp),
INTENT(IN) :: lmp
5344 LOGICAL,
INTENT(IN) :: bReset
5345 REAL(dp) :: DminL, eps
5346 INTEGER :: dcnt, js, ir, icol
5349 dminl = dmin_tri*lmp
5351 DO js = startglobrow, endglobrow
5352 ir=js-startglobrow+1
5355 eps = origdiagelement(dcnt)
5358 IF (l_colscale)
THEN
5359 lelement(1,ir)%D(icol,icol) = eps + sign(dminl,eps)
5361 lelement(1,ir)%D(icol,icol) = eps*(1 + abs(lmp))
5363 orig(ir)%D(icol,icol) = lelement(1,ir)%D(icol,icol)
5367 lelement(1,ir)%L = orig(ir)%L
5368 lelement(1,ir)%D = orig(ir)%D
5369 lelement(1,ir)%U = orig(ir)%U
5370 lelement(1,ir)%pivot = 0
5374 END SUBROUTINE resetdiagonal