2 MODULE blocktridiagonalsolver_bst
4 USE parallel_include_module,
ONLY: stopmpi
5 USE parallel_include_module,
ONLY: tofu
6 USE parallel_include_module,
ONLY: ns_comm
7 USE parallel_include_module,
ONLY: grank, gnranks
8 USE parallel_include_module,
ONLY: rank, nranks
9 USE parallel_include_module,
ONLY: ns_resltn
11 USE parallel_vmec_module,
ONLY: bcyclic_comp_time
12 USE parallel_vmec_module,
ONLY: bcyclic_comm_time
13 USE parallel_vmec_module,
ONLY: waitall_time
14 USE parallel_vmec_module,
ONLY: dgemm_time
15 USE parallel_vmec_module,
ONLY: dgemv_time
16 USE parallel_vmec_module,
ONLY: dgetrf_time
17 USE parallel_vmec_module,
ONLY: dgetrs_time
18 USE parallel_vmec_module,
ONLY: forwardsolveloop_time
19 USE parallel_vmec_module,
ONLY: t
24 INTEGER,
PARAMETER :: rprec = selected_real_kind(15,300)
25 INTEGER,
PARAMETER :: iprec = selected_int_kind(8)
26 INTEGER,
PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
27 INTEGER,
PARAMETER :: dp = rprec
35 REAL(dp),
ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
36 INTEGER,
ALLOCATABLE :: pivot(:)
45 REAL(dp),
ALLOCATABLE :: x(:)
56 TYPE (LevelElement),
ALLOCATABLE :: lelement(:,:)
64 TYPE (LevelElement),
ALLOCATABLE :: orig(:)
72 TYPE (SolutionElement),
ALLOCATABLE :: selement(:)
80 INTEGER :: startglobrow
87 CHARACTER*100 :: kenvvar
88 CHARACTER*100 :: kenvval
92 LOGICAL :: writeproblemfile
93 LOGICAL :: writesolution
94 LOGICAL :: usebarriers
101 REAL(dp) :: skston, skstoff
105 LOGICAL :: use_mpiwtime
106 DOUBLE PRECISION :: loctimer1, loctimer2
107 DOUBLE PRECISION :: mattimer1, mattimer2
108 DOUBLE PRECISION :: globtimer1, globtimer2
109 DOUBLE PRECISION :: timerfreq
110 DOUBLE PRECISION :: tottime
112 DOUBLE PRECISION :: totcommtime
113 INTEGER :: totcommcount
114 DOUBLE PRECISION :: totinvtime
115 INTEGER :: totinvcount
116 DOUBLE PRECISION :: totmatmultime
117 INTEGER :: totmatmulcount
118 DOUBLE PRECISION :: totmatsoltime
119 INTEGER :: totmatsolcount
128 LOGICAL :: doblasonly
129 LOGICAL :: doblacscomm
137 INTEGER :: myrow, mycol
138 INTEGER :: nrows, ncols
139 INTEGER :: blockszrows, blockszcols
140 INTEGER,
ALLOCATABLE :: map(:,:)
151 INTEGER :: maincontext
152 INTEGER :: levelcontext
153 TYPE(BlacsProcessGrid) :: pgrid
165 INTEGER :: masterrank
167 INTEGER,
ALLOCATABLE :: slaveranks(:)
177 TYPE(MasterToSlaveMapping) :: msmap
179 INTEGER :: mpicomm, mpitag, mpierr
181 INTEGER :: mpistatus(MPI_STATUS_SIZE)
186 TYPE(PBLASLevelParameters) :: pblas
193 INTEGER,
PARAMETER :: OP_NONE = 0
194 INTEGER,
PARAMETER :: OP_DONE = 1
195 INTEGER,
PARAMETER :: OP_DGEMM = 2
196 INTEGER,
PARAMETER :: OP_DGEMV = 3
197 INTEGER,
PARAMETER :: OP_DGETRF = 4
198 INTEGER,
PARAMETER :: OP_DGETRS = 5
206 DOUBLE PRECISION :: tm
208 DOUBLE PRECISION :: t1, t2
211 TYPE(TimeCount) :: wait, comm, comp
212 TYPE(TimeCount) :: mm, trf, pmm, ptrf
213 TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
214 TYPE(TimeCount) :: extract, waitall
217 TYPE(PBLASStats) :: pstats
221 DOUBLE PRECISION,
ALLOCATABLE :: temparray(:)
233 SUBROUTINE bclockinit()
239 IF ( use_mpiwtime )
THEN
242 CALL system_clock(count_rate=tempint)
245 END SUBROUTINE bclockinit
252 SUBROUTINE bsystemclock( ts )
256 DOUBLE PRECISION,
INTENT(INOUT) :: ts
262 DOUBLE PRECISION :: tempdbl
264 IF ( use_mpiwtime )
THEN
266 tempdbl = mpi_wtime()
270 CALL system_clock( tempint )
273 END SUBROUTINE bsystemclock
284 INTEGER,
INTENT(IN) :: u
294 SUBROUTINE chargememory( bytes )
298 REAL,
INTENT(IN) :: bytes
301 membytes = membytes + bytes
302 END SUBROUTINE chargememory
309 SUBROUTINE chargetime( tot, t2, t1, cnt )
313 DOUBLE PRECISION,
INTENT(INOUT) :: tot
314 DOUBLE PRECISION,
INTENT(IN) :: t2
315 DOUBLE PRECISION,
INTENT(IN) :: t1
316 INTEGER,
INTENT(INOUT) :: cnt
319 tot = tot + (real(t2-t1))/timerfreq
321 END SUBROUTINE chargetime
329 SUBROUTINE timecountinit( tc )
333 TYPE(TimeCount),
INTENT(INOUT) :: tc
338 END SUBROUTINE timecountinit
345 SUBROUTINE timecountprint( tc, msg )
349 TYPE(TimeCount),
INTENT(INOUT) :: tc
350 CHARACTER(*),
INTENT(IN) :: msg
354 DOUBLE PRECISION :: avg
358 IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
359 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,
' * ',avg,
' sec = ',tc%tm,
' sec'
360 END SUBROUTINE timecountprint
367 SUBROUTINE plbinitstats()
368 CALL timecountinit( pstats%wait )
369 CALL timecountinit( pstats%comm )
370 CALL timecountinit( pstats%comp )
371 CALL timecountinit( pstats%mm )
372 CALL timecountinit( pstats%trf )
373 CALL timecountinit( pstats%pmm )
374 CALL timecountinit( pstats%ptrf )
376 CALL timecountinit( pstats%mma )
377 CALL timecountinit( pstats%mmb )
378 CALL timecountinit( pstats%mmc )
379 CALL timecountinit( pstats%mmalpha )
380 CALL timecountinit( pstats%mmbeta )
381 CALL timecountinit( pstats%mmrc )
382 CALL timecountinit( pstats%extract )
383 CALL timecountinit( pstats%waitall )
384 END SUBROUTINE plbinitstats
391 SUBROUTINE plbprintstats()
392 CALL timecountprint( pstats%wait,
'PBLAS Wait ' )
393 CALL timecountprint( pstats%comm,
'PBLAS Comm ' )
394 CALL timecountprint( pstats%comp,
'PBLAS Comp ' )
395 CALL timecountprint( pstats%mm,
'PBLAS MM ' )
396 CALL timecountprint( pstats%trf,
'PBLAS TRF ' )
397 CALL timecountprint( pstats%pmm,
'PBLAS PMM ' )
398 CALL timecountprint( pstats%ptrf,
'PBLAS PTRF ' )
400 CALL timecountprint( pstats%mma,
'PBLAS MMA ' )
401 CALL timecountprint( pstats%mmb,
'PBLAS MMB ' )
402 CALL timecountprint( pstats%mmc,
'PBLAS MMC ' )
403 CALL timecountprint( pstats%mmalpha,
'PBLAS MMalpha ' )
404 CALL timecountprint( pstats%mmbeta,
'PBLAS MMbeta ' )
405 CALL timecountprint( pstats%mmrc,
'PBLAS MMRC ' )
406 CALL timecountprint( pstats%extract,
'PBLAS Extract ' )
407 CALL timecountprint( pstats%waitall,
'PBLAS Waitall ' )
408 END SUBROUTINE plbprintstats
415 SUBROUTINE plbinitialize
419 CHARACTER*100 :: envvar
420 CHARACTER*100 :: envval
422 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Started';
CALL fl(ofu)
426 IF ( m .GE. 2048 )
THEN
428 envvar =
'BLOCKTRI_BLASONLY'
429 CALL getenv( envvar, envval )
430 IF ( envval .EQ.
'TRUE' )
THEN
432 IF(kpdbg)
WRITE(ofu,*)
'BLAS ONLY -- obeying env var ', envvar;
CALL fl(ofu)
435 IF(kpdbg)
WRITE(ofu,*)
'doblasonly = ', doblasonly;
CALL fl(ofu)
438 doblacscomm = .false.
439 envvar =
'BLOCKTRI_BLACSCOMM'
440 CALL getenv( envvar, envval )
441 IF ( envval .EQ.
'TRUE' )
THEN
443 IF(kpdbg)
WRITE(ofu,*)
'BLACS COMM -- obeying env var ', envvar;
CALL fl(ofu)
445 IF(kpdbg)
WRITE(ofu,*)
'doblacscomm = ', doblacscomm;
CALL fl(ofu)
449 envvar =
'BLOCKTRI_NBPP'
450 CALL getenv( envvar, envval )
451 IF ( envval .NE.
'' )
THEN
452 READ( envval, *) blacs%nbpp
453 IF(kpdbg)
WRITE(ofu,*)
'NBPP -- obeying env var ', envvar;
CALL fl(ofu)
455 IF(kpdbg)
WRITE(ofu,*)
'NBPP = ', blacs%nbpp;
CALL fl(ofu)
462 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
465 CALL blacs_pinfo( blacs%iam, blacs%nprocs )
466 IF(kpdbg)
WRITE(ofu,*)
'BLACS_PINFO ', blacs%iam,
' ', blacs%nprocs;
CALL fl(ofu)
467 CALL blacs_get( 0, 0, blacs%maincontext )
468 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GET ', blacs%maincontext;
CALL fl(ofu)
469 CALL blacs_gridinit( blacs%maincontext,
'R', 1, blacs%nprocs )
470 IF(kpdbg)
WRITE(ofu,*)
'BLACS_GRIDINIT';
CALL fl(ofu)
471 CALL blacs_barrier( blacs%maincontext,
'All' )
475 IF(kpdbg)
WRITE(ofu,*)
'PLBInitialize Done';
CALL fl(ofu)
476 END SUBROUTINE plbinitialize
483 SUBROUTINE plbfinalize
485 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Started';
CALL fl(ofu)
487 IF(kpdbg)
WRITE(ofu,*)
'BLAS only (not using PBLAS)';
CALL fl(ofu)
490 CALL blacs_barrier( blacs%maincontext,
'All' )
491 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
493 IF(kpdbg)
WRITE(ofu,*)
'BLACS_EXIT nprocs= ', blacs%nprocs;
CALL fl(ofu)
497 IF(kpdbg)
WRITE(ofu,*)
'PLBFinalize Done';
CALL fl(ofu)
498 END SUBROUTINE plbfinalize
507 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
517 INTEGER :: maxslaves, actualslaves
521 IF ( doblasonly )
THEN
522 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel BLAS only';
CALL fl(ofu)
526 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Started', ammaster;
CALL fl(ofu)
530 pblas%ammaster = ammaster
531 pblas%msmap%masterrank = -1
532 pblas%msmap%nslaves = 0
533 CALL determinemasterslaveranks()
537 blacs%levelcontext = -1
538 CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
539 IF(kpdbg)
WRITE(ofu,*)
'Created new context';
CALL fl(ofu)
540 IF(kpdbg)
WRITE(ofu,*)
'Nslaves ',pblas%msmap%nslaves;
CALL fl(ofu)
542 blacs%pgrid%blockszrows = 64
543 IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
544 blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
545 IF(kpdbg)
WRITE(ofu,*)
'Block NR=',blacs%pgrid%blockszrows;
CALL fl(ofu)
546 IF(kpdbg)
WRITE(ofu,*)
'Block NC=',blacs%pgrid%blockszcols;
CALL fl(ofu)
548 maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
549 blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
550 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
551 IF ( maxslaves .LT. 1 ) maxslaves = 1
552 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
553 IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
554 IF(kpdbg)
WRITE(ofu,*)
'Max slaves ', maxslaves;
CALL fl(ofu)
555 actualslaves = maxslaves
556 IF(kpdbg)
WRITE(ofu,*)
' Actual slaves ', actualslaves;
CALL fl(ofu)
558 blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
559 IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
560 blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
561 IF(kpdbg)
WRITE(ofu,*)
'NR=',blacs%pgrid%nrows,
' NC=',blacs%pgrid%ncols;
CALL fl(ofu)
562 ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
564 DO i = 1, blacs%pgrid%nrows
565 DO j = 1, blacs%pgrid%ncols
566 blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
570 IF(kpdbg)
WRITE(ofu,*)
'NR*NC=',k;
CALL fl(ofu)
571 CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
572 blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
573 IF(kpdbg)
WRITE(ofu,*)
'GridMap done';
CALL fl(ofu)
574 CALL blacs_gridinfo( blacs%levelcontext, &
575 blacs%pgrid%nrows, blacs%pgrid%ncols, &
576 blacs%pgrid%myrow, blacs%pgrid%mycol )
577 IF(kpdbg)
WRITE(ofu,*)
'GridInfo done';
CALL fl(ofu)
578 IF(kpdbg)
WRITE(ofu,*)
'Myrowcol ',blacs%pgrid%myrow,
' ',blacs%pgrid%mycol;
CALL fl(ofu)
581 pblas%mpicomm = ns_comm
585 CALL blacs_barrier( blacs%maincontext,
'All' )
586 IF(kpdbg)
WRITE(ofu,*)
'BLACS_BARRIER';
CALL fl(ofu)
590 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Master';
CALL fl(ofu)
592 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Slave';
CALL fl(ofu)
596 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardInitializeLevel Done', ammaster;
CALL fl(ofu)
599 END SUBROUTINE plbforwardinitializelevel
607 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
616 IF ( doblasonly )
THEN
617 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel BLAS only';
CALL fl(ofu)
622 IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) )
THEN
623 CALL masterbcastnextop( op_done )
626 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
627 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
628 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel pariah !level-barrier';
CALL fl(ofu)
630 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level-barrier';
CALL fl(ofu)
631 CALL blacs_barrier( blacs%levelcontext,
'All' )
632 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel level grid exit';
CALL fl(ofu)
633 CALL blacs_gridexit( blacs%levelcontext )
636 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel main-barrier';
CALL fl(ofu)
637 CALL blacs_barrier( blacs%maincontext,
'All' )
640 pblas%msmap%masterrank = -1
641 pblas%msmap%nslaves = 0
642 DEALLOCATE( pblas%msmap%slaveranks )
643 DEALLOCATE( blacs%pgrid%map )
648 IF(kpdbg)
WRITE(ofu,*)
'PLBForwardFinalizeLevel ', ammaster;
CALL fl(ofu)
650 END SUBROUTINE plbforwardfinalizelevel
658 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
667 END SUBROUTINE plbbackwardinitializelevel
674 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
683 END SUBROUTINE plbbackwardfinalizelevel
691 SUBROUTINE determinemasterslaveranks()
695 LOGICAL,
ALLOCATABLE :: send_ammaster(:), recv_ammaster(:), assigned(:)
696 TYPE(MasterToSlaveMapping),
ALLOCATABLE :: allmsmaps(:)
697 INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
698 INTEGER :: mpi_err, I, J, masterindex
700 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks started';
CALL fl(ofu)
704 ALLOCATE( send_ammaster(1:1) )
705 ALLOCATE( recv_ammaster(1:nranks) )
706 send_ammaster(1) = pblas%ammaster
707 ALLOCATE( assigned(1:nranks) )
709 assigned( i ) = .false.
711 ALLOCATE( allmsmaps(1:nranks) )
713 allmsmaps(i)%masterrank = -1
714 allmsmaps(i)%nslaves = 0
718 CALL mpi_allgather( send_ammaster, 1, mpi_logical, &
719 recv_ammaster, 1, mpi_logical, &
726 IF(kpdbg)
WRITE(ofu,*)
' recv_ammaster ',i,
' ',recv_ammaster(i);
CALL fl(ofu)
727 IF ( recv_ammaster(i) )
THEN
728 totmasters = totmasters + 1
732 IF ( totmasters .LE. 0 )
THEN
733 IF(kpdbg)
WRITE(ofu,*)
'Total masters must be greater than 0';
CALL fl(ofu)
737 nslavespermaster = (nranks / totmasters)
738 IF(kpdbg)
WRITE(ofu,*)
'Avg nslavespermaster',nslavespermaster;
CALL fl(ofu)
743 masterloop:
DO i = 1, nranks
744 IF ( recv_ammaster(i) )
THEN
746 tempmaster = tempmaster + 1
750 adjustednslavespermaster = nslavespermaster
751 IF ( tempmaster .LE. mod( nranks, totmasters ) )
THEN
752 adjustednslavespermaster = adjustednslavespermaster + 1
755 IF(kpdbg)
WRITE(ofu,*)
'Adjusted nslavespm',adjustednslavespermaster;
CALL fl(ofu)
756 allmsmaps(i)%masterrank = i-1
757 ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
762 allmsmaps(i)%nslaves = 1
763 allmsmaps(i)%slaveranks(1) = i-1
767 slaveloop:
DO j = 1, nranks
768 IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster )
THEN
771 IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) )
THEN
773 allmsmaps(j)%masterrank = i-1
774 allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
775 allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
780 IF(kpdbg)
WRITE(ofu,*)
'Computed all mappings';
CALL fl(ofu)
785 IF ( allmsmaps(masterindex)%masterrank .NE. rank )
THEN
786 masterindex = allmsmaps(masterindex)%masterrank+1
788 pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
789 pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
790 ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
791 pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
793 IF(kpdbg)
WRITE(ofu,*)
'Extracted my mappings';
CALL fl(ofu)
794 IF(kpdbg)
WRITE(ofu,*)
'My master rank',masterindex-1;
CALL fl(ofu)
795 IF(kpdbg)
WRITE(ofu,*)
'Nslaves in my set',allmsmaps(masterindex)%nslaves;
CALL fl(ofu)
796 DO j = 1, allmsmaps(masterindex)%nslaves
797 IF(kpdbg)
WRITE(ofu,*)
'Slave',j,
' ',allmsmaps(masterindex)%slaveranks(j);
CALL fl(ofu)
803 IF ( recv_ammaster(i) )
THEN
804 DEALLOCATE( allmsmaps(i)%slaveranks )
807 DEALLOCATE( assigned )
808 DEALLOCATE( allmsmaps )
809 DEALLOCATE( send_ammaster )
810 DEALLOCATE( recv_ammaster )
812 IF(kpdbg)
WRITE(ofu,*)
'DetermineMasterSlaveRanks done';
CALL fl(ofu)
813 END SUBROUTINE determinemasterslaveranks
821 SUBROUTINE masterbcastnextop( nextop )
825 INTEGER,
INTENT(IN) :: nextop
829 INTEGER :: K, destrank
830 REAL(dp) :: realnextop
832 realnextop = real(nextop)
833 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp started ', nextop;
CALL fl(ofu)
834 CALL masterbcastvalue( realnextop )
835 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastNextOp done ', nextop;
CALL fl(ofu)
836 END SUBROUTINE masterbcastnextop
843 SUBROUTINE slavegetnextop( nextop )
847 REAL(dp) :: realnextop
848 INTEGER,
INTENT(INOUT) :: nextop
850 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp started';
CALL fl(ofu)
851 CALL slavereceivevalue( realnextop )
852 nextop = int( realnextop )
853 IF(kpdbg)
WRITE(ofu,*)
'SlaveGetNextOp done ', nextop;
CALL fl(ofu)
854 END SUBROUTINE slavegetnextop
861 SUBROUTINE slaveservice()
868 IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
869 (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) )
THEN
870 IF(kpdbg)
WRITE(ofu,*)
'SlaveService pariah falling out ';
CALL fl(ofu)
872 IF(kpdbg)
WRITE(ofu,*)
'SlaveService started ';
CALL fl(ofu)
873 oploop:
DO WHILE (.true.)
875 CALL slavegetnextop( nextop )
876 IF ( nextop .EQ. op_done )
THEN
878 ELSE IF ( nextop .EQ. op_dgemm )
THEN
880 ELSE IF ( nextop .EQ. op_dgetrf )
THEN
882 ELSE IF ( nextop .EQ. op_dgetrs )
THEN
885 IF(kpdbg)
WRITE(ofu,*)
'Bad Next Op', nextop;
CALL fl(ofu)
889 IF(kpdbg)
WRITE(ofu,*)
'SlaveService done ';
CALL fl(ofu)
891 END SUBROUTINE slaveservice
898 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
899 A, nrows, ncols, subA, subnrows, subncols )
903 INTEGER,
INTENT(IN) :: bszr, bszc
904 INTEGER,
INTENT(IN) :: pnr, pnc
905 INTEGER,
INTENT(IN) :: pi, pj
906 REAL(dp),
INTENT(IN) :: A(:,:)
907 INTEGER,
INTENT(IN) :: nrows, ncols
908 REAL(dp),
INTENT(OUT) :: subA(:)
909 INTEGER,
INTENT(IN) :: subnrows, subncols
913 INTEGER :: I, J, K, Q, R
914 INTEGER :: thisblkr, thisblkc
916 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
918 CALL bsystemclock(pstats%extract%t1)
921 DO j = 1, ncols, bszc
922 thisblkc = (j-1) / bszc
923 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
925 IF ( j+r-1 .LE. ncols )
THEN
926 DO i = 1, nrows, bszr
927 thisblkr = (i-1) / bszr
928 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
930 IF ( i+q-1 .LE. nrows )
THEN
932 suba(k) = a(i+q-1,j+r-1)
943 IF ( k .NE. subnrows*subncols )
THEN
944 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
945 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
949 CALL bsystemclock(pstats%extract%t2)
950 CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
952 IF(kpdbg)
WRITE(ofu,*)
'ExtractSubMatrix done K', k;
CALL fl(ofu)
954 END SUBROUTINE extractsubmatrix
963 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
967 REAL(dp),
INTENT(IN) :: A(:,:)
968 INTEGER,
INTENT(IN) :: nrows, ncols
969 REAL(dp),
INTENT(OUT) :: ssubA(:)
970 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
975 INTEGER :: aslaverank
976 TYPE(PBLASTempArray),
ALLOCATABLE :: subA(:,:)
977 INTEGER :: subnrows, subncols
978 INTEGER :: maxsubnrows, maxsubncols
979 INTEGER :: bszr, bszc
982 INTEGER,
EXTERNAL :: NUMROC
983 INTEGER :: mpistatus(MPI_STATUS_SIZE)
987 INTEGER :: mpinisends
988 INTEGER,
ALLOCATABLE :: mpireqs(:)
990 INTEGER,
ALLOCATABLE :: mpistatuses(:,:)
991 INTEGER :: waitmatchindex
994 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix started';
CALL fl(ofu)
995 CALL bsystemclock(pstats%comm%t1)
998 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
999 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1003 ALLOCATE( mpireqs( pnr * pnc ) )
1004 ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1007 maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1008 maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1009 ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1013 aslaverank = blacs%pgrid%map(i,j)
1015 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1016 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1018 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix to ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1020 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1022 IF ( aslaverank .NE. rank )
THEN
1023 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1026 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1027 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1028 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1029 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1034 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting self submatrix';
CALL fl(ofu)
1035 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1036 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1037 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix kept self submatrix';
CALL fl(ofu)
1040 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracting submatrix',i,j;
CALL fl(ofu)
1041 ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1042 CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1043 i, j, a, nrows, ncols, suba(i,j)%temparray, &
1045 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix extracted submatrix';
CALL fl(ofu)
1046 IF ( doblacscomm )
THEN
1047 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1048 suba(i,j)%temparray, subnrows, i-1, j-1 )
1050 mpinisends = mpinisends + 1
1051 CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1052 aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1054 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix sent slave submatrix';
CALL fl(ofu)
1055 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix deallocated temp submatrix';
CALL fl(ofu)
1060 CALL bsystemclock(pstats%waitall%t1)
1061 CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1062 CALL bsystemclock(pstats%waitall%t2)
1063 waitall_time=waitall_time+(pstats%waitall%t2-pstats%waitall%t1)
1064 CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1068 IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) )
THEN
1069 DEALLOCATE( suba(i,j)%temparray )
1074 DEALLOCATE( mpireqs )
1075 DEALLOCATE( mpistatuses )
1077 CALL bsystemclock(pstats%comm%t2)
1078 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1080 IF(kpdbg)
WRITE(ofu,*)
'MasterSendMatrix done';
CALL fl(ofu)
1082 END SUBROUTINE mastersendmatrix
1089 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1093 #if defined(MPI_OPT)
1094 INTEGER :: mpistatus(MPI_STATUS_SIZE)
1096 INTEGER,
INTENT(IN) :: nrows, ncols
1097 REAL(dp),
INTENT(OUT) :: subA(:)
1098 INTEGER,
INTENT(IN) :: subnrows, subncols
1102 INTEGER :: masterrank
1105 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1107 #if defined(MPI_OPT)
1108 masterrank = blacs%pgrid%map(1,1)
1110 CALL bsystemclock(pstats%comm%t1)
1112 IF ( doblacscomm )
THEN
1113 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1115 CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1116 masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1119 CALL bsystemclock(pstats%comm%t2)
1120 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1123 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveMatrix done';
CALL fl(ofu)
1124 END SUBROUTINE slavereceivematrix
1131 SUBROUTINE masterbcastvalue( val )
1136 REAL(dp),
INTENT(IN) :: val
1138 #if defined(MPI_OPT)
1139 CALL dgebs2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1 );
1141 IF(kpdbg)
WRITE(ofu,*)
'MasterBcastValue bcast to slaves';
CALL fl(ofu)
1143 END SUBROUTINE masterbcastvalue
1150 SUBROUTINE slavereceivevalue( val )
1155 REAL(dp),
INTENT(OUT) :: val
1157 #if defined(MPI_OPT)
1158 CALL dgebr2d( blacs%levelcontext,
'All',
' ', 1, 1, val, 1, 0, 0 )
1160 IF(kpdbg)
WRITE(ofu,*)
'SlaveReceiveValue bcast from master'
1163 END SUBROUTINE slavereceivevalue
1170 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1171 A, nrows, ncols, subA, subnrows, subncols )
1175 INTEGER,
INTENT(IN) :: bszr, bszc
1176 INTEGER,
INTENT(IN) :: pnr, pnc
1177 INTEGER,
INTENT(IN) :: pi, pj
1178 REAL(dp),
INTENT(OUT) :: A(:,:)
1179 INTEGER,
INTENT(IN) :: nrows, ncols
1180 REAL(dp),
INTENT(IN) :: subA(:)
1181 INTEGER,
INTENT(IN) :: subnrows, subncols
1185 INTEGER :: I, J, K, Q, R
1186 INTEGER :: thisblkr, thisblkc
1188 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix NR=', subnrows,
' NC=', subncols;
CALL fl(ofu)
1191 DO j = 1, ncols, bszc
1192 thisblkc = (j-1) / bszc
1193 IF ( mod(thisblkc, pnc) .EQ. pj-1 )
THEN
1195 IF ( j+r-1 .LE. ncols )
THEN
1196 DO i = 1, nrows, bszr
1197 thisblkr = (i-1) / bszr
1198 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1200 IF ( i+q-1 .LE. nrows )
THEN
1202 a(i+q-1,j+r-1) = suba(k)
1213 IF ( k .NE. subnrows*subncols )
THEN
1214 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1215 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows,
' subnc=', subncols;
CALL fl(ofu)
1219 IF(kpdbg)
WRITE(ofu,*)
'InjectSubMatrix done K', k;
CALL fl(ofu)
1221 END SUBROUTINE injectsubmatrix
1228 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1232 INTEGER,
INTENT(IN) :: bszr
1233 INTEGER,
INTENT(IN) :: pnr
1234 INTEGER,
INTENT(IN) :: pi
1235 INTEGER,
INTENT(OUT) :: V(:)
1236 INTEGER,
INTENT(IN) :: nrows
1237 INTEGER,
INTENT(IN) :: subV(:)
1238 INTEGER,
INTENT(IN) :: subnrows
1245 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector NR=', subnrows;
CALL fl(ofu)
1248 DO i = 1, nrows, bszr
1249 thisblkr = (i-1) / bszr
1250 IF ( mod(thisblkr, pnr) .EQ. pi-1 )
THEN
1252 IF ( i+q-1 .LE. nrows )
THEN
1261 IF ( k .NE. subnrows )
THEN
1262 IF(kpdbg)
WRITE(ofu,*)
'Sanity check failed ';
CALL fl(ofu)
1263 IF(kpdbg)
WRITE(ofu,*)
'K=', k,
' subnr=', subnrows;
CALL fl(ofu)
1267 IF(kpdbg)
WRITE(ofu,*)
'InjectSubVector done K', k;
CALL fl(ofu)
1269 END SUBROUTINE injectsubvector
1278 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1282 REAL(dp),
INTENT(OUT) :: A(:,:)
1283 INTEGER,
INTENT(IN) :: nrows, ncols
1284 REAL(dp),
INTENT(IN) :: ssubA(:)
1285 INTEGER,
INTENT(IN) :: ssubnrows, ssubncols
1290 INTEGER :: aslaverank
1291 REAL(dp),
ALLOCATABLE :: subA(:)
1292 INTEGER :: subnrows, subncols
1293 INTEGER :: bszr, bszc
1295 #if defined(MPI_OPT)
1296 INTEGER,
EXTERNAL :: NUMROC
1299 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix started';
CALL fl(ofu)
1300 CALL bsystemclock(pstats%comm%t1)
1302 #if defined(MPI_OPT)
1303 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1304 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1308 aslaverank = blacs%pgrid%map(i,j)
1310 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1311 subncols = numroc( ncols, bszc, j-1, 0, pnc )
1313 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1315 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1317 IF ( aslaverank .NE. rank )
THEN
1318 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1321 IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) )
THEN
1322 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1323 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows,
' SSNC ', ssubncols;
CALL fl(ofu)
1324 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows,
' SNC ', subncols;
CALL fl(ofu)
1329 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting self submatrix';
CALL fl(ofu)
1330 CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1331 i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1332 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix kept self submatrix';
CALL fl(ofu)
1335 ALLOCATE( suba( 1 : subnrows*subncols ) )
1336 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix receiving slave submatrix';
CALL fl(ofu)
1337 CALL bsystemclock(pstats%wait%t1)
1338 CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1339 suba, subnrows, i-1, j-1 )
1340 CALL bsystemclock(pstats%wait%t2)
1341 CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1342 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injecting submatrix',i,j;
CALL fl(ofu)
1343 CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1344 i, j, a, nrows, ncols, suba, subnrows, subncols )
1345 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix injected submatrix';
CALL fl(ofu)
1347 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix deallocated temp submatrix';
CALL fl(ofu)
1352 CALL bsystemclock(pstats%comm%t2)
1353 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1356 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvMatrix done';
CALL fl(ofu)
1358 END SUBROUTINE masterrecvmatrix
1365 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1369 INTEGER,
INTENT(IN) :: nrows, ncols
1370 REAL(dp),
INTENT(IN) :: subA(:)
1371 INTEGER,
INTENT(IN) :: subnrows, subncols
1373 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix started ',subnrows,
' ',subncols;
CALL fl(ofu)
1375 CALL bsystemclock(pstats%comm%t1)
1376 #if defined(MPI_OPT)
1377 CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1379 CALL bsystemclock(pstats%comm%t2)
1380 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1382 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendMatrix done';
CALL fl(ofu)
1383 END SUBROUTINE slavesendmatrix
1392 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1396 INTEGER,
INTENT(OUT) :: V(:)
1397 INTEGER,
INTENT(IN) :: nrows
1398 INTEGER,
INTENT(IN) :: ssubV(:)
1399 INTEGER,
INTENT(IN) :: ssubnrows
1404 INTEGER :: aslaverank
1405 INTEGER,
ALLOCATABLE :: subV(:)
1409 #if defined(MPI_OPT)
1410 INTEGER,
EXTERNAL :: NUMROC
1413 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector started';
CALL fl(ofu)
1414 CALL bsystemclock(pstats%comm%t1)
1416 #if defined(MPI_OPT)
1417 bszr = blacs%pgrid%blockszrows
1418 pnr = blacs%pgrid%nrows
1422 aslaverank = blacs%pgrid%map(i,j)
1424 subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1426 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector from ',i,
' ',j,
' ',aslaverank;
CALL fl(ofu)
1428 IF ( i .EQ. 1 .AND. j .EQ. 1 )
THEN
1430 IF ( aslaverank .NE. rank )
THEN
1431 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in slave rank of master';
CALL fl(ofu)
1434 IF ( ssubnrows .NE. subnrows )
THEN
1435 IF(kpdbg)
WRITE(ofu,*)
'Inconsistency in ssub dimensions';
CALL fl(ofu)
1436 IF(kpdbg)
WRITE(ofu,*)
'SSNR ', ssubnrows;
CALL fl(ofu)
1437 IF(kpdbg)
WRITE(ofu,*)
'SNR ', subnrows;
CALL fl(ofu)
1442 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting self subvector';
CALL fl(ofu)
1443 CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1444 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector kept self subvector';
CALL fl(ofu)
1447 ALLOCATE( subv( 1 : subnrows ) )
1448 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector receiving slave subvector';
CALL fl(ofu)
1449 CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1450 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injecting subvector',i,j;
CALL fl(ofu)
1451 CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1452 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector injected subvector';
CALL fl(ofu)
1454 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector deallocated temp subvector';
CALL fl(ofu)
1459 CALL bsystemclock(pstats%comm%t2)
1460 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1463 IF(kpdbg)
WRITE(ofu,*)
'MasterRecvVector done';
CALL fl(ofu)
1465 END SUBROUTINE masterrecvvector
1474 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1478 INTEGER,
INTENT(IN) :: nrows
1479 INTEGER,
INTENT(IN) :: subV(:)
1480 INTEGER,
INTENT(IN) :: subnrows
1483 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector started ', subnrows;
CALL fl(ofu)
1485 CALL bsystemclock(pstats%comm%t1)
1487 pj = blacs%pgrid%mycol
1488 IF ( pj .GT. 0 )
THEN
1489 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector skipping since mycol>0 ',pj;
CALL fl(ofu)
1491 #if defined(MPI_OPT)
1492 CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1496 CALL bsystemclock(pstats%comm%t2)
1497 CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1499 IF(kpdbg)
WRITE(ofu,*)
'SlaveSendVector done';
CALL fl(ofu)
1500 END SUBROUTINE slavesendvector
1507 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1511 REAL(dp),
INTENT(IN) :: alpha, beta
1512 REAL(dp),
INTENT(IN) :: A(:,:), B(:,:)
1513 REAL(dp),
INTENT(INOUT) :: C(:,:)
1517 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1518 INTEGER :: descA(9), descB(9), descC(9)
1519 INTEGER :: lldA, lldB, lldC
1520 INTEGER :: nrows, ncols, subnrows, subncols
1521 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1523 INTEGER :: ctxt, info
1524 #if defined(MPI_OPT)
1525 INTEGER,
EXTERNAL :: NUMROC
1527 REAL(dp) :: ton, toff
1530 CALL bsystemclock(skston)
1532 IF(kpdbg)
WRITE(ofu,*)
'Using direct diagonal matrix mul';
CALL fl(ofu)
1533 CALL bsystemclock(ton)
1535 c(row,1)=alpha*a(row,1)*b(row,1)+beta*c(row,1)
1537 CALL bsystemclock(toff)
1538 dgemm_time = dgemm_time + (toff-ton)
1540 CALL bsystemclock(skstoff)
1542 END SUBROUTINE plbdgemm
1549 SUBROUTINE slavedgemm()
1553 REAL(dp),
ALLOCATABLE :: subA(:), subB(:), subC(:)
1554 REAL(dp) :: alpha, beta
1555 INTEGER :: descA(9), descB(9), descC(9)
1556 INTEGER :: lldA, lldB, lldC
1557 INTEGER :: nrows, ncols, subnrows, subncols
1558 INTEGER :: bszr, bszc, pnr, pnc
1560 INTEGER :: ctxt, info
1561 #if defined(MPI_OPT)
1562 INTEGER,
EXTERNAL :: NUMROC
1565 CALL bsystemclock(pstats%mm%t1)
1567 #if defined(MPI_OPT)
1568 nrows = m; ncols = m
1569 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1570 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1571 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1573 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1574 subncols = numroc( ncols, bszc, pj, 0, pnc )
1576 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocating subABC';
CALL fl(ofu)
1577 ALLOCATE( suba(1:subnrows*subncols) )
1578 ALLOCATE( subb(1:subnrows*subncols) )
1579 ALLOCATE( subc(1:subnrows*subncols) )
1580 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM allocated subABC';
CALL fl(ofu)
1582 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1583 ctxt = blacs%levelcontext
1584 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1585 lldb = llda; lldc = llda
1586 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1587 CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1588 CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1589 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM desciniting subABC';
CALL fl(ofu)
1591 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving A';
CALL fl(ofu)
1592 CALL bsystemclock(pstats%mma%t1)
1593 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1594 CALL bsystemclock(pstats%mma%t2)
1595 CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1597 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving B';
CALL fl(ofu)
1598 CALL bsystemclock(pstats%mmb%t1)
1599 CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1600 CALL bsystemclock(pstats%mmb%t2)
1601 CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1603 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving C';
CALL fl(ofu)
1604 CALL bsystemclock(pstats%mmc%t1)
1605 CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1606 CALL bsystemclock(pstats%mmc%t2)
1607 CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1609 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving alpha';
CALL fl(ofu)
1610 CALL bsystemclock(pstats%mmalpha%t1)
1611 CALL slavereceivevalue( alpha )
1612 CALL bsystemclock(pstats%mmalpha%t2)
1613 CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1615 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM receiving beta';
CALL fl(ofu)
1616 CALL bsystemclock(pstats%mmbeta%t1)
1617 CALL slavereceivevalue( beta )
1618 CALL bsystemclock(pstats%mmbeta%t2)
1619 CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1621 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM invoking PDGEMM';
CALL fl(ofu)
1622 CALL bsystemclock(pstats%comp%t1)
1623 CALL pdgemm(
'N',
'N', m, m, m, alpha, &
1624 suba, 1, 1, desca, &
1625 subb, 1, 1, descb, &
1628 CALL bsystemclock(pstats%comp%t2)
1629 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1630 CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1631 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM done PDGEMM';
CALL fl(ofu)
1633 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sending result matrix to master';
CALL fl(ofu)
1634 CALL bsystemclock(pstats%mmrc%t1)
1635 CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1636 CALL bsystemclock(pstats%mmrc%t2)
1637 CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1638 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM sent result matrix to master';
CALL fl(ofu)
1640 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocating subABC';
CALL fl(ofu)
1644 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGEMM deallocated subABC';
CALL fl(ofu)
1647 CALL bsystemclock(pstats%mm%t2)
1648 CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1650 END SUBROUTINE slavedgemm
1657 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1661 REAL(dp),
INTENT(IN) :: alpha, beta
1662 REAL(dp),
INTENT(IN) :: A(:,:)
1663 REAL(dp),
INTENT(IN) :: x(:)
1664 REAL(dp),
INTENT(INOUT) :: y(:)
1667 REAL(dp) :: ton, toff
1669 CALL bsystemclock(ton)
1671 y(i) = alpha*a(i,1)*x(i) + beta*y(i)
1673 CALL bsystemclock(toff)
1674 dgemv_time = dgemv_time + (toff-ton)
1675 END SUBROUTINE plbdgemv
1682 SUBROUTINE plbdgetrf( A, piv, info )
1686 REAL(dp),
INTENT(INOUT) :: A(:,:)
1687 INTEGER,
INTENT(OUT) :: piv(:)
1688 INTEGER,
INTENT(INOUT) :: info
1692 REAL(dp),
ALLOCATABLE :: subA(:)
1693 INTEGER,
ALLOCATABLE :: subpiv(:)
1696 INTEGER :: nrows, ncols, subnrows, subncols
1697 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1700 #if defined(MPI_OPT)
1701 INTEGER,
EXTERNAL :: NUMROC
1703 REAL(dp) :: ton, toff
1707 END SUBROUTINE plbdgetrf
1714 SUBROUTINE slavedgetrf()
1718 REAL(dp),
ALLOCATABLE :: subA(:)
1719 INTEGER,
ALLOCATABLE :: subpiv(:)
1722 INTEGER :: nrows, ncols, subnrows, subncols
1723 INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1724 INTEGER :: ctxt, info
1725 #if defined(MPI_OPT)
1726 INTEGER,
EXTERNAL :: NUMROC
1729 CALL bsystemclock(pstats%trf%t1)
1731 #if defined(MPI_OPT)
1732 nrows = m; ncols = m
1733 bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1734 pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1735 pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1736 subnrows = numroc( nrows, bszr, pi, 0, pnr )
1737 subncols = numroc( ncols, bszc, pj, 0, pnc )
1739 ctxt = blacs%levelcontext
1740 llda = subnrows;
IF( llda .LT. 1 ) llda = 1
1741 CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1743 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocating subAPiv';
CALL fl(ofu)
1744 ALLOCATE( suba(1:subnrows*subncols) )
1745 ALLOCATE( subpiv(1:subnrows+bszr) )
1746 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF allocated subAPiv';
CALL fl(ofu)
1748 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF receiving A submatrix';
CALL fl(ofu)
1749 CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1750 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF received A submatrix';
CALL fl(ofu)
1752 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF invoking PDGETRF';
CALL fl(ofu)
1753 CALL bsystemclock(pstats%comp%t1)
1754 CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1755 CALL bsystemclock(pstats%comp%t2)
1756 CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1757 CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1758 IF(kpdbg)
WRITE(ofu,*)
'MasterDGETRF done PDGETRF';
CALL fl(ofu)
1760 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sending result matrix to master';
CALL fl(ofu)
1761 CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1762 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result matrix to master';
CALL fl(ofu)
1763 CALL slavesendvector( nrows, subpiv, subnrows )
1764 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF sent result vector to master';
CALL fl(ofu)
1766 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocating subAPiv';
CALL fl(ofu)
1767 DEALLOCATE( subpiv )
1769 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRF deallocated subAPiv';
CALL fl(ofu)
1773 CALL bsystemclock(pstats%trf%t2)
1774 CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1776 END SUBROUTINE slavedgetrf
1783 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1787 INTEGER,
INTENT(IN) :: nrhs
1788 REAL(dp),
INTENT(IN) :: A(:,:)
1789 INTEGER,
INTENT(IN) :: piv(:)
1790 REAL(dp),
INTENT(INOUT) :: B(:,:)
1791 INTEGER :: info, i, j
1792 REAL(dp) :: ton, toff
1802 CALL bsystemclock(ton)
1805 IF (a(i,1).EQ.0) stop
'Inverting zero'
1806 b(i,j) = b(i,j)/a(i,1)
1810 CALL bsystemclock(toff)
1811 dgetrs_time = dgetrs_time + (toff-ton)
1812 END SUBROUTINE plbdgetrs
1819 SUBROUTINE slavedgetrs()
1821 IF(kpdbg)
WRITE(ofu,*)
'SlaveDGETRS not implemented';
CALL fl(ofu)
1824 END SUBROUTINE slavedgetrs
1833 SUBROUTINE initialize_bst( do_mpiinit, inN, inM )
1837 LOGICAL,
INTENT(IN) :: do_mpiinit
1838 INTEGER,
INTENT(IN) :: inN
1839 INTEGER,
INTENT(IN) :: inM
1844 INTEGER :: nrowsperrank
1845 INTEGER :: nspillrows
1848 INTEGER :: globrowoff
1855 use_mpiwtime = .false.
1856 #if defined(MPI_OPT)
1857 IF ( do_mpiinit )
THEN
1858 CALL mpi_init( mpierr )
1862 use_mpiwtime = .true.
1871 IF ( p .GT. n )
THEN
1872 IF(kpdbg)
WRITE(ofu,*)
'Detected extra ', p-n,
' ranks';
CALL fl(ofu)
1884 kpdbg=.false.; kenvvar=
'KPDBG'
1885 CALL getenv(kenvvar,kenvval)
1886 IF (kenvval.NE.
'')
THEN
1887 IF (kenvval.EQ.
'TRUE')
THEN
1895 IF(kpdbg)
WRITE(ofu,*)
'Rank ', rank,
' NRanks ', nranks;
CALL fl(ofu)
1896 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
1897 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization start ------';
CALL fl(ofu)
1898 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
1901 IF ( n .LT. 1 )
THEN
1902 IF(kpdbg)
WRITE(ofu,*)
'Bad N', n;
CALL fl(ofu)
1911 nspillrows = mod(n, p)
1912 IF ( rank .LT. nspillrows )
THEN
1913 startglobrow = rank*nrowsperrank + rank + 1
1914 endglobrow = startglobrow + nrowsperrank
1915 ELSE IF ( rank .LT. p )
THEN
1916 startglobrow = rank*nrowsperrank + nspillrows + 1
1917 endglobrow = startglobrow + nrowsperrank - 1
1926 lognbase2 = log(real(n))/log(2.0)
1927 nlevels = 1 + int(lognbase2)
1928 IF ( real(int(lognbase2)) .NE. lognbase2 )
THEN
1929 nlevels = nlevels + 1
1931 IF(kpdbg)
WRITE(ofu,*)
'NLEVELS=', nlevels;
CALL fl(ofu)
1941 usebarriers = .false.
1944 writeproblemfile = .false.
1945 writesolution = .false.
1971 IF (.NOT.
ALLOCATED(lelement))
ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
1973 CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
1976 IF (.NOT.
ALLOCATED(selement))
ALLOCATE( selement(n) )
1978 CALL chargememory( n*ptrsz )
1984 DO globrow = startglobrow, endglobrow, 1
1985 globrowoff = globrow-startglobrow+1
1986 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%L))
ALLOCATE( lelement(1, globrowoff)%L(m,1) )
1987 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%D))
ALLOCATE( lelement(1, globrowoff)%D(m,1) )
1988 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%U))
ALLOCATE( lelement(1, globrowoff)%U(m,1) )
1989 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%b))
ALLOCATE( lelement(1, globrowoff)%b(m,1) )
1990 IF(.NOT.
ALLOCATED(lelement(1,globrowoff)%pivot))
ALLOCATE( lelement(1, globrowoff)%pivot(m) )
1991 lelement(1, globrowoff)%L = 0
1992 lelement(1, globrowoff)%D = 0
1993 lelement(1, globrowoff)%U = 0
1994 lelement(1, globrowoff)%b = 0
1995 lelement(1, globrowoff)%pivot = 0
2002 IF(.NOT.
ALLOCATED(orig))
ALLOCATE( orig(endglobrow-startglobrow+1) )
2003 DO globrow = startglobrow, endglobrow, 1
2004 globrowoff = globrow-startglobrow+1
2005 IF(.NOT.
ALLOCATED(orig(globrowoff)%L))
ALLOCATE( orig(globrowoff)%L(m,1) )
2006 IF(.NOT.
ALLOCATED(orig(globrowoff)%D))
ALLOCATE( orig(globrowoff)%D(m,1) )
2007 IF(.NOT.
ALLOCATED(orig(globrowoff)%U))
ALLOCATE( orig(globrowoff)%U(m,1) )
2008 IF(.NOT.
ALLOCATED(orig(globrowoff)%b))
ALLOCATE( orig(globrowoff)%b(m,1) )
2010 orig(globrowoff)%L = 0
2011 orig(globrowoff)%D = 0
2012 orig(globrowoff)%U = 0
2013 orig(globrowoff)%b = 0
2017 CALL plbinitialize()
2020 IF(kpdbg)
WRITE(ofu,*)
' P=',p,
' N=',n,
' M=',m;
CALL fl(ofu)
2021 IF(kpdbg)
WRITE(ofu,*)
'SROW=',startglobrow,
' EROW=', endglobrow;
CALL fl(ofu)
2022 IF(kpdbg)
WRITE(ofu,*)
'------ Initialization end ------';
CALL fl(ofu)
2023 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2024 END SUBROUTINE initialize_bst
2032 SUBROUTINE setmatrixrowcoll_bst( globrow, Lj )
2036 INTEGER,
INTENT(IN) :: globrow
2037 REAL(dp),
INTENT(IN) :: Lj(:)
2041 INTEGER :: i, globrowoff
2046 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2047 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2050 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2051 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2055 globrowoff = globrow-startglobrow+1
2060 IF ( globrow .EQ. 1 )
THEN
2061 lelement(1, globrowoff)%L(:,1) = zero
2063 lelement(1, globrowoff)%L(:,1) = lj(:)
2070 orig(globrowoff)%L(:,1) = lelement(1,globrowoff)%L(:,1)
2075 END SUBROUTINE setmatrixrowcoll_bst
2083 SUBROUTINE setmatrixrowcold_bst( globrow, Dj )
2087 INTEGER,
INTENT(IN) :: globrow
2088 REAL(dp),
INTENT(IN) :: Dj(:)
2093 INTEGER :: i, globrowoff
2098 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2099 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2102 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2103 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2107 globrowoff = globrow-startglobrow+1
2112 lelement(1, globrowoff)%D(:,1) = dj(:)
2117 orig(globrowoff)%D(:,1) = lelement(1,globrowoff)%D(:,1)
2122 END SUBROUTINE setmatrixrowcold_bst
2131 SUBROUTINE setmatrixrowcolu_bst( globrow, Uj )
2135 INTEGER,
INTENT(IN) :: globrow
2136 REAL(dp),
INTENT(IN) :: Uj(:)
2140 INTEGER :: i, globrowoff
2145 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2146 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2149 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2150 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2154 globrowoff = globrow-startglobrow+1
2159 IF ( globrow .EQ. n )
THEN
2160 lelement(1, globrowoff)%U(:,1) = zero
2162 lelement(1, globrowoff)%U(:,1) = uj
2168 orig(globrowoff)%U(:,1) = lelement(1,globrowoff)%U(:,1)
2173 END SUBROUTINE setmatrixrowcolu_bst
2181 SUBROUTINE setmatrixrhs_bst( globrow, b )
2185 INTEGER,
INTENT(IN) :: globrow
2186 REAL(dp),
INTENT(IN) :: b(:)
2191 INTEGER :: i, globrowoff
2196 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2197 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2200 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2201 IF(kpdbg)
WRITE(ofu,*)
'SetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2205 globrowoff = globrow-startglobrow+1
2212 lelement(1, globrowoff)%b(i,1) = val
2218 orig(globrowoff)%b = lelement(1,globrowoff)%b
2223 END SUBROUTINE setmatrixrhs_bst
2231 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2235 INTEGER,
INTENT(IN) :: globrow
2236 REAL(dp),
INTENT(OUT) :: Lj(:)
2237 INTEGER,
INTENT(IN) :: j
2242 INTEGER :: i, globrowoff
2247 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2248 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad input globrow ',globrow;
CALL fl(ofu)
2251 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2252 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Non-local globrow ',globrow;
CALL fl(ofu)
2255 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2256 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2260 globrowoff = globrow-startglobrow+1
2266 IF ( globrow .EQ. 1 )
THEN
2269 val = lelement(1, globrowoff)%L(i,j)
2274 END SUBROUTINE getmatrixrowcoll
2281 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2285 INTEGER,
INTENT(IN) :: globrow
2286 REAL(dp),
INTENT(OUT) :: Dj(:)
2287 INTEGER,
INTENT(IN) :: j
2292 INTEGER :: i, globrowoff
2297 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2298 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad input globrow ',globrow;
CALL fl(ofu)
2301 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2302 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Non-local globrow ',globrow;
CALL fl(ofu)
2305 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2306 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColD: Bad j column ',j;
CALL fl(ofu)
2310 globrowoff = globrow-startglobrow+1
2316 val = lelement(1, globrowoff)%D(i,j)
2320 END SUBROUTINE getmatrixrowcold
2327 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2331 INTEGER,
INTENT(IN) :: globrow
2332 REAL(dp),
INTENT(OUT) :: Uj(:)
2333 INTEGER,
INTENT(IN) :: j
2338 INTEGER :: i, globrowoff
2343 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2344 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Bad input globrow ',globrow;
CALL fl(ofu)
2347 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2348 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColU: Non-local globrow ',globrow;
CALL fl(ofu)
2351 IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) )
THEN
2352 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRowColL: Bad j column ',j;
CALL fl(ofu)
2356 globrowoff = globrow-startglobrow+1
2362 IF ( globrow .EQ. n )
THEN
2365 val = lelement(1, globrowoff)%U(i,j)
2370 END SUBROUTINE getmatrixrowcolu
2377 SUBROUTINE getmatrixrhs( globrow, b )
2381 INTEGER,
INTENT(IN) :: globrow
2382 REAL(dp),
INTENT(OUT) :: b(:)
2387 INTEGER :: i, globrowoff
2392 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2393 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Bad input globrow ',globrow;
CALL fl(ofu)
2396 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2397 IF(kpdbg)
WRITE(ofu,*)
'GetMatrixRHS: Non-local globrow ',globrow;
CALL fl(ofu)
2401 globrowoff = globrow-startglobrow+1
2407 val = lelement(1, globrowoff)%b(i,1)
2411 END SUBROUTINE getmatrixrhs
2418 SUBROUTINE getsolutionvector_bst( globrow, x )
2422 INTEGER,
INTENT(IN) :: globrow
2423 REAL(dp),
INTENT(OUT) :: x(:)
2433 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2434 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Bad input globrow ',globrow;
CALL fl(ofu)
2437 IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) )
THEN
2438 IF(kpdbg)
WRITE(ofu,*)
'SetSolutionVector: Non-local globrow ',globrow;
CALL fl(ofu)
2446 val = selement(globrow)%x(i)
2450 END SUBROUTINE getsolutionvector_bst
2457 SUBROUTINE setidentitytestcase
2461 INTEGER :: i, j, globrow, globrowoff
2463 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case ------';
CALL fl(ofu)
2468 DO globrow = startglobrow, endglobrow, 1
2469 globrowoff = globrow-startglobrow+1
2472 lelement(1, globrowoff)%L(i,j) = zero
2473 IF ( i .EQ. j )
THEN
2474 lelement(1, globrowoff)%D(i,j) = one
2476 lelement(1, globrowoff)%D(i,j) = zero
2478 lelement(1, globrowoff)%U(i,j) = zero
2480 lelement(1, globrowoff)%b(i,1) = one
2485 orig(globrowoff)%L = lelement(1,globrowoff)%L
2486 orig(globrowoff)%D = lelement(1,globrowoff)%D
2487 orig(globrowoff)%U = lelement(1,globrowoff)%U
2488 orig(globrowoff)%b = lelement(1,globrowoff)%b
2495 END SUBROUTINE setidentitytestcase
2503 SUBROUTINE setidentityrhs
2507 INTEGER :: i, j, globrow, globrowoff
2509 IF(kpdbg)
WRITE(ofu,*)
'------ Using Identity Test Case RHS ------';
CALL fl(ofu)
2514 DO globrow = startglobrow, endglobrow, 1
2515 globrowoff = globrow-startglobrow+1
2517 lelement(1, globrowoff)%b(i,1) = one
2522 orig(globrowoff)%b = lelement(1,globrowoff)%b
2528 END SUBROUTINE setidentityrhs
2535 SUBROUTINE setrandomtestcase
2539 REAL(dp) :: randval, rmin, rmax
2541 INTEGER,
ALLOCATABLE :: seeds(:)
2542 INTEGER :: i, j, globrow, globrowoff
2544 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case ------';
CALL fl(ofu)
2549 CALL random_seed( size=rngwidth)
2550 ALLOCATE( seeds(rngwidth) )
2552 seeds(i) = i*(rank+100)*p
2554 CALL random_seed( put=seeds(1:rngwidth) )
2560 IF( writeproblemfile )
THEN
2561 IF(kpdbg)
WRITE(pfu,*) n*m;
CALL fl(pfu)
2562 IF(kpdbg)
WRITE(pfu,*) (3*n-2)*m*m;
CALL fl(pfu)
2570 DO globrow = startglobrow, endglobrow, 1
2571 globrowoff = globrow-startglobrow+1
2574 IF ( globrow .EQ. 1 )
THEN
2577 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2578 IF( writeproblemfile )
THEN
2579 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
2582 lelement(1, globrowoff)%L(i,j) = randval
2584 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2585 lelement(1, globrowoff)%D(i,j) = randval
2586 IF( writeproblemfile )
THEN
2587 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
2590 IF ( globrow .EQ. n )
THEN
2593 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2594 IF( writeproblemfile )
THEN
2595 IF(kpdbg)
WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
2598 lelement(1, globrowoff)%U(i,j) = randval
2600 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2601 lelement(1, globrowoff)%b(i,1) = randval
2607 orig(globrowoff)%L = lelement(1,globrowoff)%L
2608 orig(globrowoff)%D = lelement(1,globrowoff)%D
2609 orig(globrowoff)%U = lelement(1,globrowoff)%U
2610 orig(globrowoff)%b = lelement(1,globrowoff)%b
2613 IF (writeproblemfile)
THEN
2614 DO globrow = startglobrow, endglobrow, 1
2615 globrowoff = globrow-startglobrow+1
2617 IF(kpdbg)
WRITE(pfu,*) lelement(1,globrowoff)%b(i,1);
CALL fl(pfu)
2626 END SUBROUTINE setrandomtestcase
2634 SUBROUTINE setrandomrhs( randseedoff )
2638 INTEGER,
INTENT(IN) :: randseedoff
2642 REAL(dp) :: randval, rmin, rmax
2644 INTEGER,
ALLOCATABLE :: seeds(:)
2645 INTEGER :: i, j, globrow, globrowoff
2647 IF(kpdbg)
WRITE(ofu,*)
'------ Using Random Test Case RHS ------';
CALL fl(ofu)
2652 CALL random_seed( size=rngwidth)
2653 ALLOCATE( seeds(rngwidth) )
2655 seeds(i) = i*(rank+100+34+randseedoff)*p
2657 CALL random_seed( put=seeds(1:rngwidth) )
2665 DO globrow = startglobrow, endglobrow, 1
2666 globrowoff = globrow-startglobrow+1
2668 CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2669 lelement(1, globrowoff)%b(i,1) = randval
2675 orig(globrowoff)%b = lelement(1,globrowoff)%b
2681 END SUBROUTINE setrandomrhs
2688 SUBROUTINE finalize_bst( do_mpifinalize )
2692 LOGICAL,
INTENT(IN) :: do_mpifinalize
2699 INTEGER :: tempinvcount, tempmulcount, tempsolcount
2702 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
2703 IF(kpdbg)
WRITE(ofu,*)
'------ Finalizing start ------';
CALL fl(ofu)
2711 IF (
ALLOCATED(lelement) )
THEN
2712 DO level = lbound(lelement,1), ubound(lelement,1)
2713 DO globrow = lbound(lelement,2), ubound(lelement,2)
2714 IF (
ALLOCATED(lelement(level,globrow)%L) )
THEN
2715 DEALLOCATE( lelement(level, globrow)%L )
2717 IF (
ALLOCATED(lelement(level,globrow)%D) )
THEN
2718 DEALLOCATE( lelement(level, globrow)%D )
2720 IF (
ALLOCATED(lelement(level,globrow)%U) )
THEN
2721 DEALLOCATE( lelement(level, globrow)%U )
2723 IF (
ALLOCATED(lelement(level,globrow)%b) )
THEN
2724 DEALLOCATE( lelement(level, globrow)%b )
2726 IF (
ALLOCATED(lelement(level,globrow)%pivot) )
THEN
2727 DEALLOCATE( lelement(level, globrow)%pivot )
2731 DEALLOCATE(lelement)
2734 IF (
ALLOCATED(orig) )
THEN
2735 DO globrow = lbound(orig,1), ubound(orig,1)
2736 IF (
ALLOCATED(orig(globrow)%L) )
THEN
2737 DEALLOCATE( orig(globrow)%L )
2739 IF (
ALLOCATED(orig(globrow)%D) )
THEN
2740 DEALLOCATE( orig(globrow)%D )
2742 IF (
ALLOCATED(orig(globrow)%U) )
THEN
2743 DEALLOCATE( orig(globrow)%U )
2745 IF (
ALLOCATED(orig(globrow)%b) )
THEN
2746 DEALLOCATE( orig(globrow)%b )
2748 IF (
ALLOCATED(orig(globrow)%pivot) )
THEN
2749 DEALLOCATE( orig(globrow)%pivot )
2755 IF (
ALLOCATED(selement) )
THEN
2756 DO globrow = 1, n, 1
2757 IF (
ALLOCATED(selement(globrow)%x) )
THEN
2758 DEALLOCATE( selement(globrow)%x )
2761 DEALLOCATE(selement)
2765 #if defined(MPI_OPT)
2766 IF ( usebarriers )
THEN
2767 IF(kpdbg)
WRITE(ofu,*)
'Barrier in finalize';
CALL fl(ofu)
2768 CALL mpi_barrier( ns_comm, mpierr )
2769 IF(kpdbg)
WRITE(ofu,*)
'Done barrier in finalize';
CALL fl(ofu)
2772 IF ( do_mpifinalize )
THEN
2773 CALL mpi_finalize( mpierr )
2777 bcyclic_comp_time = bcyclic_comp_time + (tottime-totcommtime)
2778 bcyclic_comm_time = bcyclic_comm_time + totcommtime
2780 tempmulcount=totmatmulcount;
IF ( tempmulcount .LE. 0 ) tempmulcount = 1
2781 tempinvcount=totinvcount;
IF ( tempinvcount .LE. 0 ) tempinvcount = 1
2782 tempsolcount=totmatsolcount;
IF ( tempsolcount .LE. 0 ) tempsolcount = 1
2783 IF(kpdbg)
WRITE(ofu,*)
'N=', n,
' M=', m,
' P=', p,
' rank=', rank
2784 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory ', membytes/1e6,
' MB'
2785 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Computation ', tottime-totcommtime,
' sec'
2786 IF(kpdbg)
WRITE(ofu,
'(A,F8.4,A)')
'Communication ', totcommtime,
' sec'
2787 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix inv ',totinvcount,
' * ', &
2788 totinvtime/tempinvcount,
' sec = ', totinvtime,
' sec'
2789 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix mul ',totmatmulcount,
' * ', &
2790 totmatmultime/tempmulcount,
' sec = ', totmatmultime,
' sec'
2791 IF(kpdbg)
WRITE(ofu,
'(A,I5.1,A,F8.4,A,F8.4,A)')
'Matrix sol ',totmatsolcount,
' * ', &
2792 totmatsoltime/tempsolcount,
' sec = ', totmatsoltime,
' sec'
2793 IF(kpdbg)
WRITE(ofu,*)
'Finalized rank ', rank
2794 IF(kpdbg)
WRITE(ofu,*)
'------ Finalization end ------';
CALL fl(ofu)
2795 END SUBROUTINE finalize_bst
2802 LOGICAL FUNCTION iseven( num )
2803 INTEGER,
INTENT(IN) :: num
2804 iseven = mod(num,2) .EQ. 0
2812 LOGICAL FUNCTION isodd( num )
2813 INTEGER,
INTENT(IN) :: num
2814 isodd = (.NOT. iseven(num))
2822 FUNCTION lr2gr( locrow, level )
result ( globrow )
2826 INTEGER,
INTENT(IN) :: locrow
2827 INTEGER,
INTENT(IN) :: level
2839 levelloop:
DO i = level-1, 1, -1
2840 globrow = 2*globrow - 1
2846 IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow )
THEN
2860 FUNCTION gr2lr( globrow, level )
result ( locrow )
2864 INTEGER,
INTENT(IN) :: globrow
2865 INTEGER,
INTENT(IN) :: level
2878 levelloop:
DO i = 1, level-1, 1
2879 IF ( iseven(locrow) )
THEN
2883 locrow = (locrow+1) / 2
2894 FUNCTION gr2rank( globrow )
result ( outrank )
2898 INTEGER,
INTENT(IN) :: globrow
2907 IF ( (globrow .LT. 1) .OR. (globrow .GT. n) )
THEN
2912 IF ( globrow .LE. (m+1)*spill )
THEN
2913 outrank = (globrow-1)/(m+1)
2915 outrank = (globrow-1 - (m+1)*spill)/m + spill
2920 END FUNCTION gr2rank
2927 FUNCTION lr2rank( locrow, level )
result ( outrank )
2931 INTEGER,
INTENT(IN) :: locrow
2932 INTEGER,
INTENT(IN) :: level
2940 globrow = lr2gr( locrow, level )
2941 outrank = gr2rank( globrow )
2944 END FUNCTION lr2rank
2953 #if defined(MPI_OPT)
2954 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
2958 INTEGER,
INTENT(IN) :: locrow
2959 INTEGER,
INTENT(IN) :: level
2960 INTEGER,
INTENT(IN) :: startlocrow
2961 INTEGER,
INTENT(IN) :: endlocrow
2962 LOGICAL,
INTENT(IN) :: bonly
2967 INTEGER :: globrowoff
2968 INTEGER :: prevglobrowoff
2969 INTEGER :: nextglobrowoff
2975 IF ( level .GE. nlevels )
THEN
2976 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: no op ';
CALL fl(ofu)
2977 IF ( rank .NE. 0 )
THEN
2978 IF(kpdbg)
WRITE(ofu,*)
'mat-mul last lvl: impossible ',rank;
CALL fl(ofu)
2987 IF ( iseven( locrow ) )
THEN
2988 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: odd only ',globrow,
' ',locrow;
CALL fl(ofu); stop
2990 IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) )
THEN
2991 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: locrow prob ',globrow,
' ',locrow;
CALL fl(ofu);stop
2995 globrow = lr2gr( locrow, level )
2996 globrowoff = globrow - startglobrow + 1
2997 IF(kpdbg)
WRITE(ofu,*)
' Compute mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3002 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D ) )
THEN
3003 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D';
CALL fl(ofu); stop
3005 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L ) )
THEN
3006 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L';
CALL fl(ofu); stop
3008 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U ) )
THEN
3009 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U';
CALL fl(ofu); stop
3011 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b ) )
THEN
3012 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b';
CALL fl(ofu); stop
3018 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%D ) )
THEN
3019 IF(kpdbg)
WRITE(ofu,*)
' Copying bad D +1';
CALL fl(ofu); stop
3021 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%L ) )
THEN
3022 IF(kpdbg)
WRITE(ofu,*)
' Copying bad L +1';
CALL fl(ofu); stop
3024 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%U ) )
THEN
3025 IF(kpdbg)
WRITE(ofu,*)
' Copying bad U +1';
CALL fl(ofu); stop
3027 IF ( .NOT.
ALLOCATED(lelement(level+1,globrowoff)%b ) )
THEN
3028 IF(kpdbg)
WRITE(ofu,*)
' Copying bad b +1';
CALL fl(ofu); stop
3034 IF ( .NOT. bonly )
THEN
3035 lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3037 lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3042 IF ( lr2rank(locrow-1,level) .LT. 0 )
THEN
3044 IF ( .NOT. bonly )
THEN
3045 lelement(level+1,globrowoff)%U = 0
3046 IF(kpdbg)
WRITE(ofu,*)
' ZERO L';
CALL fl(ofu)
3049 IF ( locrow .EQ. startlocrow )
THEN
3051 ELSE IF ( locrow .GT. startlocrow )
THEN
3052 prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3054 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible prev';
CALL fl(ofu); stop
3057 IF ( .NOT. bonly )
THEN
3061 CALL bsystemclock(mattimer1)
3062 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3063 lelement(level,prevglobrowoff)%U, &
3064 one, lelement(level+1,globrowoff)%D )
3065 CALL bsystemclock(mattimer2)
3066 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3067 IF(kpdbg)
WRITE(ofu,*)
' Dtop';
CALL fl(ofu)
3072 CALL bsystemclock(mattimer1)
3073 CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3074 lelement(level,prevglobrowoff)%L, &
3075 zero, lelement(level+1,globrowoff)%L )
3076 CALL bsystemclock(mattimer2)
3077 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3078 IF(kpdbg)
WRITE(ofu,*)
' L';
CALL fl(ofu)
3084 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3085 lelement(level,prevglobrowoff)%b(:,1), &
3086 one, lelement(level+1,globrowoff)%b(:,1) )
3087 IF(kpdbg)
WRITE(ofu,*)
' btop';
CALL fl(ofu)
3093 IF ( lr2rank(locrow+1,level) .LT. 0 )
THEN
3095 IF ( .NOT. bonly )
THEN
3096 lelement(level+1,globrowoff)%U = 0
3097 IF(kpdbg)
WRITE(ofu,*)
' ZERO U';
CALL fl(ofu)
3100 IF ( locrow .EQ. endlocrow )
THEN
3101 nextglobrowoff = endglobrow - startglobrow + 2
3102 ELSE IF ( locrow .LT. endlocrow )
THEN
3103 nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3105 IF(kpdbg)
WRITE(ofu,*)
'mat-mul: impossible next';
CALL fl(ofu); stop
3108 IF ( .NOT. bonly )
THEN
3112 CALL bsystemclock(mattimer1)
3113 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3114 lelement(level,nextglobrowoff)%L, &
3115 one, lelement(level+1,globrowoff)%D )
3116 CALL bsystemclock(mattimer2)
3117 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3118 IF(kpdbg)
WRITE(ofu,*)
' Dbot';
CALL fl(ofu)
3123 CALL bsystemclock(mattimer1)
3124 CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3125 lelement(level,nextglobrowoff)%U, &
3126 zero, lelement(level+1,globrowoff)%U )
3127 CALL bsystemclock(mattimer2)
3128 CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3129 IF(kpdbg)
WRITE(ofu,*)
' U';
CALL fl(ofu)
3135 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3136 lelement(level,nextglobrowoff)%b(:,1), &
3137 one, lelement(level+1,globrowoff)%b(:,1) )
3138 IF(kpdbg)
WRITE(ofu,*)
' bbot';
CALL fl(ofu)
3140 END SUBROUTINE computeforwardoddrowhats
3147 SUBROUTINE forwardsolve_bst
3154 INTEGER :: globrowoff
3155 INTEGER :: startlocrow
3156 INTEGER :: endlocrow
3157 INTEGER :: rowoffset
3159 INTEGER :: nbrmpireqindx
3160 INTEGER :: nbrmpireqcnt
3161 INTEGER :: nbrmpinirecvs
3162 INTEGER :: nbrmpinisends
3163 INTEGER :: nbrmpireq(6)
3164 INTEGER :: nbrmpierr(6)
3165 INTEGER :: mpiwaiterr
3167 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3168 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3169 INTEGER :: waitmatchindex
3173 DOUBLE PRECISION :: fwton, fwtoff
3189 CALL bsystemclock(globtimer1)
3190 CALL bsystemclock(fwton)
3193 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3194 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve start ------';
CALL fl(ofu)
3195 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3201 forwardsolveloop:
DO level = 1, nlevels, 1
3204 CALL bsystemclock(skston)
3206 IF ( usebarriers )
THEN
3207 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward level ', level;
CALL fl(ofu)
3208 CALL mpi_barrier( ns_comm, mpierr )
3209 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward level ', level;
CALL fl(ofu)
3217 DO globrow = startglobrow, endglobrow, 1
3218 locrow = gr2lr( globrow, level )
3219 IF ( locrow .GT. 0 )
THEN
3220 IF ( locrow .LT. startlocrow )
THEN
3221 startlocrow = locrow
3223 IF ( locrow .GT. endlocrow )
THEN
3232 IF ( startlocrow .GT. endlocrow )
THEN
3233 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward level ', level;
CALL fl(ofu)
3234 CALL plbforwardinitializelevel( level, .false. )
3235 cycle forwardsolveloop
3237 CALL plbforwardinitializelevel( level, .true. )
3239 CALL bsystemclock(skstoff)
3240 t(1) = t(1) + skstoff-skston
3245 IF(kpdbg)
WRITE(ofu,*)
'**** Forward level ', level,
' ****';
CALL fl(ofu)
3250 IF ( isodd(startlocrow) .AND. &
3251 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
3253 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3254 ALLOCATE( lelement(level, globrowoff)%L(m,1) )
3255 ALLOCATE( lelement(level, globrowoff)%U(m,1) )
3256 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3258 lelement(level, globrowoff)%L = 0
3259 lelement(level, globrowoff)%U = 0
3260 lelement(level, globrowoff)%b = 0
3262 IF ( isodd(endlocrow) .AND. &
3263 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
3264 globrowoff = endglobrow-startglobrow+2
3265 IF ( .NOT.
ALLOCATED( lelement(level, globrowoff)%L ) )
THEN
3266 ALLOCATE( lelement(level, globrowoff)%L(m,1) )
3267 ALLOCATE( lelement(level, globrowoff)%U(m,1) )
3268 ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3270 lelement(level, globrowoff)%L = 0
3271 lelement(level, globrowoff)%U = 0
3272 lelement(level, globrowoff)%b = 0
3275 CALL bsystemclock(skstoff)
3276 t(2) = t(2) + skstoff-skston
3282 IF ( level .LT. nlevels )
THEN
3283 DO locrow = startlocrow, endlocrow, 1
3284 IF ( isodd(locrow) )
THEN
3285 globrow = lr2gr( locrow, level )
3286 globrowoff = globrow-startglobrow+1
3287 IF ( .NOT.
ALLOCATED( lelement(level+1, globrowoff)%L ) )
THEN
3288 ALLOCATE( lelement(level+1, globrowoff)%L(m,1) )
3289 ALLOCATE( lelement(level+1, globrowoff)%D(m,1) )
3290 ALLOCATE( lelement(level+1, globrowoff)%U(m,1) )
3291 ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3293 lelement(level+1, globrowoff)%L = 0
3294 lelement(level+1, globrowoff)%D = 0
3295 lelement(level+1, globrowoff)%U = 0
3296 lelement(level+1, globrowoff)%b = 0
3301 CALL bsystemclock(skstoff)
3302 t(3) = t(3) + skstoff-skston
3308 DO nbrmpireqindx = 1, 6
3309 nbrmpireq(nbrmpireqindx) = mpi_request_null
3318 nbrrank = lr2rank(startlocrow-1,level)
3319 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
3325 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
3327 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3328 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu); stop
3330 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3331 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu); stop
3333 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3334 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu); stop
3337 CALL bsystemclock(loctimer1)
3338 CALL mpi_irecv( lelement(level, globrowoff)%L, m, mpi_real8, &
3339 nbrrank, 1, ns_comm, nbrmpireq(nbrmpireqindx), &
3340 nbrmpierr(nbrmpireqindx) )
3341 nbrmpireqindx = nbrmpireqindx + 1
3342 nbrmpinirecvs = nbrmpinirecvs + 1
3343 IF(kpdbg)
WRITE(ofu,*)
' Irecv 1 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3345 CALL mpi_irecv( lelement(level, globrowoff)%U, m, mpi_real8, &
3346 nbrrank, 2, ns_comm, nbrmpireq(nbrmpireqindx), &
3347 nbrmpierr(nbrmpireqindx))
3348 nbrmpireqindx = nbrmpireqindx + 1
3349 nbrmpinirecvs = nbrmpinirecvs + 1
3350 IF(kpdbg)
WRITE(ofu,*)
' Irecv 2 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3352 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3353 nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3354 nbrmpierr(nbrmpireqindx))
3355 nbrmpireqindx = nbrmpireqindx + 1
3356 nbrmpinirecvs = nbrmpinirecvs + 1
3357 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3358 CALL bsystemclock(loctimer2)
3359 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3362 CALL bsystemclock(skstoff)
3363 t(4) = t(4) + skstoff-skston
3366 nbrrank = lr2rank(endlocrow+1,level)
3367 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
3372 globrowoff = endglobrow-startglobrow+2
3373 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
3375 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3376 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu); stop
3378 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3379 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu); stop
3381 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3382 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu); stop
3385 CALL bsystemclock(loctimer1)
3386 CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3387 nbrrank, 4, ns_comm, nbrmpireq(nbrmpireqindx), &
3388 nbrmpierr(nbrmpireqindx))
3389 nbrmpireqindx = nbrmpireqindx + 1
3390 nbrmpinirecvs = nbrmpinirecvs + 1
3391 IF(kpdbg)
WRITE(ofu,*)
' Irecv 4 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3393 CALL mpi_irecv( lelement(level, globrowoff)%U, m, mpi_real8, &
3394 nbrrank, 5, ns_comm, nbrmpireq(nbrmpireqindx), &
3395 nbrmpierr(nbrmpireqindx))
3396 nbrmpireqindx = nbrmpireqindx + 1
3397 nbrmpinirecvs = nbrmpinirecvs + 1
3398 IF(kpdbg)
WRITE(ofu,*)
' Irecv 5 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3400 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3401 nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3402 nbrmpierr(nbrmpireqindx))
3403 nbrmpireqindx = nbrmpireqindx + 1
3404 nbrmpinirecvs = nbrmpinirecvs + 1
3405 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3406 CALL bsystemclock(loctimer2)
3407 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3410 CALL bsystemclock(skstoff)
3411 t(5) = t(5) + skstoff-skston
3418 DO locrow = startlocrow, endlocrow, 1
3419 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
3420 globrow = lr2gr( locrow, level )
3421 globrowoff = globrow-startglobrow+1
3423 IF(kpdbg)
WRITE(ofu,*)
' Compute even hats ',globrow,
' ', locrow;
CALL fl(ofu)
3428 IF ( level .EQ. nlevels )
THEN
3429 IF ( (rank .NE. 0) .OR. &
3430 (startlocrow .NE. endlocrow) .OR. &
3431 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
3432 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
3436 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
3437 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu); stop
3439 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
3440 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu); stop
3442 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3443 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu); stop
3445 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3446 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu); stop
3448 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3449 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu); stop
3455 CALL bsystemclock(mattimer1)
3456 CALL plbdgetrf( lelement(level,globrowoff)%D, &
3457 lelement(1,globrowoff)%pivot, blaserr )
3458 CALL bsystemclock(mattimer2)
3459 dgetrf_time = dgetrf_time + (mattimer2-mattimer1)
3460 CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3461 IF (blaserr .NE. 0)
THEN
3462 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRF failed ', blaserr;
CALL fl(ofu); stop
3468 IF ( level .LT. nlevels )
THEN
3469 CALL bsystemclock(mattimer1)
3471 lelement(level,globrowoff)%L(i,1)=lelement(level,globrowoff)%L(i,1)/&
3472 lelement(level,globrowoff)%D(i,1)
3475 CALL bsystemclock(mattimer2)
3476 dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3477 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3478 IF (blaserr .NE. 0)
THEN
3479 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS L failed';
CALL fl(ofu); stop
3481 CALL bsystemclock(mattimer1)
3483 lelement(level,globrowoff)%U(i,1)=lelement(level,globrowoff)%U(i,1)/&
3484 lelement(level,globrowoff)%D(i,1)
3487 CALL bsystemclock(mattimer2)
3488 dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3489 CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3490 IF (blaserr .NE. 0)
THEN
3491 IF(kpdbg)
WRITE(ofu,*)
' PLBDGETRS U failed';
CALL fl(ofu); stop
3498 CALL bsystemclock(mattimer1)
3500 lelement(level,globrowoff)%b(i,1)=lelement(level,globrowoff)%b(i,1)/&
3501 lelement(level,globrowoff)%D(i,1)
3504 IF (blaserr .NE. 0)
THEN
3505 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu); stop
3507 CALL bsystemclock(mattimer2)
3508 dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3514 DO rowoffset = -1, 1, 2
3515 nbrrank = lr2rank(locrow+rowoffset, level)
3516 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
3517 CALL bsystemclock(loctimer1)
3518 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3519 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu); stop
3521 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3522 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu); stop
3524 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3525 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu); stop
3528 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
3529 globrowoff = globrow-startglobrow+1
3530 stag = (((1-rowoffset))/2)*3
3531 CALL mpi_isend( lelement(level,globrowoff)%L, m, mpi_real8, &
3532 nbrrank, stag+1, ns_comm, &
3533 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3534 nbrmpireqindx = nbrmpireqindx + 1
3535 nbrmpinisends = nbrmpinisends + 1
3536 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+1,
' ',globrowoff;
CALL fl(ofu)
3538 CALL mpi_isend( lelement(level,globrowoff)%U, m, mpi_real8, &
3539 nbrrank, stag+2, ns_comm, &
3540 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3541 nbrmpireqindx = nbrmpireqindx + 1
3542 nbrmpinisends = nbrmpinisends + 1
3543 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+2,
' ',globrowoff;
CALL fl(ofu)
3545 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3546 nbrrank, stag+3, ns_comm, &
3547 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3548 nbrmpireqindx = nbrmpireqindx + 1
3549 nbrmpinisends = nbrmpinisends + 1
3550 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
3551 CALL bsystemclock(loctimer2)
3552 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3558 CALL bsystemclock(skstoff)
3559 t(6) = t(6) + skstoff-skston
3565 DO locrow = startlocrow, endlocrow, 1
3566 globrow = lr2gr( locrow, level )
3567 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
3568 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
3569 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3570 CALL computeforwardoddrowhats( locrow, level, &
3571 startlocrow, endlocrow, .false. )
3576 CALL bsystemclock(skstoff)
3577 t(7) = t(7) + skstoff-skston
3585 nbrmpireqcnt = nbrmpireqindx - 1
3586 IF ( nbrmpireqcnt .GT. 0 )
THEN
3587 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
3588 CALL bsystemclock(loctimer1)
3589 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3590 CALL bsystemclock(loctimer2)
3591 waitall_time=waitall_time+(loctimer2-loctimer1)
3592 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3595 CALL bsystemclock(skstoff)
3596 t(8) = t(8) + skstoff-skston
3602 IF ( isodd(startlocrow) )
THEN
3603 globrow = lr2gr( startlocrow, level )
3604 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
3605 CALL computeforwardoddrowhats( startlocrow, level, &
3606 startlocrow, endlocrow, .false. )
3608 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
3609 globrow = lr2gr( endlocrow, level )
3610 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
3611 CALL computeforwardoddrowhats( endlocrow, level, &
3612 startlocrow, endlocrow, .false. )
3615 CALL bsystemclock(skstoff)
3616 t(9) = t(9) + skstoff-skston
3620 CALL bsystemclock(skstoff)
3621 t(10) = t(10) + skstoff-skston
3623 END DO forwardsolveloop
3626 matdirtied = .false.
3627 rhsdirtied = .false.
3630 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
3631 IF(kpdbg)
WRITE(ofu,*)
'------ Forward solve end ------';
CALL fl(ofu)
3633 CALL bsystemclock(globtimer2)
3634 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
3637 CALL bsystemclock(fwtoff)
3638 t(11) = t(11) + fwtoff-skston
3639 forwardsolveloop_time = forwardsolveloop_time + (fwtoff-fwton)
3641 END SUBROUTINE forwardsolve_bst
3649 SUBROUTINE forwardupdateb
3656 INTEGER :: globrowoff
3657 INTEGER :: startlocrow
3658 INTEGER :: endlocrow
3659 INTEGER :: rowoffset
3661 INTEGER :: nbrmpireqindx
3662 INTEGER :: nbrmpireqcnt
3663 INTEGER :: nbrmpinirecvs
3664 INTEGER :: nbrmpinisends
3665 INTEGER :: nbrmpireq(6)
3666 INTEGER :: nbrmpierr(6)
3667 INTEGER :: mpiwaiterr
3669 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3670 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3671 INTEGER :: waitmatchindex
3678 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3679 IF(kpdbg)
WRITE(ofu,*)
'------ Forward updateb start ------';
CALL fl(ofu)
3680 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3683 IF ( matdirtied )
THEN
3684 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
3685 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix ------';
CALL fl(ofu)
3686 IF(kpdbg)
WRITE(ofu,*)
' ------ Forward solve, not updateb... ------';
CALL fl(ofu)
3687 CALL forwardsolve_bst()
3691 CALL bsystemclock(globtimer1)
3697 forwardsolveloop:
DO level = 1, nlevels, 1
3700 IF ( usebarriers )
THEN
3701 IF(kpdbg)
WRITE(ofu,*)
'Barrier at forward updateb level ', level;
CALL fl(ofu)
3702 CALL mpi_barrier( ns_comm, mpierr )
3703 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at forward updateb level ', level;
CALL fl(ofu)
3711 DO globrow = startglobrow, endglobrow, 1
3712 locrow = gr2lr( globrow, level )
3713 IF ( locrow .GT. 0 )
THEN
3714 IF ( locrow .LT. startlocrow )
THEN
3715 startlocrow = locrow
3717 IF ( locrow .GT. endlocrow )
THEN
3726 IF ( startlocrow .GT. endlocrow )
THEN
3727 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at forward updateb level ',level;
CALL fl(ofu)
3728 CALL plbforwardinitializelevel( level, .false. )
3729 cycle forwardsolveloop
3731 CALL plbforwardinitializelevel( level, .true. )
3734 IF(kpdbg)
WRITE(ofu,*)
'**** Forward updateb level ', level,
' ****';
CALL fl(ofu)
3740 IF ( isodd(startlocrow) .AND. &
3741 (lr2rank(startlocrow-1,level) .GE. 0) )
THEN
3743 IF( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
3744 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
3745 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
3746 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +0 memory error';
CALL fl(ofu); stop
3748 lelement(level, globrowoff)%b = 0
3750 IF ( isodd(endlocrow) .AND. &
3751 (lr2rank(endlocrow+1,level) .GE. 0) )
THEN
3752 globrowoff = endglobrow-startglobrow+2
3753 IF ( .NOT. (
ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
3754 ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
3755 ALLOCATED( lelement(level, globrowoff)%b ) ) )
THEN
3756 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +2 memory error';
CALL fl(ofu); stop
3758 lelement(level, globrowoff)%b = 0
3765 IF ( level .LT. nlevels )
THEN
3766 DO locrow = startlocrow, endlocrow, 1
3767 IF ( isodd(locrow) )
THEN
3768 globrow = lr2gr( locrow, level )
3769 globrowoff = globrow-startglobrow+1
3770 IF ( .NOT. (
ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
3771 ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
3772 ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
3773 ALLOCATED( lelement(level+1, globrowoff)%b ) ) )
THEN
3774 IF(kpdbg)
WRITE(ofu,*)
'ForwardUpdateb +1 memory error';
CALL fl(ofu); stop
3776 lelement(level+1, globrowoff)%b = 0
3784 DO nbrmpireqindx = 1, 6
3785 nbrmpireq(nbrmpireqindx) = mpi_request_null
3794 nbrrank = lr2rank(startlocrow-1,level)
3795 IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
3801 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',nbrrank,
' ';
CALL fl(ofu)
3803 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3804 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad L';
CALL fl(ofu); stop
3806 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3807 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad U';
CALL fl(ofu); stop
3809 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3810 IF(kpdbg)
WRITE(ofu,*)
' Irecv bad b';
CALL fl(ofu); stop
3813 CALL bsystemclock(loctimer1)
3814 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3815 nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3816 nbrmpierr(nbrmpireqindx))
3817 nbrmpireqindx = nbrmpireqindx + 1
3818 nbrmpinirecvs = nbrmpinirecvs + 1
3819 IF(kpdbg)
WRITE(ofu,*)
' Irecv 3 top ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3820 CALL bsystemclock(loctimer2)
3821 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3824 nbrrank = lr2rank(endlocrow+1,level)
3825 IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
3830 globrowoff = endglobrow-startglobrow+2
3831 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ', nbrrank;
CALL fl(ofu)
3833 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3834 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad L';
CALL fl(ofu); stop
3836 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3837 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad U';
CALL fl(ofu); stop
3839 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3840 IF(kpdbg)
WRITE(ofu,*)
'Irecv bad b';
CALL fl(ofu); stop
3843 CALL bsystemclock(loctimer1)
3844 CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3845 nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3846 nbrmpierr(nbrmpireqindx))
3847 nbrmpireqindx = nbrmpireqindx + 1
3848 nbrmpinirecvs = nbrmpinirecvs + 1
3849 IF(kpdbg)
WRITE(ofu,*)
' Irecv 6 bot ',nbrrank,
' ',nbrmpireqindx-1;
CALL fl(ofu)
3850 CALL bsystemclock(loctimer2)
3851 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3858 DO locrow = startlocrow, endlocrow, 1
3859 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
3860 globrow = lr2gr( locrow, level )
3861 globrowoff = globrow-startglobrow+1
3863 IF(kpdbg)
WRITE(ofu,*)
' Computed even hats ',globrow,
' ', locrow;
CALL fl(ofu)
3868 IF ( level .EQ. nlevels )
THEN
3869 IF ( (rank .NE. 0) .OR. &
3870 (startlocrow .NE. endlocrow) .OR. &
3871 (locrow .NE. 1) .OR. (globrow .NE. 1) )
THEN
3872 IF(kpdbg)
WRITE(ofu,*)
' EVEN ERROR ',globrow,
' ', locrow;
CALL fl(ofu)
3876 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%D) )
THEN
3877 IF(kpdbg)
WRITE(ofu,*)
'Chats bad D';
CALL fl(ofu); stop
3879 IF ( .NOT.
ALLOCATED(lelement(1,globrowoff)%pivot) )
THEN
3880 IF(kpdbg)
WRITE(ofu,*)
'Chats bad pivot';
CALL fl(ofu); stop
3882 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3883 IF(kpdbg)
WRITE(ofu,*)
'Chats bad L';
CALL fl(ofu); stop
3885 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3886 IF(kpdbg)
WRITE(ofu,*)
'Chats bad U';
CALL fl(ofu); stop
3888 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3889 IF(kpdbg)
WRITE(ofu,*)
'Chats bad b';
CALL fl(ofu); stop
3899 CALL bsystemclock(mattimer1)
3901 lelement(level,globrowoff)%b(i,1)=lelement(level,globrowoff)%b(i,1)/&
3902 lelement(level,globrowoff)%D(i,1)
3905 IF (blaserr .NE. 0)
THEN
3906 IF(kpdbg)
WRITE(ofu,*)
'PLBDGETRS b failed';
CALL fl(ofu); stop
3908 CALL bsystemclock(mattimer2)
3909 dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3915 DO rowoffset = -1, 1, 2
3916 nbrrank = lr2rank(locrow+rowoffset, level)
3917 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
3918 CALL bsystemclock(loctimer1)
3919 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%L) )
THEN
3920 IF(kpdbg)
WRITE(ofu,*)
'ISend bad L';
CALL fl(ofu); stop
3922 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%U) )
THEN
3923 IF(kpdbg)
WRITE(ofu,*)
'ISend bad U';
CALL fl(ofu); stop
3925 IF ( .NOT.
ALLOCATED(lelement(level,globrowoff)%b) )
THEN
3926 IF(kpdbg)
WRITE(ofu,*)
'ISend bad b';
CALL fl(ofu); stop
3929 IF(kpdbg)
WRITE(ofu,*)
' ISend ',globrow,
' ',locrow,
' ',nbrrank;
CALL fl(ofu)
3930 globrowoff = globrow-startglobrow+1
3931 stag = (((1-rowoffset))/2)*3
3932 CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3933 nbrrank, stag+3, ns_comm, &
3934 nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3935 nbrmpireqindx = nbrmpireqindx + 1
3936 nbrmpinisends = nbrmpinisends + 1
3937 IF(kpdbg)
WRITE(ofu,*)
' Isend ',stag+3,
' ',globrowoff;
CALL fl(ofu)
3938 CALL bsystemclock(loctimer2)
3939 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3948 DO locrow = startlocrow, endlocrow, 1
3949 globrow = lr2gr( locrow, level )
3950 IF ( isodd( locrow ) .AND. (level .NE. nlevels) )
THEN
3951 IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) )
THEN
3952 IF(kpdbg)
WRITE(ofu,*)
' Precomp mat-mul ',globrow,
' ',locrow;
CALL fl(ofu)
3953 CALL computeforwardoddrowhats( locrow, level, &
3954 startlocrow, endlocrow, .true. )
3964 nbrmpireqcnt = nbrmpireqindx - 1
3965 IF ( nbrmpireqcnt .GT. 0 )
THEN
3966 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
3967 CALL bsystemclock(loctimer1)
3968 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3969 CALL bsystemclock(loctimer2)
3970 waitall_time=waitall_time+(loctimer2-loctimer1)
3971 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3977 IF ( isodd(startlocrow) )
THEN
3978 globrow = lr2gr( startlocrow, level )
3979 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',startlocrow;
CALL fl(ofu)
3980 CALL computeforwardoddrowhats( startlocrow, level, &
3981 startlocrow, endlocrow, .true. )
3983 IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) )
THEN
3984 globrow = lr2gr( endlocrow, level )
3985 IF(kpdbg)
WRITE(ofu,*)
' Postcomp mat-mul ',globrow,
' ',endlocrow;
CALL fl(ofu)
3986 CALL computeforwardoddrowhats( endlocrow, level, &
3987 startlocrow, endlocrow, .true. )
3989 END DO forwardsolveloop
3992 rhsdirtied = .false.
3995 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
3996 IF(kpdbg)
WRITE(ofu,*)
'------ updateb solve end ------';
CALL fl(ofu)
3998 CALL bsystemclock(globtimer2)
3999 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4001 END SUBROUTINE forwardupdateb
4008 SUBROUTINE backwardsolve_bst
4015 INTEGER :: globrowoff
4016 INTEGER :: prevglobrow
4017 INTEGER :: nextglobrow
4018 INTEGER :: startlocrow
4019 INTEGER :: endlocrow
4020 INTEGER :: rowoffset
4022 INTEGER :: nbrmpireqindx
4023 INTEGER :: nbrmpireqcnt
4024 INTEGER :: nbrmpinirecvs
4025 INTEGER :: nbrmpinisends
4026 INTEGER :: nbrmpireq(2)
4027 INTEGER :: nbrmpierr(2)
4028 INTEGER :: mpiwaiterr
4030 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4031 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4032 INTEGER :: waitmatchindex
4037 CALL bsystemclock(globtimer1)
4040 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4041 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve start ------';
CALL fl(ofu)
4044 IF ( matdirtied )
THEN
4045 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4046 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied matrix; updating... ------';
CALL fl(ofu)
4047 CALL forwardsolve_bst()
4049 IF ( rhsdirtied )
THEN
4050 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4051 IF(kpdbg)
WRITE(ofu,*)
' ------ Dirtied RHS; updating... ------';
CALL fl(ofu)
4052 CALL forwardupdateb()
4058 IF ( (.NOT.
ALLOCATED(lelement)) .OR. (.NOT.
ALLOCATED(selement)) )
THEN
4059 IF(kpdbg)
WRITE(ofu,*)
'Forward not called before backward?';
CALL fl(ofu)
4067 backwardsolveloop:
DO level = nlevels, 1, -1
4069 IF ( usebarriers )
THEN
4070 IF(kpdbg)
WRITE(ofu,*)
'Barrier at backward level ', level;
CALL fl(ofu)
4071 CALL mpi_barrier( ns_comm, mpierr )
4072 IF(kpdbg)
WRITE(ofu,*)
'Done barrier at backward level ', level;
CALL fl(ofu)
4080 DO globrow = startglobrow, endglobrow, 1
4081 locrow = gr2lr( globrow, level )
4082 IF ( locrow .GT. 0 )
THEN
4083 IF ( locrow .LT. startlocrow )
THEN
4084 startlocrow = locrow
4086 IF ( locrow .GT. endlocrow )
THEN
4095 IF ( startlocrow .GT. endlocrow )
THEN
4096 IF(kpdbg)
WRITE(ofu,*)
'Nothing to do at backward level', level;
CALL fl(ofu)
4097 CALL plbbackwardinitializelevel( level, .false. )
4098 cycle backwardsolveloop
4100 CALL plbbackwardinitializelevel( level, .true. )
4103 IF(kpdbg)
WRITE(ofu,*)
'**** Backward level ', level,
' ****';
CALL fl(ofu)
4108 DO nbrmpireqindx = 1, 2
4109 nbrmpireq(nbrmpireqindx) = mpi_request_null
4118 nbrrank = lr2rank(startlocrow-1,level)
4119 IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) )
THEN
4125 globrow = lr2gr(startlocrow-1,level)
4126 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',startlocrow-1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4127 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4128 ALLOCATE( selement(globrow)%x(m) )
4129 CALL chargememory( m*dpsz )
4131 CALL bsystemclock(loctimer1)
4132 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4133 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4134 nbrmpireqindx = nbrmpireqindx + 1
4135 nbrmpinirecvs = nbrmpinirecvs + 1
4136 CALL bsystemclock(loctimer2)
4137 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4139 nbrrank = lr2rank(endlocrow+1,level)
4140 IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) )
THEN
4146 globrow = lr2gr(endlocrow+1,level)
4147 IF(kpdbg)
WRITE(ofu,*)
' Irecv ',endlocrow+1,
' ',globrow,
' ',nbrrank;
CALL fl(ofu)
4148 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4149 ALLOCATE( selement(globrow)%x(m) )
4150 CALL chargememory( m*dpsz )
4152 CALL bsystemclock(loctimer1)
4153 CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4154 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4155 nbrmpireqindx = nbrmpireqindx + 1
4156 nbrmpinirecvs = nbrmpinirecvs + 1
4157 CALL bsystemclock(loctimer2)
4158 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4165 DO locrow = startlocrow, endlocrow, 1
4166 IF ( isodd( locrow ) )
THEN
4171 DO rowoffset = -1, 1, 2
4172 nbrrank = lr2rank(locrow+rowoffset, level)
4173 IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) )
THEN
4174 IF(kpdbg)
WRITE(ofu,*)
' Isend ', locrow,
' ', nbrrank;
CALL fl(ofu)
4175 globrow = lr2gr(locrow,level)
4176 IF ( .NOT.
ALLOCATED(selement(globrow)%x) )
THEN
4177 IF(kpdbg)
WRITE(ofu,*)
'ERROR BISEND AT LEVEL', level;
CALL fl(ofu)
4178 IF(kpdbg)
WRITE(ofu,*) locrow,
' ', globrow,
' ', nbrrank;
CALL fl(ofu)
4181 stag = (1-rowoffset)/2+1
4182 CALL bsystemclock(loctimer1)
4183 CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4184 nbrrank, stag, ns_comm, nbrmpireq(nbrmpireqindx), &
4185 nbrmpierr(nbrmpireqindx) )
4186 nbrmpireqindx = nbrmpireqindx + 1
4187 nbrmpinisends = nbrmpinisends + 1
4188 CALL bsystemclock(loctimer2)
4189 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4198 nbrmpireqcnt = nbrmpireqindx - 1
4199 IF ( nbrmpireqcnt .GT. 0 )
THEN
4200 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4201 CALL bsystemclock(loctimer1)
4202 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4203 CALL bsystemclock(loctimer2)
4204 waitall_time=waitall_time+(loctimer2-loctimer1)
4205 CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4211 DO locrow = startlocrow, endlocrow, 1
4212 IF ( iseven( locrow ) .OR. (level .EQ. nlevels) )
THEN
4213 globrow = lr2gr( locrow, level )
4214 globrowoff = globrow - startglobrow + 1
4215 IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) )
THEN
4216 IF(kpdbg)
WRITE(ofu,*)
'ERROR ',level,
' ',globrow,
' ', locrow;
CALL fl(ofu)
4220 IF(kpdbg)
WRITE(ofu,*)
' Compute solution ', globrow,
' ', locrow;
CALL fl(ofu)
4222 IF ( .NOT.
ALLOCATED( selement(globrow)%x ) )
THEN
4223 ALLOCATE( selement(globrow)%x(m) )
4224 CALL chargememory( m*dpsz )
4230 selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4235 nbrrank = lr2rank(locrow-1,level)
4236 IF ( nbrrank .GE. 0 )
THEN
4237 prevglobrow = lr2gr(locrow-1,level)
4238 CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4239 selement(prevglobrow)%x, &
4240 one, selement(globrow)%x )
4246 nbrrank = lr2rank(locrow+1,level)
4247 IF ( nbrrank .GE. 0 )
THEN
4248 nextglobrow = lr2gr(locrow+1,level)
4249 CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4250 selement(nextglobrow)%x, &
4251 one, selement(globrow)%x )
4258 IF(kpdbg)
WRITE(ofu,*)
' x[',globrow,
']=',selement(globrow)%x;
CALL fl(ofu)
4262 END DO backwardsolveloop
4265 IF(kpdbg)
WRITE(ofu,
'(A,F6.1,A)')
'Memory so far: ', membytes/1e6,
' MB';
CALL fl(ofu)
4266 IF(kpdbg)
WRITE(ofu,*)
'------ Backward solve end ------';
CALL fl(ofu)
4268 CALL bsystemclock(globtimer2)
4269 CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4271 END SUBROUTINE backwardsolve_bst
4278 SUBROUTINE verifysolution
4282 INTEGER :: i, k, globrow, globrowoff
4283 REAL(dp) :: term, totrmserr
4285 INTEGER :: nbrmpireqindx
4286 INTEGER :: nbrmpireqcnt
4287 INTEGER :: nbrmpinirecvs
4288 INTEGER :: nbrmpinisends
4289 INTEGER :: nbrmpireq(2)
4290 INTEGER :: nbrmpierr(2)
4291 INTEGER :: mpiwaiterr
4293 INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4294 INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4295 INTEGER :: waitmatchindex
4298 IF(kpdbg)
WRITE(ofu,*);
CALL fl(ofu)
4299 IF(kpdbg)
WRITE(ofu,*)
'------ Verifying solution ------';
CALL fl(ofu)
4307 nbrrank = gr2rank(startglobrow-1)
4308 IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4309 IF ( .NOT.
ALLOCATED( selement(startglobrow-1)%x ) )
THEN
4310 ALLOCATE( selement(startglobrow-1)%x(m) )
4312 CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4313 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4314 nbrmpireqindx = nbrmpireqindx + 1
4315 nbrmpinirecvs = nbrmpinirecvs + 1
4317 nbrrank = gr2rank(endglobrow+1)
4318 IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4319 IF ( .NOT.
ALLOCATED( selement(endglobrow+1)%x ) )
THEN
4320 ALLOCATE( selement(endglobrow+1)%x(m) )
4322 CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4323 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4324 nbrmpireqindx = nbrmpireqindx + 1
4325 nbrmpinirecvs = nbrmpinirecvs + 1
4327 nbrrank = gr2rank(startglobrow-1)
4328 IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) )
THEN
4329 CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4330 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4331 nbrmpireqindx = nbrmpireqindx + 1
4332 nbrmpinisends = nbrmpinisends + 1
4334 nbrrank = gr2rank(endglobrow+1)
4335 IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) )
THEN
4336 CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4337 ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4338 nbrmpireqindx = nbrmpireqindx + 1
4339 nbrmpinisends = nbrmpinisends + 1
4341 nbrmpireqcnt = nbrmpireqindx - 1
4342 IF ( nbrmpireqcnt .GT. 0 )
THEN
4343 IF(kpdbg)
WRITE(ofu,*)
' Wait ',nbrmpinirecvs,
' ',nbrmpinisends;
CALL fl(ofu)
4344 CALL bsystemclock(loctimer1)
4345 CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4346 CALL bsystemclock(loctimer2)
4347 waitall_time=waitall_time+(loctimer2-loctimer1)
4352 DO globrow = startglobrow, endglobrow, 1
4353 globrowoff = globrow - startglobrow + 1
4356 IF ( globrow .GT. 1 )
THEN
4357 term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4360 term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4362 IF (writesolution)
THEN
4365 IF(kpdbg)
WRITE(ofu,*)
'X[',(globrow-1)*m+k,
']=', selement(globrow)%x(k);
CALL fl(ofu)
4370 IF ( globrow .LT. n )
THEN
4371 term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4374 totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4378 IF ( endglobrow .LT. startglobrow )
THEN
4381 totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4383 IF(kpdbg)
WRITE(ofu,
'(A,E15.8E3)')
'TOTAL RMS ERROR = ', totrmserr;
CALL fl(ofu)
4384 IF(kpdbg)
WRITE(ofu,*)
'------ Solution verified ------';
CALL fl(ofu)
4385 END SUBROUTINE verifysolution
4390 END MODULE blocktridiagonalsolver_bst