V3FIT
blocktridiagonalsolver.f90
1 !-------------------------------------------------------------------------------
5 !-------------------------------------------------------------------------------
6 
7 !-------------------------------------------------------------------------------
9 USE mpi_inc
10 USE parallel_include_module, ONLY: stopmpi
11 USE parallel_include_module, ONLY: tofu
12 USE parallel_include_module, ONLY: ns_comm
13 USE parallel_include_module, ONLY: grank, gnranks
14 USE parallel_include_module, ONLY: rank, nranks
15 USE parallel_include_module, ONLY: ns_resltn
16 IMPLICIT NONE
17 
18 !-------------------------------------------------------------------------------
19 ! Precision settings
20 !-------------------------------------------------------------------------------
21 INTEGER, PARAMETER :: rprec = selected_real_kind(15,300)
22 INTEGER, PARAMETER :: iprec = selected_int_kind(8)
23 INTEGER, PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
24 INTEGER, PARAMETER :: dp = rprec
25 
26 !-------------------------------------------------------------------------------
30 !-------------------------------------------------------------------------------
32  REAL(dp), ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
33  INTEGER, ALLOCATABLE :: pivot(:)
34 END TYPE levelelement
35 
36 !-------------------------------------------------------------------------------
40 !-------------------------------------------------------------------------------
42  REAL(dp), ALLOCATABLE :: x(:) !Vector of size M
43 END TYPE solutionelement
44 
45 !-------------------------------------------------------------------------------
52 !-------------------------------------------------------------------------------
53 TYPE (levelelement), ALLOCATABLE :: lelement(:,:)
54 
55 !-------------------------------------------------------------------------------
60 !-------------------------------------------------------------------------------
61 TYPE (levelelement), ALLOCATABLE :: orig(:)
62 
63 !-------------------------------------------------------------------------------
68 !-------------------------------------------------------------------------------
69 TYPE (solutionelement), ALLOCATABLE :: selement(:)
70 
71 !-------------------------------------------------------------------------------
72 !INTEGER :: rank !<This MPI task's rank
73 !INTEGER :: nranks !<Num of MPI tasks
74 INTEGER :: p
75 INTEGER :: n
76 INTEGER :: m
77 INTEGER :: startglobrow
78 INTEGER :: endglobrow
79 INTEGER :: nlevels
80 LOGICAL :: matdirtied
81 LOGICAL :: rhsdirtied
82 
83 !-------------------------------------------------------------------------------
84 CHARACTER*100 :: kenvvar
85 CHARACTER*100 :: kenvval
86 LOGICAL :: kpdbg
87 INTEGER :: ofu
88 INTEGER :: pfu
89 INTEGER :: mpi_err
90 LOGICAL :: writeproblemfile
91 LOGICAL :: writesolution
92 LOGICAL :: usebarriers
93 REAL :: membytes
94 REAL :: dpsz
95 REAL :: intsz
96 REAL :: ptrsz
97 REAL(dp) :: one
98 REAL(dp) :: zero
99 LOGICAL(dp) :: l_colscale=.false.
100 
101 !-------------------------------------------------------------------------------
102 LOGICAL :: use_mpiwtime
103 DOUBLE PRECISION :: loctimer1, loctimer2
104 DOUBLE PRECISION :: mattimer1, mattimer2
105 DOUBLE PRECISION :: globtimer1, globtimer2
106 DOUBLE PRECISION :: timerfreq
107 DOUBLE PRECISION :: tottime
108 INTEGER :: totcount
109 DOUBLE PRECISION :: totcommtime
110 INTEGER :: totcommcount
111 DOUBLE PRECISION :: totinvtime
112 INTEGER :: totinvcount
113 DOUBLE PRECISION :: totmatmultime
114 INTEGER :: totmatmulcount
115 DOUBLE PRECISION :: totmatsoltime
116 INTEGER :: totmatsolcount
117 
118 REAL(dp), PUBLIC :: maxeigen_tri
119 REAL(dp), PUBLIC :: dmin_tri
120 
121 !-------------------------------------------------------------------------------
125 !-------------------------------------------------------------------------------
126 REAL(dp), ALLOCATABLE :: origdiagelement(:)
127 REAL(dp), ALLOCATABLE :: topscalefac(:), botscalefac(:)
128 !-------------------------------------------------------------------------------
129 
130 !-------------------------------------------------------------------------------
131 !-------------------------------------------------------------------------------
135 !-------------------------------------------------------------------------------
136 LOGICAL :: doblasonly
137 LOGICAL :: doblacscomm
138 
139 !-------------------------------------------------------------------------------
143 !-------------------------------------------------------------------------------
145  INTEGER :: myrow, mycol
146  INTEGER :: nrows, ncols
147  INTEGER :: blockszrows, blockszcols
148  INTEGER, ALLOCATABLE :: map(:,:)
149 END TYPE blacsprocessgrid
150 
151 !-------------------------------------------------------------------------------
155 !-------------------------------------------------------------------------------
157  INTEGER :: iam
158  INTEGER :: nprocs
159  INTEGER :: maincontext
160  INTEGER :: levelcontext
161  TYPE(BlacsProcessGrid) :: pgrid
162  INTEGER :: nbpp
163 END TYPE blacsparameters
164 
165 TYPE(blacsparameters) :: blacs
166 
167 !-------------------------------------------------------------------------------
171 !-------------------------------------------------------------------------------
173  INTEGER :: masterrank
174  INTEGER :: nslaves
175  INTEGER, ALLOCATABLE :: slaveranks(:)
176 END TYPE mastertoslavemapping
177 
178 !-------------------------------------------------------------------------------
182 !-------------------------------------------------------------------------------
184  LOGICAL :: ammaster
185  TYPE(MasterToSlaveMapping) :: msmap
186 
187  INTEGER :: mpicomm, mpitag, mpierr
188 #if defined(MPI_OPT)
189  INTEGER :: mpistatus(MPI_STATUS_SIZE)
190 #endif
191 
192 END TYPE pblaslevelparameters
193 
194 TYPE(PBLASLevelParameters) :: pblas
195 
196 !-------------------------------------------------------------------------------
200 !-------------------------------------------------------------------------------
201 INTEGER, PARAMETER :: op_none = 0
202 INTEGER, PARAMETER :: op_done = 1
203 INTEGER, PARAMETER :: op_dgemm = 2
204 INTEGER, PARAMETER :: op_dgemv = 3
205 INTEGER, PARAMETER :: op_dgetrf = 4
206 INTEGER, PARAMETER :: op_dgetrs = 5
207 
208 !-------------------------------------------------------------------------------
212 !-------------------------------------------------------------------------------
214  DOUBLE PRECISION :: tm
215  INTEGER :: cnt
216  DOUBLE PRECISION :: t1, t2
217 END TYPE timecount
219  TYPE(TimeCount) :: wait, comm, comp
220  TYPE(TimeCount) :: mm, trf, pmm, ptrf
221  TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
222  TYPE(TimeCount) :: extract, waitall
223 END TYPE pblasstats
224 
225 TYPE(PBLASStats) :: pstats
226 
227 !-------------------------------------------------------------------------------
229  DOUBLE PRECISION, ALLOCATABLE :: temparray(:)
230 END TYPE pblastemparray
231 
232 !-------------------------------------------------------------------------------
233 
234 CONTAINS
235 
236 !-------------------------------------------------------------------------------
240 !-------------------------------------------------------------------------------
241 SUBROUTINE bclockinit()
242  !-----------------------------------------------------------------------------
243  ! Local variables
244  !-----------------------------------------------------------------------------
245  INTEGER :: tempint
246 
247  IF ( use_mpiwtime ) THEN
248  timerfreq = 1.0
249  ELSE
250  CALL system_clock(count_rate=tempint)
251  timerfreq = tempint
252  END IF
253 END SUBROUTINE bclockinit
254 
255 !-------------------------------------------------------------------------------
259 !-------------------------------------------------------------------------------
260 SUBROUTINE bsystemclock( ts )
261  !-----------------------------------------------------------------------------
262  ! Formal arguments
263  !-----------------------------------------------------------------------------
264  DOUBLE PRECISION, INTENT(INOUT) :: ts
265 
266  !-----------------------------------------------------------------------------
267  ! Local variables
268  !-----------------------------------------------------------------------------
269  INTEGER :: tempint
270  DOUBLE PRECISION :: tempdbl
271 
272  IF ( use_mpiwtime ) THEN
273 #if defined(MPI_OPT)
274  tempdbl = mpi_wtime()
275  ts = tempdbl
276 #endif
277  ELSE
278  CALL system_clock( tempint )
279  ts = tempint
280  END IF
281 END SUBROUTINE bsystemclock
282 
283 !-------------------------------------------------------------------------------
287 !-------------------------------------------------------------------------------
288 SUBROUTINE fl( u )
289  !-----------------------------------------------------------------------------
290  ! Formal arguments
291  !-----------------------------------------------------------------------------
292  INTEGER, INTENT(IN) :: u
293 
294  CALL flush( u )
295 END SUBROUTINE fl
296 
297 !-------------------------------------------------------------------------------
301 !-------------------------------------------------------------------------------
302 SUBROUTINE chargememory( bytes )
303  !-----------------------------------------------------------------------------
304  ! Formal arguments
305  !-----------------------------------------------------------------------------
306  REAL, INTENT(IN) :: bytes
307 
308  !-----------------------------------------------------------------------------
309  membytes = membytes + bytes
310 END SUBROUTINE chargememory
311 
312 !-------------------------------------------------------------------------------
316 !-------------------------------------------------------------------------------
317 SUBROUTINE chargetime( tot, t2, t1, cnt )
318  !-----------------------------------------------------------------------------
319  ! Formal arguments
320  !-----------------------------------------------------------------------------
321  DOUBLE PRECISION, INTENT(INOUT) :: tot
322  DOUBLE PRECISION, INTENT(IN) :: t2
323  DOUBLE PRECISION, INTENT(IN) :: t1
324  INTEGER, INTENT(INOUT) :: cnt
325 
326  !-----------------------------------------------------------------------------
327  tot = tot + (real(t2-t1))/timerfreq
328  cnt = cnt + 1
329 END SUBROUTINE chargetime
330 
331 !-------------------------------------------------------------------------------
332 !-------------------------------------------------------------------------------
336 !-------------------------------------------------------------------------------
337 SUBROUTINE timecountinit( tc )
338  !----------------------------------------------
339  ! Formal arguments
340  !----------------------------------------------
341  TYPE(timecount), INTENT(INOUT) :: tc
342 
343  !----------------------------------------------
344  tc%tm = 0.0
345  tc%cnt = 0
346 END SUBROUTINE timecountinit
347 
348 !-------------------------------------------------------------------------------
352 !-------------------------------------------------------------------------------
353 SUBROUTINE timecountprint( tc, msg )
354  !----------------------------------------------
355  ! Formal arguments
356  !----------------------------------------------
357  TYPE(timecount), INTENT(INOUT) :: tc
358  CHARACTER(*), INTENT(IN) :: msg
359  !----------------------------------------------
360  ! Local Variables
361  !----------------------------------------------
362  DOUBLE PRECISION :: avg
363 
364  !----------------------------------------------
365  avg = 0.0
366  IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
367  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,' * ',avg,' sec = ',tc%tm,' sec'
368 END SUBROUTINE timecountprint
369 
370 !-------------------------------------------------------------------------------
374 !-------------------------------------------------------------------------------
375 SUBROUTINE plbinitstats()
376  CALL timecountinit( pstats%wait )
377  CALL timecountinit( pstats%comm )
378  CALL timecountinit( pstats%comp )
379  CALL timecountinit( pstats%mm )
380  CALL timecountinit( pstats%trf )
381  CALL timecountinit( pstats%pmm )
382  CALL timecountinit( pstats%ptrf )
383 
384  CALL timecountinit( pstats%mma )
385  CALL timecountinit( pstats%mmb )
386  CALL timecountinit( pstats%mmc )
387  CALL timecountinit( pstats%mmalpha )
388  CALL timecountinit( pstats%mmbeta )
389  CALL timecountinit( pstats%mmrc )
390  CALL timecountinit( pstats%extract )
391  CALL timecountinit( pstats%waitall )
392 END SUBROUTINE plbinitstats
393 
394 !-------------------------------------------------------------------------------
398 !-------------------------------------------------------------------------------
399 SUBROUTINE plbprintstats()
400  CALL timecountprint( pstats%wait, 'PBLAS Wait ' )
401  CALL timecountprint( pstats%comm, 'PBLAS Comm ' )
402  CALL timecountprint( pstats%comp, 'PBLAS Comp ' )
403  CALL timecountprint( pstats%mm, 'PBLAS MM ' )
404  CALL timecountprint( pstats%trf, 'PBLAS TRF ' )
405  CALL timecountprint( pstats%pmm, 'PBLAS PMM ' )
406  CALL timecountprint( pstats%ptrf, 'PBLAS PTRF ' )
407 
408  CALL timecountprint( pstats%mma, 'PBLAS MMA ' )
409  CALL timecountprint( pstats%mmb, 'PBLAS MMB ' )
410  CALL timecountprint( pstats%mmc, 'PBLAS MMC ' )
411  CALL timecountprint( pstats%mmalpha, 'PBLAS MMalpha ' )
412  CALL timecountprint( pstats%mmbeta, 'PBLAS MMbeta ' )
413  CALL timecountprint( pstats%mmrc, 'PBLAS MMRC ' )
414  CALL timecountprint( pstats%extract, 'PBLAS Extract ' )
415  CALL timecountprint( pstats%waitall, 'PBLAS Waitall ' )
416 END SUBROUTINE plbprintstats
417 
418 !-------------------------------------------------------------------------------
422 !-------------------------------------------------------------------------------
423 SUBROUTINE plbinitialize
424  !----------------------------------------------
425  ! Local Variables
426  !----------------------------------------------
427  CHARACTER*100 :: envvar
428  CHARACTER*100 :: envval
429 
430  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Started'; CALL fl(ofu)
431 
432  !----------------------------------------------
433  doblasonly = .true.
434  IF ( m .GE. 2048 ) THEN
435  doblasonly = .true.
436  envvar = 'BLOCKTRI_BLASONLY'
437  CALL getenv( envvar, envval )
438  IF ( envval .EQ. 'TRUE' ) THEN
439  doblasonly = .true.
440  IF(kpdbg) WRITE(ofu,*) 'BLAS ONLY -- obeying env var ', envvar; CALL fl(ofu)
441  END IF
442  END IF
443  IF(kpdbg) WRITE(ofu,*) 'doblasonly = ', doblasonly; CALL fl(ofu)
444 
445  !----------------------------------------------
446  doblacscomm = .false.
447  envvar = 'BLOCKTRI_BLACSCOMM'
448  CALL getenv( envvar, envval )
449  IF ( envval .EQ. 'TRUE' ) THEN
450  doblacscomm = .true.
451  IF(kpdbg) WRITE(ofu,*) 'BLACS COMM -- obeying env var ', envvar; CALL fl(ofu)
452  END IF
453  IF(kpdbg) WRITE(ofu,*) 'doblacscomm = ', doblacscomm; CALL fl(ofu)
454 
455  !----------------------------------------------
456  blacs%nbpp = 1
457  envvar = 'BLOCKTRI_NBPP'
458  CALL getenv( envvar, envval )
459  IF ( envval .NE. '' ) THEN
460  READ( envval, *) blacs%nbpp
461  IF(kpdbg) WRITE(ofu,*) 'NBPP -- obeying env var ', envvar; CALL fl(ofu)
462  END IF
463  IF(kpdbg) WRITE(ofu,*) 'NBPP = ', blacs%nbpp; CALL fl(ofu)
464 
465  !----------------------------------------------
466  CALL plbinitstats()
467 
468  !----------------------------------------------
469  IF (doblasonly) THEN
470  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
471  ELSE
472 #if defined(MPI_OPT)
473  CALL blacs_pinfo( blacs%iam, blacs%nprocs )
474  IF(kpdbg) WRITE(ofu,*) 'BLACS_PINFO ', blacs%iam, ' ', blacs%nprocs; CALL fl(ofu)
475  CALL blacs_get( 0, 0, blacs%maincontext )
476  IF(kpdbg) WRITE(ofu,*) 'BLACS_GET ', blacs%maincontext; CALL fl(ofu)
477  CALL blacs_gridinit( blacs%maincontext, 'R', 1, blacs%nprocs )
478  IF(kpdbg) WRITE(ofu,*) 'BLACS_GRIDINIT'; CALL fl(ofu)
479  CALL blacs_barrier( blacs%maincontext, 'All' )
480 #endif
481  END IF
482 
483  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Done'; CALL fl(ofu)
484 END SUBROUTINE plbinitialize
485 
486 !-------------------------------------------------------------------------------
490 !-------------------------------------------------------------------------------
491 SUBROUTINE plbfinalize
492 
493  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Started'; CALL fl(ofu)
494  IF (doblasonly) THEN
495  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
496  ELSE
497 #if defined(MPI_OPT)
498  CALL blacs_barrier( blacs%maincontext, 'All' )
499  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
500  CALL blacs_exit(1)
501  IF(kpdbg) WRITE(ofu,*) 'BLACS_EXIT nprocs= ', blacs%nprocs; CALL fl(ofu)
502  CALL plbprintstats()
503 #endif
504  END IF
505  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Done'; CALL fl(ofu)
506 END SUBROUTINE plbfinalize
507 
508 !-------------------------------------------------------------------------------
512 !-------------------------------------------------------------------------------
513 SUBROUTINE plbforwardinitialize
514  !-----------------------------------------------------------------------------
515  ! Nothing to do
516 END SUBROUTINE plbforwardinitialize
517 
518 !-------------------------------------------------------------------------------
522 !-------------------------------------------------------------------------------
523 SUBROUTINE plbforwardfinalize
524  !-----------------------------------------------------------------------------
525  ! Nothing to do
526 END SUBROUTINE plbforwardfinalize
527 
528 !-------------------------------------------------------------------------------
532 !-------------------------------------------------------------------------------
533 SUBROUTINE plbbackwardinitialize
534  !-----------------------------------------------------------------------------
535  ! Nothing to do
536 END SUBROUTINE plbbackwardinitialize
537 
538 !-------------------------------------------------------------------------------
542 !-------------------------------------------------------------------------------
543 SUBROUTINE plbbackwardfinalize
544  !-----------------------------------------------------------------------------
545  ! Nothing to do
546 END SUBROUTINE plbbackwardfinalize
547 
548 !-------------------------------------------------------------------------------
552 !-------------------------------------------------------------------------------
553 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
554  !----------------------------------------------
555  ! Formal arguments
556  !----------------------------------------------
557  INTEGER :: lvl
558  LOGICAL :: ammaster
559  !----------------------------------------------
560  ! Local Variables
561  !----------------------------------------------
562  INTEGER :: I, J, K
563  INTEGER :: maxslaves, actualslaves
564 
565  !----------------------------------------------
566  !Short-circuit PBLAS to BLAS
567  IF ( doblasonly ) THEN
568  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel BLAS only'; CALL fl(ofu)
569  RETURN
570  END IF
571 
572  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Started', ammaster; CALL fl(ofu)
573 
574 #if defined(MPI_OPT)
575  !----------------------------------------------
576  pblas%ammaster = ammaster
577  pblas%msmap%masterrank = -1
578  pblas%msmap%nslaves = 0
579  CALL determinemasterslaveranks()
580 
581  !----------------------------------------------
582  !Set up BLACS process grid and a new context
583  blacs%levelcontext = -1
584  CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
585  IF(kpdbg) WRITE(ofu,*) 'Created new context'; CALL fl(ofu)
586  IF(kpdbg) WRITE(ofu,*) 'Nslaves ',pblas%msmap%nslaves; CALL fl(ofu)
587 
588  blacs%pgrid%blockszrows = 64
589  IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
590  blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
591  IF(kpdbg) WRITE(ofu,*) 'Block NR=',blacs%pgrid%blockszrows; CALL fl(ofu)
592  IF(kpdbg) WRITE(ofu,*) 'Block NC=',blacs%pgrid%blockszcols; CALL fl(ofu)
593 
594  maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
595  blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
596  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
597  IF ( maxslaves .LT. 1 ) maxslaves = 1
598  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
599  IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
600  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
601  actualslaves = maxslaves
602  IF(kpdbg) WRITE(ofu,*) ' Actual slaves ', actualslaves; CALL fl(ofu)
603 
604  blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
605  IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
606  blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
607  IF(kpdbg) WRITE(ofu,*) 'NR=',blacs%pgrid%nrows,' NC=',blacs%pgrid%ncols; CALL fl(ofu)
608  ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
609  k = 0
610  DO i = 1, blacs%pgrid%nrows
611  DO j = 1, blacs%pgrid%ncols
612  blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
613  k = k + 1
614  END DO
615  END DO
616  IF(kpdbg) WRITE(ofu,*) 'NR*NC=',k; CALL fl(ofu)
617  CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
618  blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
619  IF(kpdbg) WRITE(ofu,*) 'GridMap done'; CALL fl(ofu)
620  CALL blacs_gridinfo( blacs%levelcontext, &
621  blacs%pgrid%nrows, blacs%pgrid%ncols, &
622  blacs%pgrid%myrow, blacs%pgrid%mycol )
623  IF(kpdbg) WRITE(ofu,*) 'GridInfo done'; CALL fl(ofu)
624  IF(kpdbg) WRITE(ofu,*) 'Myrowcol ',blacs%pgrid%myrow,' ',blacs%pgrid%mycol; CALL fl(ofu)
625 
626  !----------------------------------------------
627  pblas%mpicomm = ns_comm
628  pblas%mpitag = 1234
629 
630  !----------------------------------------------
631  CALL blacs_barrier( blacs%maincontext, 'All' )
632  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
633 
634  !----------------------------------------------
635  IF ( ammaster ) THEN
636  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Master'; CALL fl(ofu)
637  ELSE
638  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Slave'; CALL fl(ofu)
639  CALL slaveservice()
640  END IF
641 
642  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Done', ammaster; CALL fl(ofu)
643 #endif
644 
645 END SUBROUTINE plbforwardinitializelevel
646 
647 !-------------------------------------------------------------------------------
651 !-------------------------------------------------------------------------------
652 #if defined(MPI_OPT)
653 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
654  !----------------------------------------------
655  ! Formal arguments
656  !----------------------------------------------
657  INTEGER :: lvl
658  LOGICAL :: ammaster
659 
660  !----------------------------------------------
661  !Short-circuit PBLAS to BLAS
662  IF ( doblasonly ) THEN
663  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel BLAS only'; CALL fl(ofu)
664  RETURN
665  END IF
666  !----------------------------------------------
667  !If I'm master, tell slaves that they're done
668  IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) ) THEN
669  CALL masterbcastnextop( op_done )
670  END IF
671 
672  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
673  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
674  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel pariah !level-barrier';CALL fl(ofu)
675  ELSE
676  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level-barrier'; CALL fl(ofu)
677  CALL blacs_barrier( blacs%levelcontext, 'All' )
678  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level grid exit'; CALL fl(ofu)
679  CALL blacs_gridexit( blacs%levelcontext )
680  END IF
681 
682  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel main-barrier'; CALL fl(ofu)
683  CALL blacs_barrier( blacs%maincontext, 'All' )
684 
685  !----------------------------------------------
686  pblas%msmap%masterrank = -1
687  pblas%msmap%nslaves = 0
688  DEALLOCATE( pblas%msmap%slaveranks )
689  DEALLOCATE( blacs%pgrid%map )
690 
691  !----------------------------------------------
692  CALL plbprintstats()
693 
694  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel ', ammaster; CALL fl(ofu)
695 
696 END SUBROUTINE plbforwardfinalizelevel
697 #endif
698 
699 !-------------------------------------------------------------------------------
703 !-------------------------------------------------------------------------------
704 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
705  !----------------------------------------------
706  ! Formal arguments
707  !----------------------------------------------
708  INTEGER :: lvl
709  LOGICAL :: ammaster
710 
711  !----------------------------------------------
712  ! Nothing to do
713 END SUBROUTINE plbbackwardinitializelevel
714 
715 !-------------------------------------------------------------------------------
719 !-------------------------------------------------------------------------------
720 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
721  !----------------------------------------------
722  ! Formal arguments
723  !----------------------------------------------
724  INTEGER :: lvl
725  LOGICAL :: ammaster
726 
727  !----------------------------------------------
728  !Nothing to do
729 END SUBROUTINE plbbackwardfinalizelevel
730 
731 !-------------------------------------------------------------------------------
735 !-------------------------------------------------------------------------------
736 #if defined(MPI_OPT)
737 SUBROUTINE determinemasterslaveranks()
738  !----------------------------------------------
739  ! Local Variables
740  !----------------------------------------------
741  LOGICAL, ALLOCATABLE :: send_ammaster(:), recv_ammaster(:), assigned(:)
742  TYPE(mastertoslavemapping), ALLOCATABLE :: allmsmaps(:)
743  INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
744  INTEGER :: mpi_err, I, J, masterindex
745 
746  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks started'; CALL fl(ofu)
747 
748  !----------------------------------------------
749  !Initialize temporary data structures used for mapping
750  ALLOCATE( send_ammaster(1:1) )
751  ALLOCATE( recv_ammaster(1:nranks) )
752  send_ammaster(1) = pblas%ammaster
753  ALLOCATE( assigned(1:nranks) )
754  DO i = 1, nranks
755  assigned( i ) = .false.
756  END DO
757  ALLOCATE( allmsmaps(1:nranks) )
758  DO i = 1, nranks
759  allmsmaps(i)%masterrank = -1
760  allmsmaps(i)%nslaves = 0
761  END DO
762 
763  !----------------------------------------------
764  CALL mpi_allgather( send_ammaster, 1, mpi_logical, &
765  recv_ammaster, 1, mpi_logical, &
766  ns_comm, mpi_err )
767 
768  !----------------------------------------------
769  !Figure out total number of masters
770  totmasters = 0
771  DO i = 1, nranks
772  IF(kpdbg) WRITE(ofu,*) ' recv_ammaster ',i,' ',recv_ammaster(i); CALL fl(ofu)
773  IF ( recv_ammaster(i) ) THEN
774  totmasters = totmasters + 1
775  END IF
776  END DO
777 
778  IF ( totmasters .LE. 0 ) THEN
779  IF(kpdbg) WRITE(ofu,*) 'Total masters must be greater than 0'; CALL fl(ofu)
780  stop
781  END IF
782 
783  nslavespermaster = (nranks / totmasters)
784  IF(kpdbg) WRITE(ofu,*) 'Avg nslavespermaster',nslavespermaster; CALL fl(ofu)
785 
786  !----------------------------------------------
787  !Assign slaves to each master
788  tempmaster = 0
789  masterloop: DO i = 1, nranks
790  IF ( recv_ammaster(i) ) THEN
791 
792  tempmaster = tempmaster + 1
793 
794  !-------------------------------------------------------------
795  !Give one more slave to earlier ranks, if not evenly divisible
796  adjustednslavespermaster = nslavespermaster
797  IF ( tempmaster .LE. mod( nranks, totmasters ) ) THEN
798  adjustednslavespermaster = adjustednslavespermaster + 1
799  END IF
800 
801  IF(kpdbg) WRITE(ofu,*) 'Adjusted nslavespm',adjustednslavespermaster; CALL fl(ofu)
802  allmsmaps(i)%masterrank = i-1
803  ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
804 
805  !-------------------------------------------------------------
806  !Add the master as first in its own slave rank set
807  assigned(i) = .true.
808  allmsmaps(i)%nslaves = 1
809  allmsmaps(i)%slaveranks(1) = i-1
810 
811  !-------------------------------------------------------------
812  !Assign the next block of unassigned slaves
813  slaveloop: DO j = 1, nranks
814  IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster ) THEN
815  EXIT slaveloop
816  END IF
817  IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) ) THEN
818  assigned(j) = .true.
819  allmsmaps(j)%masterrank = i-1
820  allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
821  allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
822  END IF
823  END DO slaveloop
824  END IF
825  END DO masterloop
826  IF(kpdbg) WRITE(ofu,*) 'Computed all mappings'; CALL fl(ofu)
827 
828  !----------------------------------------------
829  !Copy the mapping relevant to us (if I'm master, my own, else, my master's)
830  masterindex = rank+1
831  IF ( allmsmaps(masterindex)%masterrank .NE. rank ) THEN
832  masterindex = allmsmaps(masterindex)%masterrank+1
833  END IF
834  pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
835  pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
836  ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
837  pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
838 
839  IF(kpdbg) WRITE(ofu,*) 'Extracted my mappings'; CALL fl(ofu)
840  IF(kpdbg) WRITE(ofu,*) 'My master rank',masterindex-1; CALL fl(ofu)
841  IF(kpdbg) WRITE(ofu,*) 'Nslaves in my set',allmsmaps(masterindex)%nslaves; CALL fl(ofu)
842  DO j = 1, allmsmaps(masterindex)%nslaves
843  IF(kpdbg) WRITE(ofu,*) 'Slave',j,' ',allmsmaps(masterindex)%slaveranks(j);CALL fl(ofu)
844  END DO
845 
846  !----------------------------------------------
847  !Deallocations of temporary space
848  DO i = 1, nranks
849  IF ( recv_ammaster(i) ) THEN
850  DEALLOCATE( allmsmaps(i)%slaveranks )
851  END IF
852  END DO
853  DEALLOCATE( assigned )
854  DEALLOCATE( allmsmaps )
855  DEALLOCATE( send_ammaster )
856  DEALLOCATE( recv_ammaster )
857 
858  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks done'; CALL fl(ofu)
859 END SUBROUTINE determinemasterslaveranks
860 #endif
861 
862 !-------------------------------------------------------------------------------
866 !-------------------------------------------------------------------------------
867 SUBROUTINE masterbcastnextop( nextop )
868  !----------------------------------------------
869  ! Formal arguments
870  !----------------------------------------------
871  INTEGER, INTENT(IN) :: nextop
872  !----------------------------------------------
873  ! Local variables
874  !----------------------------------------------
875  INTEGER :: K, destrank
876  REAL(dp) :: realnextop
877 
878  realnextop = real(nextop)
879  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp started ', nextop; CALL fl(ofu)
880  CALL masterbcastvalue( realnextop )
881  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp done ', nextop; CALL fl(ofu)
882 END SUBROUTINE masterbcastnextop
883 
884 !-------------------------------------------------------------------------------
888 !-------------------------------------------------------------------------------
889 SUBROUTINE slavegetnextop( nextop )
890  !----------------------------------------------
891  ! Formal arguments
892  !----------------------------------------------
893  REAL(dp) :: realnextop
894  INTEGER, INTENT(INOUT) :: nextop
895 
896  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp started'; CALL fl(ofu)
897  CALL slavereceivevalue( realnextop )
898  nextop = int( realnextop )
899  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp done ', nextop; CALL fl(ofu)
900 END SUBROUTINE slavegetnextop
901 
902 !-------------------------------------------------------------------------------
906 !-------------------------------------------------------------------------------
907 SUBROUTINE slaveservice()
908  !----------------------------------------------
909  ! Local variables
910  !----------------------------------------------
911  INTEGER :: nextop
912 
913  !----------------------------------------------
914  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
915  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
916  IF(kpdbg) WRITE(ofu,*) 'SlaveService pariah falling out '; CALL fl(ofu)
917  ELSE
918  IF(kpdbg) WRITE(ofu,*) 'SlaveService started '; CALL fl(ofu)
919  oploop: DO WHILE (.true.)
920  nextop = op_none
921  CALL slavegetnextop( nextop )
922  IF ( nextop .EQ. op_done ) THEN
923  EXIT oploop
924  ELSE IF ( nextop .EQ. op_dgemm ) THEN
925  CALL slavedgemm()
926  ELSE IF ( nextop .EQ. op_dgetrf ) THEN
927  CALL slavedgetrf()
928  ELSE IF ( nextop .EQ. op_dgetrs ) THEN
929  CALL slavedgetrs()
930  ELSE
931  IF(kpdbg) WRITE(ofu,*) 'Bad Next Op', nextop; CALL fl(ofu)
932  stop
933  END IF
934  END DO oploop
935  IF(kpdbg) WRITE(ofu,*) 'SlaveService done '; CALL fl(ofu)
936  END IF
937 END SUBROUTINE slaveservice
938 
939 !-------------------------------------------------------------------------------
943 !-------------------------------------------------------------------------------
944 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
945  A, nrows, ncols, subA, subnrows, subncols )
946  !----------------------------------------------
947  ! Formal arguments
948  !----------------------------------------------
949  INTEGER, INTENT(IN) :: bszr, bszc
950  INTEGER, INTENT(IN) :: pnr, pnc
951  INTEGER, INTENT(IN) :: pi, pj
952  REAL(dp), INTENT(IN) :: A(:,:)
953  INTEGER, INTENT(IN) :: nrows, ncols
954  REAL(dp), INTENT(OUT) :: subA(:)
955  INTEGER, INTENT(IN) :: subnrows, subncols
956  !----------------------------------------------
957  ! Local variables
958  !----------------------------------------------
959  INTEGER :: I, J, K, Q, R
960  INTEGER :: thisblkr, thisblkc
961 
962  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
963 
964  CALL bsystemclock(pstats%extract%t1)
965 
966  k = 0
967  DO j = 1, ncols, bszc
968  thisblkc = (j-1) / bszc
969  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
970  DO r = 1, bszc
971  IF ( j+r-1 .LE. ncols ) THEN
972  DO i = 1, nrows, bszr
973  thisblkr = (i-1) / bszr
974  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
975  DO q = 1, bszr
976  IF ( i+q-1 .LE. nrows ) THEN
977  k = k + 1
978  suba(k) = a(i+q-1,j+r-1)
979  END IF
980  END DO
981  END IF
982  END DO
983  END IF
984  END DO
985  END IF
986  END DO
987 
988  !----------------------------------------------
989  IF ( k .NE. subnrows*subncols ) THEN
990  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
991  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
992  stop
993  END IF
994 
995  CALL bsystemclock(pstats%extract%t2)
996  CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
997 
998  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix done K', k; CALL fl(ofu)
999 
1000 END SUBROUTINE extractsubmatrix
1001 
1002 !-------------------------------------------------------------------------------
1008 !-------------------------------------------------------------------------------
1009 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1010  !----------------------------------------------
1011  ! Formal arguments
1012  !----------------------------------------------
1013  REAL(dp), INTENT(IN) :: A(:,:)
1014  INTEGER, INTENT(IN) :: nrows, ncols
1015  REAL(dp), INTENT(OUT) :: ssubA(:)
1016  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
1017  !----------------------------------------------
1018  ! Local variables
1019  !----------------------------------------------
1020  INTEGER :: I, J, K
1021  INTEGER :: aslaverank
1022  TYPE(pblastemparray), ALLOCATABLE :: subA(:,:)
1023  INTEGER :: subnrows, subncols
1024  INTEGER :: maxsubnrows, maxsubncols
1025  INTEGER :: bszr, bszc
1026  INTEGER :: pnr, pnc
1027 #if defined(MPI_OPT)
1028  INTEGER, EXTERNAL :: NUMROC
1029  INTEGER :: mpistatus(MPI_STATUS_SIZE)
1030 #endif
1031 
1032  !----------------------------------------------
1033  INTEGER :: mpinisends
1034  INTEGER, ALLOCATABLE :: mpireqs(:)
1035  INTEGER :: mpierr
1036  INTEGER, ALLOCATABLE :: mpistatuses(:,:)
1037  INTEGER :: waitmatchindex
1038 
1039  !----------------------------------------------
1040  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix started'; CALL fl(ofu)
1041  CALL bsystemclock(pstats%comm%t1)
1042 
1043 #if defined(MPI_OPT)
1044  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1045  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1046 
1047  !----------------------------------------------
1048  mpinisends = 0
1049  ALLOCATE( mpireqs( pnr * pnc ) )
1050  ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1051 
1052  !----------------------------------------------
1053  maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1054  maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1055  ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1056 
1057  DO i = 1, pnr
1058  DO j = 1, pnc
1059  aslaverank = blacs%pgrid%map(i,j)
1060 
1061  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1062  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1063 
1064  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix to ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1065 
1066  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1067  !Self (local)
1068  IF ( aslaverank .NE. rank ) THEN
1069  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1070  stop
1071  END IF
1072  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1073  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1074  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1075  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1076  stop
1077  END IF
1078 
1079  !Return a copy of this submatrix (will be like sent+received to/by self)
1080  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting self submatrix'; CALL fl(ofu)
1081  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1082  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1083  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix kept self submatrix'; CALL fl(ofu)
1084  ELSE
1085  !A remote slave; send its corresponding matrix fragment
1086  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting submatrix',i,j; CALL fl(ofu)
1087  ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1088  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1089  i, j, a, nrows, ncols, suba(i,j)%temparray, &
1090  subnrows,subncols)
1091  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracted submatrix'; CALL fl(ofu)
1092  IF ( doblacscomm ) THEN
1093  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1094  suba(i,j)%temparray, subnrows, i-1, j-1 )
1095  ELSE
1096  mpinisends = mpinisends + 1
1097  CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1098  aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1099  END IF
1100  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix sent slave submatrix'; CALL fl(ofu)
1101  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix deallocated temp submatrix'; CALL fl(ofu)
1102  END IF
1103  END DO
1104  END DO
1105 
1106  CALL bsystemclock(pstats%waitall%t1)
1107  CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1108  CALL bsystemclock(pstats%waitall%t2)
1109  CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1110 
1111  DO i = 1, pnr
1112  DO j = 1, pnc
1113  IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) ) THEN
1114  DEALLOCATE( suba(i,j)%temparray )
1115  END IF
1116  END DO
1117  END DO
1118  DEALLOCATE( suba )
1119  DEALLOCATE( mpireqs )
1120  DEALLOCATE( mpistatuses )
1121 
1122  CALL bsystemclock(pstats%comm%t2)
1123  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1124 #endif
1125  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix done'; CALL fl(ofu)
1126 
1127 END SUBROUTINE mastersendmatrix
1128 
1129 !-------------------------------------------------------------------------------
1133 !-------------------------------------------------------------------------------
1134 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1135  !----------------------------------------------
1136  ! Formal arguments
1137  !----------------------------------------------
1138 #if defined(MPI_OPT)
1139  INTEGER :: mpistatus(MPI_STATUS_SIZE)
1140 #endif
1141  INTEGER, INTENT(IN) :: nrows, ncols
1142  REAL(dp), INTENT(OUT) :: subA(:)
1143  INTEGER, INTENT(IN) :: subnrows, subncols
1144  !----------------------------------------------
1145  ! Local variables
1146  !----------------------------------------------
1147  INTEGER :: masterrank
1148  INTEGER :: mpierr
1149 
1150  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1151 
1152 #if defined(MPI_OPT)
1153  masterrank = blacs%pgrid%map(1,1)
1154 
1155  CALL bsystemclock(pstats%comm%t1)
1156 
1157  IF ( doblacscomm ) THEN
1158  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1159  ELSE
1160  CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1161  masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1162  END IF
1163 
1164  CALL bsystemclock(pstats%comm%t2)
1165  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1166 #endif
1167 
1168  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix done'; CALL fl(ofu)
1169 END SUBROUTINE slavereceivematrix
1170 
1171 !-------------------------------------------------------------------------------
1175 !-------------------------------------------------------------------------------
1176 SUBROUTINE masterbcastvalue( val )
1177 IMPLICIT NONE
1178  !----------------------------------------------
1179  ! Formal arguments
1180  !----------------------------------------------
1181  REAL(dp), INTENT(IN) :: val
1182 
1183 #if defined(MPI_OPT)
1184  CALL dgebs2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1 );
1185 #endif
1186  IF(kpdbg) WRITE(ofu,*) 'MasterBcastValue bcast to slaves'; CALL fl(ofu)
1187 
1188 END SUBROUTINE masterbcastvalue
1189 
1190 !-------------------------------------------------------------------------------
1194 !-------------------------------------------------------------------------------
1195 SUBROUTINE slavereceivevalue( val )
1196 IMPLICIT NONE
1197  !----------------------------------------------
1198  ! Formal arguments
1199  !----------------------------------------------
1200  REAL(dp), INTENT(OUT) :: val
1201 
1202 #if defined(MPI_OPT)
1203  CALL dgebr2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1, 0, 0 )
1204 #endif
1205  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveValue bcast from master'
1206  CALL fl(ofu)
1207 
1208 END SUBROUTINE slavereceivevalue
1209 
1210 !-------------------------------------------------------------------------------
1214 !-------------------------------------------------------------------------------
1215 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1216  A, nrows, ncols, subA, subnrows, subncols )
1217  !----------------------------------------------
1218  ! Formal arguments
1219  !----------------------------------------------
1220  INTEGER, INTENT(IN) :: bszr, bszc
1221  INTEGER, INTENT(IN) :: pnr, pnc
1222  INTEGER, INTENT(IN) :: pi, pj
1223  REAL(dp), INTENT(OUT) :: A(:,:)
1224  INTEGER, INTENT(IN) :: nrows, ncols
1225  REAL(dp), INTENT(IN) :: subA(:)
1226  INTEGER, INTENT(IN) :: subnrows, subncols
1227  !----------------------------------------------
1228  ! Local variables
1229  !----------------------------------------------
1230  INTEGER :: I, J, K, Q, R
1231  INTEGER :: thisblkr, thisblkc
1232 
1233  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
1234 
1235  k = 0
1236  DO j = 1, ncols, bszc
1237  thisblkc = (j-1) / bszc
1238  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
1239  DO r = 1, bszc
1240  IF ( j+r-1 .LE. ncols ) THEN
1241  DO i = 1, nrows, bszr
1242  thisblkr = (i-1) / bszr
1243  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1244  DO q = 1, bszr
1245  IF ( i+q-1 .LE. nrows ) THEN
1246  k = k + 1
1247  a(i+q-1,j+r-1) = suba(k)
1248  END IF
1249  END DO
1250  END IF
1251  END DO
1252  END IF
1253  END DO
1254  END IF
1255  END DO
1256 
1257  !----------------------------------------------
1258  IF ( k .NE. subnrows*subncols ) THEN
1259  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1260  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
1261  stop
1262  END IF
1263 
1264  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix done K', k; CALL fl(ofu)
1265 
1266 END SUBROUTINE injectsubmatrix
1267 
1268 !-------------------------------------------------------------------------------
1272 !-------------------------------------------------------------------------------
1273 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1274  !----------------------------------------------
1275  ! Formal arguments
1276  !----------------------------------------------
1277  INTEGER, INTENT(IN) :: bszr
1278  INTEGER, INTENT(IN) :: pnr
1279  INTEGER, INTENT(IN) :: pi
1280  INTEGER, INTENT(OUT) :: V(:)
1281  INTEGER, INTENT(IN) :: nrows
1282  INTEGER, INTENT(IN) :: subV(:)
1283  INTEGER, INTENT(IN) :: subnrows
1284  !----------------------------------------------
1285  ! Local variables
1286  !----------------------------------------------
1287  INTEGER :: I, K, Q
1288  INTEGER :: thisblkr
1289 
1290  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector NR=', subnrows; CALL fl(ofu)
1291 
1292  k = 0
1293  DO i = 1, nrows, bszr
1294  thisblkr = (i-1) / bszr
1295  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1296  DO q = 1, bszr
1297  IF ( i+q-1 .LE. nrows ) THEN
1298  k = k + 1
1299  v(i+q-1) = subv(k)
1300  END IF
1301  END DO
1302  END IF
1303  END DO
1304 
1305  !----------------------------------------------
1306  IF ( k .NE. subnrows ) THEN
1307  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1308  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows; CALL fl(ofu)
1309  stop
1310  END IF
1311 
1312  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector done K', k; CALL fl(ofu)
1313 
1314 END SUBROUTINE injectsubvector
1315 
1316 !-------------------------------------------------------------------------------
1322 !-------------------------------------------------------------------------------
1323 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1324  !----------------------------------------------
1325  ! Formal arguments
1326  !----------------------------------------------
1327  REAL(dp), INTENT(OUT) :: A(:,:)
1328  INTEGER, INTENT(IN) :: nrows, ncols
1329  REAL(dp), INTENT(IN) :: ssubA(:)
1330  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
1331  !----------------------------------------------
1332  ! Local variables
1333  !----------------------------------------------
1334  INTEGER :: I, J, K
1335  INTEGER :: aslaverank
1336  REAL(dp), ALLOCATABLE :: subA(:)
1337  INTEGER :: subnrows, subncols
1338  INTEGER :: bszr, bszc
1339  INTEGER :: pnr, pnc
1340 #if defined(MPI_OPT)
1341  INTEGER, EXTERNAL :: NUMROC
1342 #endif
1343 
1344  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix started'; CALL fl(ofu)
1345  CALL bsystemclock(pstats%comm%t1)
1346 
1347 #if defined(MPI_OPT)
1348  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1349  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1350 
1351  DO i = 1, pnr
1352  DO j = 1, pnc
1353  aslaverank = blacs%pgrid%map(i,j)
1354 
1355  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1356  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1357 
1358  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1359 
1360  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1361  !Self (local)
1362  IF ( aslaverank .NE. rank ) THEN
1363  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1364  stop
1365  END IF
1366  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1367  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1368  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1369  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1370  stop
1371  END IF
1372 
1373  !Use the given copy of this submatrix (like sent+received to/by self)
1374  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting self submatrix'; CALL fl(ofu)
1375  CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1376  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1377  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix kept self submatrix'; CALL fl(ofu)
1378  ELSE
1379  !A remote slave; receive its corresponding matrix fragment
1380  ALLOCATE( suba( 1 : subnrows*subncols ) )
1381  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix receiving slave submatrix'; CALL fl(ofu)
1382  CALL bsystemclock(pstats%wait%t1)
1383  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1384  suba, subnrows, i-1, j-1 )
1385  CALL bsystemclock(pstats%wait%t2)
1386  CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1387  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting submatrix',i,j; CALL fl(ofu)
1388  CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1389  i, j, a, nrows, ncols, suba, subnrows, subncols )
1390  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injected submatrix'; CALL fl(ofu)
1391  DEALLOCATE( suba )
1392  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix deallocated temp submatrix'; CALL fl(ofu)
1393  END IF
1394  END DO
1395  END DO
1396 
1397  CALL bsystemclock(pstats%comm%t2)
1398  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1399 #endif
1400 
1401  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix done'; CALL fl(ofu)
1402 
1403 END SUBROUTINE masterrecvmatrix
1404 
1405 !-------------------------------------------------------------------------------
1409 !-------------------------------------------------------------------------------
1410 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1411  !----------------------------------------------
1412  ! Formal arguments
1413  !----------------------------------------------
1414  INTEGER, INTENT(IN) :: nrows, ncols
1415  REAL(dp), INTENT(IN) :: subA(:)
1416  INTEGER, INTENT(IN) :: subnrows, subncols
1417 
1418  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1419 
1420  CALL bsystemclock(pstats%comm%t1)
1421 #if defined(MPI_OPT)
1422  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1423 #endif
1424  CALL bsystemclock(pstats%comm%t2)
1425  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1426 
1427  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix done'; CALL fl(ofu)
1428 END SUBROUTINE slavesendmatrix
1429 
1430 !-------------------------------------------------------------------------------
1436 !-------------------------------------------------------------------------------
1437 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1438  !----------------------------------------------
1439  ! Formal arguments
1440  !----------------------------------------------
1441  INTEGER, INTENT(OUT) :: V(:)
1442  INTEGER, INTENT(IN) :: nrows
1443  INTEGER, INTENT(IN) :: ssubV(:)
1444  INTEGER, INTENT(IN) :: ssubnrows
1445  !----------------------------------------------
1446  ! Local variables
1447  !----------------------------------------------
1448  INTEGER :: I, J, K
1449  INTEGER :: aslaverank
1450  INTEGER, ALLOCATABLE :: subV(:)
1451  INTEGER :: subnrows
1452  INTEGER :: bszr
1453  INTEGER :: pnr
1454 #if defined(MPI_OPT)
1455  INTEGER, EXTERNAL :: NUMROC
1456 #endif
1457 
1458  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector started'; CALL fl(ofu)
1459  CALL bsystemclock(pstats%comm%t1)
1460 
1461 #if defined(MPI_OPT)
1462  bszr = blacs%pgrid%blockszrows
1463  pnr = blacs%pgrid%nrows
1464 
1465  DO i = 1, pnr
1466  DO j = 1, 1
1467  aslaverank = blacs%pgrid%map(i,j)
1468 
1469  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1470 
1471  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1472 
1473  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1474  !Self (local)
1475  IF ( aslaverank .NE. rank ) THEN
1476  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1477  stop
1478  END IF
1479  IF ( ssubnrows .NE. subnrows ) THEN
1480  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1481  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows; CALL fl(ofu)
1482  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows; CALL fl(ofu)
1483  stop
1484  END IF
1485 
1486  !Use the given copy of this subvector (like sent+received to/by self)
1487  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting self subvector'; CALL fl(ofu)
1488  CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1489  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector kept self subvector'; CALL fl(ofu)
1490  ELSE
1491  !A remote slave; receive its corresponding vector fragment
1492  ALLOCATE( subv( 1 : subnrows ) )
1493  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector receiving slave subvector'; CALL fl(ofu)
1494  CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1495  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting subvector',i,j; CALL fl(ofu)
1496  CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1497  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injected subvector'; CALL fl(ofu)
1498  DEALLOCATE( subv )
1499  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector deallocated temp subvector'; CALL fl(ofu)
1500  END IF
1501  END DO
1502  END DO
1503 
1504  CALL bsystemclock(pstats%comm%t2)
1505  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1506 #endif
1507 
1508  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector done'; CALL fl(ofu)
1509 
1510 END SUBROUTINE masterrecvvector
1511 
1512 !-------------------------------------------------------------------------------
1518 !-------------------------------------------------------------------------------
1519 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1520  !----------------------------------------------
1521  ! Formal arguments
1522  !----------------------------------------------
1523  INTEGER, INTENT(IN) :: nrows
1524  INTEGER, INTENT(IN) :: subV(:)
1525  INTEGER, INTENT(IN) :: subnrows
1526  INTEGER :: pj
1527 
1528  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector started ', subnrows; CALL fl(ofu)
1529 
1530  CALL bsystemclock(pstats%comm%t1)
1531 
1532  pj = blacs%pgrid%mycol
1533  IF ( pj .GT. 0 ) THEN
1534  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector skipping since mycol>0 ',pj; CALL fl(ofu)
1535  ELSE
1536 #if defined(MPI_OPT)
1537  CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1538 #endif
1539  END IF
1540 
1541  CALL bsystemclock(pstats%comm%t2)
1542  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1543 
1544  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector done'; CALL fl(ofu)
1545 END SUBROUTINE slavesendvector
1546 
1547 !-------------------------------------------------------------------------------
1551 !-------------------------------------------------------------------------------
1552 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1553  !----------------------------------------------
1554  ! Formal arguments
1555  !----------------------------------------------
1556  REAL(dp), INTENT(IN) :: alpha, beta
1557  REAL(dp), INTENT(IN) :: A(:,:), B(:,:)
1558  REAL(dp), INTENT(INOUT) :: C(:,:)
1559  !----------------------------------------------
1560  ! Local variables
1561  !----------------------------------------------
1562  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1563  INTEGER :: descA(9), descB(9), descC(9)
1564  INTEGER :: lldA, lldB, lldC
1565  INTEGER :: nrows, ncols, subnrows, subncols
1566  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1567  LOGICAL :: useblas
1568  INTEGER :: ctxt, info
1569 #if defined(MPI_OPT)
1570  INTEGER, EXTERNAL :: NUMROC
1571 #endif
1572 
1573  IF(kpdbg) WRITE(ofu,*) 'MasterGEMM started'; CALL fl(ofu)
1574 
1575  !----------------------------------------------
1576  useblas = ( doblasonly ) .OR. &
1577  ( pblas%msmap%nslaves .EQ. 1) .OR. &
1578  ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1579  ( m .LE. blacs%pgrid%blockszcols )
1580 
1581  IF ( useblas ) THEN
1582  IF(kpdbg) WRITE(ofu,*) 'BLAS DGEMM only (not using PBLAS)'; CALL fl(ofu)
1583  CALL dgemm( 'N', 'N', m, m, m, alpha, a, m, b, m, beta, c, m )
1584  ELSE
1585 #if defined(MPI_OPT)
1586  CALL bsystemclock(pstats%mm%t1)
1587  nrows = m; ncols = m
1588  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1589  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1590  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1591  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1592  subncols = numroc( ncols, bszc, pj, 0, pnc )
1593 
1594  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM allocating subABC'; CALL fl(ofu)
1595  ALLOCATE( suba(1:subnrows*subncols) )
1596  ALLOCATE( subb(1:subnrows*subncols) )
1597  ALLOCATE( subc(1:subnrows*subncols) )
1598  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM allocated subABC'; CALL fl(ofu)
1599 
1600  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM desciniting subABC'; CALL fl(ofu)
1601  ctxt = blacs%levelcontext
1602  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1603  lldb = llda; lldc = llda
1604  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1605  CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1606  CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1607  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM desciniting subABC'; CALL fl(ofu)
1608 
1609  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending OP_DGEMM'; CALL fl(ofu)
1610  CALL masterbcastnextop( op_dgemm )
1611 
1612  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending A'; CALL fl(ofu)
1613  CALL bsystemclock(pstats%mma%t1)
1614  CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1615  CALL bsystemclock(pstats%mma%t2)
1616  CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1617 
1618  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending B'; CALL fl(ofu)
1619  CALL bsystemclock(pstats%mmb%t1)
1620  CALL mastersendmatrix( b, nrows, ncols, subb, subnrows, subncols )
1621  CALL bsystemclock(pstats%mmb%t2)
1622  CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1623 
1624  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending C'; CALL fl(ofu)
1625  CALL bsystemclock(pstats%mmc%t1)
1626  CALL mastersendmatrix( c, nrows, ncols, subc, subnrows, subncols )
1627  CALL bsystemclock(pstats%mmc%t2)
1628  CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1629 
1630  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending alpha', alpha; CALL fl(ofu)
1631  CALL bsystemclock(pstats%mmalpha%t1)
1632  CALL masterbcastvalue( alpha )
1633  CALL bsystemclock(pstats%mmalpha%t2)
1634  CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1635 
1636  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending beta', beta; CALL fl(ofu)
1637  CALL bsystemclock(pstats%mmbeta%t1)
1638  CALL masterbcastvalue( beta )
1639  CALL bsystemclock(pstats%mmbeta%t2)
1640  CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1641 
1642  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM invoking PDGEMM'; CALL fl(ofu)
1643  CALL bsystemclock(pstats%comp%t1)
1644  CALL pdgemm( 'N', 'N', m, m, m, alpha, &
1645  suba, 1, 1, desca, &
1646  subb, 1, 1, descb, &
1647  beta, &
1648  subc, 1, 1, descc )
1649  CALL bsystemclock(pstats%comp%t2)
1650  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1651  CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1652  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM done PDGEMM'; CALL fl(ofu)
1653 
1654  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM receiving slave matrices'; CALL fl(ofu)
1655  CALL bsystemclock(pstats%mmrc%t1)
1656  CALL masterrecvmatrix( c, nrows, ncols, subc, subnrows, subncols )
1657  CALL bsystemclock(pstats%mmrc%t2)
1658  CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1659  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM received slave matrices'; CALL fl(ofu)
1660 
1661  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM deallocating subABC'; CALL fl(ofu)
1662  DEALLOCATE( suba )
1663  DEALLOCATE( subb )
1664  DEALLOCATE( subc )
1665  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM deallocated subABC'; CALL fl(ofu)
1666  CALL bsystemclock(pstats%mm%t2)
1667  CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1668 #endif
1669  END IF
1670 
1671  IF(kpdbg) WRITE(ofu,*) 'MasterGEMM done'; CALL fl(ofu)
1672 END SUBROUTINE plbdgemm
1673 
1674 !-------------------------------------------------------------------------------
1678 !-------------------------------------------------------------------------------
1679 SUBROUTINE slavedgemm()
1680  !----------------------------------------------
1681  ! Local variables
1682  !----------------------------------------------
1683  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1684  REAL(dp) :: alpha, beta
1685  INTEGER :: descA(9), descB(9), descC(9)
1686  INTEGER :: lldA, lldB, lldC
1687  INTEGER :: nrows, ncols, subnrows, subncols
1688  INTEGER :: bszr, bszc, pnr, pnc
1689  INTEGER :: pi, pj
1690  INTEGER :: ctxt, info
1691 #if defined(MPI_OPT)
1692  INTEGER, EXTERNAL :: NUMROC
1693 #endif
1694 
1695  CALL bsystemclock(pstats%mm%t1)
1696 
1697 #if defined(MPI_OPT)
1698  nrows = m; ncols = m
1699  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1700  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1701  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1702 
1703  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1704  subncols = numroc( ncols, bszc, pj, 0, pnc )
1705 
1706  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocating subABC'; CALL fl(ofu)
1707  ALLOCATE( suba(1:subnrows*subncols) )
1708  ALLOCATE( subb(1:subnrows*subncols) )
1709  ALLOCATE( subc(1:subnrows*subncols) )
1710  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocated subABC'; CALL fl(ofu)
1711 
1712  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1713  ctxt = blacs%levelcontext
1714  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1715  lldb = llda; lldc = llda
1716  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1717  CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1718  CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1719  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1720 
1721  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving A'; CALL fl(ofu)
1722  CALL bsystemclock(pstats%mma%t1)
1723  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1724  CALL bsystemclock(pstats%mma%t2)
1725  CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1726 
1727  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving B'; CALL fl(ofu)
1728  CALL bsystemclock(pstats%mmb%t1)
1729  CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1730  CALL bsystemclock(pstats%mmb%t2)
1731  CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1732 
1733  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving C'; CALL fl(ofu)
1734  CALL bsystemclock(pstats%mmc%t1)
1735  CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1736  CALL bsystemclock(pstats%mmc%t2)
1737  CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1738 
1739  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving alpha'; CALL fl(ofu)
1740  CALL bsystemclock(pstats%mmalpha%t1)
1741  CALL slavereceivevalue( alpha )
1742  CALL bsystemclock(pstats%mmalpha%t2)
1743  CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1744 
1745  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving beta'; CALL fl(ofu)
1746  CALL bsystemclock(pstats%mmbeta%t1)
1747  CALL slavereceivevalue( beta )
1748  CALL bsystemclock(pstats%mmbeta%t2)
1749  CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1750 
1751  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM invoking PDGEMM'; CALL fl(ofu)
1752  CALL bsystemclock(pstats%comp%t1)
1753  CALL pdgemm( 'N', 'N', m, m, m, alpha, &
1754  suba, 1, 1, desca, &
1755  subb, 1, 1, descb, &
1756  beta, &
1757  subc, 1, 1, descc )
1758  CALL bsystemclock(pstats%comp%t2)
1759  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1760  CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1761  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM done PDGEMM'; CALL fl(ofu)
1762 
1763  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sending result matrix to master'; CALL fl(ofu)
1764  CALL bsystemclock(pstats%mmrc%t1)
1765  CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1766  CALL bsystemclock(pstats%mmrc%t2)
1767  CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1768  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sent result matrix to master'; CALL fl(ofu)
1769 
1770  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocating subABC'; CALL fl(ofu)
1771  DEALLOCATE( suba )
1772  DEALLOCATE( subb )
1773  DEALLOCATE( subc )
1774  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocated subABC'; CALL fl(ofu)
1775 #endif
1776 
1777  CALL bsystemclock(pstats%mm%t2)
1778  CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1779 
1780 END SUBROUTINE slavedgemm
1781 
1782 !-------------------------------------------------------------------------------
1786 !-------------------------------------------------------------------------------
1787 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1788  !----------------------------------------------
1789  ! Formal arguments
1790  !----------------------------------------------
1791  REAL(dp), INTENT(IN) :: alpha, beta
1792  REAL(dp), INTENT(IN) :: A(:,:)
1793  REAL(dp), INTENT(IN) :: x(:)
1794  REAL(dp), INTENT(INOUT) :: y(:)
1795 
1796  !----------------------------------------------
1797  !Just do this locally (not worth asking slaves' help)
1798  CALL dgemv( 'N', m, m, alpha, a, m, x, 1, beta, y, 1 )
1799 END SUBROUTINE plbdgemv
1800 
1801 !-------------------------------------------------------------------------------
1805 !-------------------------------------------------------------------------------
1806 SUBROUTINE plbdgetrf( A, piv, info )
1807  !----------------------------------------------
1808  ! Formal arguments
1809  !----------------------------------------------
1810  REAL(dp), INTENT(INOUT) :: A(:,:)
1811  INTEGER, INTENT(OUT) :: piv(:)
1812  INTEGER, INTENT(INOUT) :: info
1813  !----------------------------------------------
1814  ! Local variables
1815  !----------------------------------------------
1816  REAL(dp), ALLOCATABLE :: subA(:)
1817  INTEGER, ALLOCATABLE :: subpiv(:)
1818  INTEGER :: descA(9)
1819  INTEGER :: lldA
1820  INTEGER :: nrows, ncols, subnrows, subncols
1821  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1822  INTEGER :: ctxt
1823  LOGICAL :: useblas
1824 #if defined(MPI_OPT)
1825  INTEGER, EXTERNAL :: NUMROC
1826 #endif
1827 
1828  IF(kpdbg) WRITE(ofu,*) 'MasterGETRF started'; CALL fl(ofu)
1829 
1830  !----------------------------------------------
1831  useblas = ( doblasonly ) .OR. &
1832  ( pblas%msmap%nslaves .EQ. 1) .OR. &
1833  ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1834  ( m .LE. blacs%pgrid%blockszcols )
1835 
1836  !-----------------------------------------------------------------------------
1837  IF ( useblas ) THEN
1838  !IF(KPDBG) WRITE(OFU,*) 'BLAS DGETRF only (not using PBLAS)' ; CALL FL(OFU)
1839  IF(kpdbg) WRITE(ofu,*) 'BLAS DGETRF only (not using PBLAS) with M=',m ; CALL fl(ofu)
1840  CALL dgetrf( m, m, a, m, piv, info )
1841  ELSE
1842 #if defined(MPI_OPT)
1843  CALL bsystemclock(pstats%trf%t1)
1844  nrows = m; ncols = m
1845  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1846  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1847  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1848  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1849  subncols = numroc( ncols, bszc, pj, 0, pnc )
1850 
1851  ctxt = blacs%levelcontext
1852  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1853  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1854 
1855  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF allocating subAPiv'; CALL fl(ofu)
1856  ALLOCATE( suba(1:subnrows*subncols) )
1857  ALLOCATE( subpiv(1:subnrows+bszr) )
1858  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF allocated subAPiv'; CALL fl(ofu)
1859 
1860  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF sending OP_GETRF'; CALL fl(ofu)
1861  CALL masterbcastnextop( op_dgetrf )
1862  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF sending A'; CALL fl(ofu)
1863  CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1864  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF invoking PDGETRF'; CALL fl(ofu)
1865  CALL bsystemclock(pstats%comp%t1)
1866  CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1867  CALL bsystemclock(pstats%comp%t2)
1868  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1869  CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1870  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done PDGETRF'; CALL fl(ofu)
1871 
1872  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF receiving slave submatrices'; CALL fl(ofu)
1873  CALL masterrecvmatrix( a, nrows, ncols, suba, subnrows, subncols )
1874  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF received slave submatrices'; CALL fl(ofu)
1875  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF receiving slave vectors'; CALL fl(ofu)
1876  CALL masterrecvvector( piv, nrows, subpiv, subnrows )
1877  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF received slave vectors'; CALL fl(ofu)
1878 
1879  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF deallocating subAPiv'; CALL fl(ofu)
1880  DEALLOCATE( subpiv )
1881  DEALLOCATE( suba )
1882  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF deallocated subAPiv'; CALL fl(ofu)
1883  CALL bsystemclock(pstats%trf%t2)
1884  CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1885 #endif
1886  END IF
1887 
1888  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done'; CALL fl(ofu)
1889 END SUBROUTINE plbdgetrf
1890 
1891 !-------------------------------------------------------------------------------
1895 !-------------------------------------------------------------------------------
1896 SUBROUTINE slavedgetrf()
1897  !----------------------------------------------
1898  ! Local variables
1899  !----------------------------------------------
1900  REAL(dp), ALLOCATABLE :: subA(:)
1901  INTEGER, ALLOCATABLE :: subpiv(:)
1902  INTEGER :: descA(9)
1903  INTEGER :: lldA
1904  INTEGER :: nrows, ncols, subnrows, subncols
1905  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1906  INTEGER :: ctxt, info
1907 #if defined(MPI_OPT)
1908  INTEGER, EXTERNAL :: NUMROC
1909 #endif
1910 
1911  CALL bsystemclock(pstats%trf%t1)
1912 
1913 #if defined(MPI_OPT)
1914  nrows = m; ncols = m
1915  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1916  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1917  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1918  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1919  subncols = numroc( ncols, bszc, pj, 0, pnc )
1920 
1921  ctxt = blacs%levelcontext
1922  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1923  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1924 
1925  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocating subAPiv'; CALL fl(ofu)
1926  ALLOCATE( suba(1:subnrows*subncols) )
1927  ALLOCATE( subpiv(1:subnrows+bszr) )
1928  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocated subAPiv'; CALL fl(ofu)
1929 
1930  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF receiving A submatrix'; CALL fl(ofu)
1931  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1932  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF received A submatrix'; CALL fl(ofu)
1933 
1934  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF invoking PDGETRF'; CALL fl(ofu)
1935  CALL bsystemclock(pstats%comp%t1)
1936  CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1937  CALL bsystemclock(pstats%comp%t2)
1938  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1939  CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1940  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done PDGETRF'; CALL fl(ofu)
1941 
1942  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sending result matrix to master'; CALL fl(ofu)
1943  CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1944  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result matrix to master'; CALL fl(ofu)
1945  CALL slavesendvector( nrows, subpiv, subnrows )
1946  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result vector to master'; CALL fl(ofu)
1947 
1948  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocating subAPiv'; CALL fl(ofu)
1949  DEALLOCATE( subpiv )
1950  DEALLOCATE( suba )
1951  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocated subAPiv'; CALL fl(ofu)
1952 
1953 #endif
1954 
1955  CALL bsystemclock(pstats%trf%t2)
1956  CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1957 
1958 END SUBROUTINE slavedgetrf
1959 
1960 !-------------------------------------------------------------------------------
1964 !-------------------------------------------------------------------------------
1965 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1966  !----------------------------------------------
1967  ! Formal arguments
1968  !----------------------------------------------
1969  INTEGER, INTENT(IN) :: nrhs
1970  REAL(dp), INTENT(IN) :: A(:,:)
1971  INTEGER, INTENT(IN) :: piv(:)
1972  REAL(dp), INTENT(INOUT) :: B(:,:)
1973  INTEGER :: info
1974 
1975  !----------------------------------------------
1976  !Just do this locally (not worth asking slaves' help)
1977  IF(kpdbg) WRITE(ofu,*) 'BLAS DGETRS only (not using PBLAS)'; CALL fl(ofu)
1978  CALL dgetrs( 'N', m, nrhs, a, m, piv, b, m, info )
1979 END SUBROUTINE plbdgetrs
1980 
1981 !-------------------------------------------------------------------------------
1985 !-------------------------------------------------------------------------------
1986 SUBROUTINE slavedgetrs()
1988  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRS not implemented'; CALL fl(ofu)
1989  stop !To be completed
1990 
1991 END SUBROUTINE slavedgetrs
1992 !-------------------------------------------------------------------------------
1993 !-------------------------------------------------------------------------------
1994 
1995 !-------------------------------------------------------------------------------
1999 !-------------------------------------------------------------------------------
2000 SUBROUTINE initialize( do_mpiinit, inN, inM )
2001  !-----------------------------------------------------------------------------
2002  ! Formal arguments
2003  !-----------------------------------------------------------------------------
2004  LOGICAL, INTENT(IN) :: do_mpiinit
2005  INTEGER, INTENT(IN) :: inN
2006  INTEGER, INTENT(IN) :: inM
2007 
2008  !-----------------------------------------------------------------------------
2009  ! Local variables
2010  !-----------------------------------------------------------------------------
2011  INTEGER :: nrowsperrank
2012  INTEGER :: nspillrows
2013  INTEGER :: mpierr
2014  INTEGER :: globrow
2015  INTEGER :: globrowoff
2016  REAL :: logNbase2
2017 
2018  n = inn
2019  m = inm
2020 
2021  !-----------------------------------------------------------------------------
2022  use_mpiwtime = .false.
2023 #if defined(MPI_OPT)
2024  IF ( do_mpiinit ) THEN
2025  CALL mpi_init( mpierr )
2026  END IF
2027  CALL mpi_comm_rank( ns_comm, rank, mpierr )
2028  CALL mpi_comm_size( ns_comm, nranks, mpierr)
2029  use_mpiwtime = .true.
2030 #endif
2031 
2032  !-----------------------------------------------------------------------------
2033  ! Set output file unit, so we get output of each rank in its own file
2034  ! Extremely useful for debugging -- makes it so much easier to see progress!
2035  !-----------------------------------------------------------------------------
2036  ofu = rank+1000
2037  p = nranks
2038  IF ( p .GT. n ) THEN
2039  IF(kpdbg) WRITE(ofu,*) 'Detected extra ', p-n, ' ranks'; CALL fl(ofu)
2040  p = n
2041  END IF
2042 
2043  !-----------------------------------------------------------------------------
2044  !Set problem output file
2045  !-----------------------------------------------------------------------------
2046  pfu = ofu+nranks
2047 
2048  !-----------------------------------------------------------------------------
2049  ! Check to see if debugging output is to be written : SKS on Nov 9, 2011
2050  !-----------------------------------------------------------------------------
2051  kpdbg=.false.; kenvvar='KPDBG'
2052  CALL getenv(kenvvar,kenvval)
2053  IF (kenvval.NE.'') THEN
2054  IF (kenvval.EQ.'TRUE') THEN
2055  kpdbg=.true.
2056  ELSE
2057  kpdbg=.false.
2058  END IF
2059  END IF
2060 
2061  !-----------------------------------------------------------------------------
2062  IF(kpdbg) WRITE(ofu,*) 'Rank ', rank, ' NRanks ', nranks; CALL fl(ofu)
2063  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2064  IF(kpdbg) WRITE(ofu,*) '------ Initialization start ------'; CALL fl(ofu)
2065  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2066 
2067  !-----------------------------------------------------------------------------
2068  IF ( n .LT. 1 ) THEN
2069  IF(kpdbg) WRITE(ofu,*) 'Bad N', n; CALL fl(ofu)
2070  stop
2071  END IF
2072 
2073  !-----------------------------------------------------------------------------
2074  !Determine where we start and end in row list at level 1 on this processor
2075  !This processor's global row numbers span [startglobrow,endglobrow] inclusive
2076  !-----------------------------------------------------------------------------
2077  nrowsperrank = n/p !Number of rows evenly split across processors
2078  nspillrows = mod(n, p) !Some left over after even split
2079  IF ( rank .LT. nspillrows ) THEN !The first few ranks get one extra row each
2080  startglobrow = rank*nrowsperrank + rank + 1
2081  endglobrow = startglobrow + nrowsperrank
2082  ELSE IF ( rank .LT. p ) THEN !The other active ranks
2083  startglobrow = rank*nrowsperrank + nspillrows + 1
2084  endglobrow = startglobrow + nrowsperrank - 1
2085  ELSE !The rest (unnecessary, inactive) ranks
2086  startglobrow = -1
2087  endglobrow = -2
2088  END IF
2089 
2090  !-----------------------------------------------------------------------------
2091  !Allocate array to store unmodified diagonal elements: SKS
2092  !-----------------------------------------------------------------------------
2093  IF (.NOT.ALLOCATED(origdiagelement)) ALLOCATE (origdiagelement((endglobrow-startglobrow+1)*m))
2094 
2095  !-----------------------------------------------------------------------------
2096  !No. of levels is 1+ceil(lg(N)); e.g., 6 for N=23
2097  !-----------------------------------------------------------------------------
2098  lognbase2 = log(real(n))/log(2.0)
2099  nlevels = 1 + int(lognbase2)
2100  IF ( real(int(lognbase2)) .NE. lognbase2 ) THEN
2101  nlevels = nlevels + 1
2102  END IF
2103  IF(kpdbg) WRITE(ofu,*) 'NLEVELS=', nlevels; CALL fl(ofu)
2104 
2105  !---------------------------------------------------------------------------
2106  matdirtied = .true.
2107  rhsdirtied = .true.
2108 
2109  !---------------------------------------------------------------------------
2110  !Not sure if barriers are needed between levels, but they can be optionally
2111  !turned on by setting usebarriers to TRUE
2112  !---------------------------------------------------------------------------
2113  usebarriers = .false.
2114 
2115  !---------------------------------------------------------------------------
2116  writeproblemfile = .false.
2117  writesolution = .false.
2118 
2119  !---------------------------------------------------------------------------
2120  membytes = 0
2121  dpsz = 8 !8 bytes for double precision
2122  intsz = 4 !4 bytes for a single integer
2123  ptrsz = 8 !Assuming 64-bit addresses in the worst case
2124  one = 1
2125  zero = 0
2126 
2127  tottime = 0
2128  totcommtime = 0
2129  totinvtime = 0
2130  totinvcount = 0
2131  totmatmultime = 0
2132  totmatmulcount = 0
2133  totmatsoltime = 0
2134  totmatsolcount = 0
2135  CALL bclockinit()
2136 
2137  !---------------------------------------------------------------------------
2138  !The +2 in nlocrows+2 is for storing incoming values from neighbors, if any
2139  !The zero'th element stores the incoming element from top neighbor, the
2140  !last element stores the incoming element from the bottom neighbor.
2141  !---------------------------------------------------------------------------
2142  ! SKS
2143  IF (.NOT.ALLOCATED(lelement)) ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2144  !ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2145  CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
2146 
2147  ! SKS
2148  IF (.NOT.ALLOCATED(selement)) ALLOCATE( selement(n) ) !Have a place for all x; only some will be allocated
2149  ! ALLOCATE( selement(N) ) !Have a place for all x; only some will be allocated
2150  CALL chargememory( n*ptrsz )
2151 
2152  !---------------------------------------------------------------------------
2153  !Allocate (L,D,U,b,pivot) at level 1
2154  !---------------------------------------------------------------------------
2155  DO globrow = startglobrow, endglobrow, 1
2156  globrowoff = globrow-startglobrow+1
2157  !SKS start
2158  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%L)) ALLOCATE( lelement(1, globrowoff)%L(m,m) )
2159  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%D)) ALLOCATE( lelement(1, globrowoff)%D(m,m) )
2160  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%U)) ALLOCATE( lelement(1, globrowoff)%U(m,m) )
2161  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%b)) ALLOCATE( lelement(1, globrowoff)%b(m,1) )
2162  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%pivot)) ALLOCATE( lelement(1, globrowoff)%pivot(m) )
2163  !SKS end
2164  !ALLOCATE( lelement(1, globrowoff)%L(M,M) )
2165  !ALLOCATE( lelement(1, globrowoff)%D(M,M) )
2166  !ALLOCATE( lelement(1, globrowoff)%U(M,M) )
2167  !ALLOCATE( lelement(1, globrowoff)%b(M,1) )
2168  !ALLOCATE( lelement(1, globrowoff)%pivot(M) )
2169  CALL chargememory( (3*m*m+m)*dpsz + m*intsz )
2170  lelement(1, globrowoff)%L = 0
2171  lelement(1, globrowoff)%D = 0
2172  lelement(1, globrowoff)%U = 0
2173  lelement(1, globrowoff)%b = 0
2174  lelement(1, globrowoff)%pivot = 0
2175  END DO
2176 
2177  !---------------------------------------------------------------------------
2178  !Allocate (L,D,U,b) for saving the original problem
2179  !---------------------------------------------------------------------------
2180  !SKS
2181  IF (.NOT.ALLOCATED(orig)) ALLOCATE( orig(endglobrow-startglobrow+1) )
2182  ! ALLOCATE( orig(endglobrow-startglobrow+1) )
2183  DO globrow = startglobrow, endglobrow, 1
2184  globrowoff = globrow-startglobrow+1
2185  !SKS start
2186  IF(.NOT.ALLOCATED(orig(globrowoff)%L)) ALLOCATE( orig(globrowoff)%L(m,m) )
2187  IF(.NOT.ALLOCATED(orig(globrowoff)%D)) ALLOCATE( orig(globrowoff)%D(m,m) )
2188  IF(.NOT.ALLOCATED(orig(globrowoff)%U)) ALLOCATE( orig(globrowoff)%U(m,m) )
2189  IF(.NOT.ALLOCATED(orig(globrowoff)%b)) ALLOCATE( orig(globrowoff)%b(m,1) )
2190  !SKS end
2191  !ALLOCATE( orig(globrowoff)%L(M,M) )
2192  !ALLOCATE( orig(globrowoff)%D(M,M) )
2193  !ALLOCATE( orig(globrowoff)%U(M,M) )
2194  !ALLOCATE( orig(globrowoff)%b(M,1) )
2195  !-- DONT CHARGE MEMORY COST FOR THIS!!! --
2196  orig(globrowoff)%L = 0
2197  orig(globrowoff)%D = 0
2198  orig(globrowoff)%U = 0
2199  orig(globrowoff)%b = 0
2200  END DO
2201 
2202  !-----------------------------------------------------------------------------
2203  CALL plbinitialize()
2204 
2205  !-----------------------------------------------------------------------------
2206  IF(kpdbg) WRITE(ofu,*) ' P=',p,' N=',n,' M=',m; CALL fl(ofu)
2207  IF(kpdbg) WRITE(ofu,*) 'SROW=',startglobrow,' EROW=', endglobrow; CALL fl(ofu)
2208  IF(kpdbg) WRITE(ofu,*) '------ Initialization end ------'; CALL fl(ofu)
2209  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2210 END SUBROUTINE initialize
2211 
2212 !-------------------------------------------------------------------------------
2216 !-------------------------------------------------------------------------------
2217 SUBROUTINE setmatrixrow( globrow, L, D, U )
2218  !-----------------------------------------------------------------------------
2219  ! Formal arguments
2220  !-----------------------------------------------------------------------------
2221  INTEGER, INTENT(IN) :: globrow
2222  REAL(dp), INTENT(IN) :: L(:,:), D(:,:), U(:,:)
2223  !-----------------------------------------------------------------------------
2224  ! Local variables
2225  !-----------------------------------------------------------------------------
2226  REAL(dp) :: val
2227  INTEGER :: i, j, globrowoff
2228 
2229  !-------------------------------------------
2230  ! Sanity checks on globrow
2231  !-------------------------------------------
2232  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2233  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRow: Bad input globrow ',globrow; CALL fl(ofu)
2234  stop
2235  END IF
2236  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2237  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRow: Non-local globrow ',globrow; CALL fl(ofu)
2238  stop
2239  END IF
2240 
2241  globrowoff = globrow-startglobrow+1
2242 
2243  !-------------------------------------------
2244  ! Copy given blocks into allocated matrix
2245  !-------------------------------------------
2246  DO i = 1, m
2247  DO j = 1, m
2248  IF ( globrow .EQ. 1 ) THEN
2249  val = 0.0
2250  ELSE
2251  val = l(i,j)
2252  END IF
2253  lelement(1, globrowoff)%L(i,j) = val
2254 
2255  val = d(i,j)
2256  lelement(1, globrowoff)%D(i,j) = val
2257 
2258  IF ( globrow .EQ. n ) THEN
2259  val = 0.0
2260  ELSE
2261  val = u(i,j)
2262  END IF
2263  lelement(1, globrowoff)%U(i,j) = val
2264  END DO
2265  END DO
2266 
2267  !-------------------------------------------
2268  !Save a copy of this original problem
2269  !-------------------------------------------
2270  orig(globrowoff)%L = lelement(1,globrowoff)%L
2271  orig(globrowoff)%D = lelement(1,globrowoff)%D
2272  orig(globrowoff)%U = lelement(1,globrowoff)%U
2273 
2274  !-------------------------------------------
2275  matdirtied = .true.
2276 
2277 END SUBROUTINE setmatrixrow
2278 
2279 !-------------------------------------------------------------------------------
2283 !-------------------------------------------------------------------------------
2284 SUBROUTINE setmatrixrowl( globrow, L )
2285  !-----------------------------------------------------------------------------
2286  ! Formal arguments
2287  !-----------------------------------------------------------------------------
2288  INTEGER, INTENT(IN) :: globrow
2289  REAL(dp), INTENT(IN) :: L(:,:)
2290  !-----------------------------------------------------------------------------
2291  ! Local variables
2292  !-----------------------------------------------------------------------------
2293  REAL(dp) :: val
2294  INTEGER :: i, j, globrowoff
2295 
2296  !-------------------------------------------
2297  ! Sanity checks on globrow
2298  !-------------------------------------------
2299  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2300  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowL: Bad input globrow ',globrow; CALL fl(ofu)
2301  stop
2302  END IF
2303  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2304  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowL: Non-local globrow ',globrow; CALL fl(ofu)
2305  stop
2306  END IF
2307 
2308  globrowoff = globrow-startglobrow+1
2309 
2310  !-------------------------------------------
2311  ! Copy given L block into allocated matrix
2312  !-------------------------------------------
2313  DO i = 1, m
2314  DO j = 1, m
2315  IF ( globrow .EQ. 1 ) THEN
2316  val = 0.0
2317  ELSE
2318  val = l(i,j)
2319  END IF
2320  lelement(1, globrowoff)%L(i,j) = val
2321  END DO
2322  END DO
2323 
2324  !-------------------------------------------
2325  !Save a copy of this original problem
2326  !-------------------------------------------
2327  orig(globrowoff)%L = lelement(1,globrowoff)%L
2328 
2329  !-------------------------------------------
2330  matdirtied = .true.
2331 
2332 END SUBROUTINE setmatrixrowl
2333 
2334 !-------------------------------------------------------------------------------
2338 !-------------------------------------------------------------------------------
2339 SUBROUTINE setmatrixrowd( globrow, D )
2340  !-----------------------------------------------------------------------------
2341  ! Formal arguments
2342  !-----------------------------------------------------------------------------
2343  INTEGER, INTENT(IN) :: globrow
2344  REAL(dp), INTENT(IN) :: D(:,:)
2345  !-----------------------------------------------------------------------------
2346  ! Local variables
2347  !-----------------------------------------------------------------------------
2348  REAL(dp) :: val
2349  INTEGER :: i, j, globrowoff
2350 
2351  !-------------------------------------------
2352  ! Sanity checks on globrow
2353  !-------------------------------------------
2354  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2355  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowD: Bad input globrow ',globrow; CALL fl(ofu)
2356  stop
2357  END IF
2358  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2359  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowD: Non-local globrow ',globrow; CALL fl(ofu)
2360  stop
2361  END IF
2362 
2363  globrowoff = globrow-startglobrow+1
2364 
2365  !-------------------------------------------
2366  ! Copy given D block into allocated matrix
2367  !-------------------------------------------
2368  DO i = 1, m
2369  DO j = 1, m
2370  val = d(i,j)
2371  lelement(1, globrowoff)%D(i,j) = val
2372  END DO
2373  END DO
2374 
2375  !-------------------------------------------
2376  !Save a copy of this original problem
2377  !-------------------------------------------
2378  orig(globrowoff)%D = lelement(1,globrowoff)%D
2379 
2380  !-------------------------------------------
2381  matdirtied = .true.
2382 
2383 END SUBROUTINE setmatrixrowd
2384 
2385 !-------------------------------------------------------------------------------
2389 !-------------------------------------------------------------------------------
2390 SUBROUTINE setmatrixrowu( globrow, U )
2391  !-----------------------------------------------------------------------------
2392  ! Formal arguments
2393  !-----------------------------------------------------------------------------
2394  INTEGER, INTENT(IN) :: globrow
2395  REAL(dp), INTENT(IN) :: U(:,:)
2396  !-----------------------------------------------------------------------------
2397  ! Local variables
2398  !-----------------------------------------------------------------------------
2399  REAL(dp) :: val
2400  INTEGER :: i, j, globrowoff
2401 
2402  !-------------------------------------------
2403  ! Sanity checks on globrow
2404  !-------------------------------------------
2405  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2406  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowU: Bad input globrow ',globrow; CALL fl(ofu)
2407  stop
2408  END IF
2409  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2410  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowU: Non-local globrow ',globrow; CALL fl(ofu)
2411  stop
2412  END IF
2413 
2414  globrowoff = globrow-startglobrow+1
2415 
2416  !-------------------------------------------
2417  ! Copy given U block into allocated matrix
2418  !-------------------------------------------
2419  DO i = 1, m
2420  DO j = 1, m
2421  IF ( globrow .EQ. n ) THEN
2422  val = 0.0
2423  ELSE
2424  val = u(i,j)
2425  END IF
2426  lelement(1, globrowoff)%U(i,j) = val
2427  END DO
2428  END DO
2429 
2430  !-------------------------------------------
2431  !Save a copy of this original problem
2432  !-------------------------------------------
2433  orig(globrowoff)%U = lelement(1,globrowoff)%U
2434 
2435  !-------------------------------------------
2436  matdirtied = .true.
2437 
2438 END SUBROUTINE setmatrixrowu
2439 
2440 !-------------------------------------------------------------------------------
2444 !-------------------------------------------------------------------------------
2445 SUBROUTINE setmatrixrowcoll( globrow, Lj, j )
2446  !-----------------------------------------------------------------------------
2447  ! Formal arguments
2448  !-----------------------------------------------------------------------------
2449  INTEGER, INTENT(IN) :: globrow
2450  REAL(dp), INTENT(IN) :: Lj(:)
2451  INTEGER, INTENT(IN) :: j
2452  !-----------------------------------------------------------------------------
2453  ! Local variables
2454  !-----------------------------------------------------------------------------
2455  REAL(dp) :: val
2456  INTEGER :: i, globrowoff
2457 
2458  !-------------------------------------------
2459  ! Sanity checks on globrow
2460  !-------------------------------------------
2461  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2462  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2463  stop "L 1"
2464  END IF
2465  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2466  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2467  stop "L 2"
2468  END IF
2469  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2470  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2471  stop "L 3"
2472  END IF
2473 
2474  globrowoff = globrow-startglobrow+1
2475 
2476  !-------------------------------------------
2477  ! Copy given L column into allocated matrix
2478  !-------------------------------------------
2479  DO i = 1, m
2480  IF ( globrow .EQ. 1 ) THEN
2481  val = 0.0
2482  ELSE
2483  val = lj(i)
2484  END IF
2485  lelement(1, globrowoff)%L(i,j) = val
2486  END DO
2487 
2488  !-------------------------------------------
2489  !Save a copy of this original problem
2490  !-------------------------------------------
2491  orig(globrowoff)%L(:,j) = lelement(1,globrowoff)%L(:,j)
2492 
2493  !-------------------------------------------
2494  matdirtied = .true.
2495 
2496 END SUBROUTINE setmatrixrowcoll
2497 
2498 !-------------------------------------------------------------------------------
2502 !-------------------------------------------------------------------------------
2503 SUBROUTINE setmatrixrowcold( globrow, Dj, j )
2504  !-----------------------------------------------------------------------------
2505  ! Formal arguments
2506  !-----------------------------------------------------------------------------
2507  INTEGER, INTENT(IN) :: globrow
2508  REAL(dp), INTENT(IN) :: Dj(:)
2509  INTEGER, INTENT(IN) :: j
2510  !-----------------------------------------------------------------------------
2511  ! Local variables
2512  !-----------------------------------------------------------------------------
2513  REAL(dp) :: val
2514  INTEGER :: i, globrowoff, MPI_ERR
2515 
2516  !-------------------------------------------
2517  ! Sanity checks on globrow
2518  !-------------------------------------------
2519  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2520  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2521  stop "D 1"
2522  END IF
2523 
2524  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2525  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2526  stop "D 2"
2527  END IF
2528 
2529  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2530  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Bad j column ',j; CALL fl(ofu)
2531  stop "D 3"
2532  END IF
2533 
2534  globrowoff = globrow-startglobrow+1
2535 
2536  !-------------------------------------------
2537  ! Copy given D column into allocated matrix
2538  !-------------------------------------------
2539  DO i = 1, m
2540  val = dj(i)
2541  lelement(1, globrowoff)%D(i,j) = val
2542  END DO
2543 
2544  !-------------------------------------------
2545  !Save a copy of this original problem
2546  !-------------------------------------------
2547  orig(globrowoff)%D(:,j) = lelement(1,globrowoff)%D(:,j)
2548 
2549  !-------------------------------------------
2550  matdirtied = .true.
2551 
2552 END SUBROUTINE setmatrixrowcold
2553 
2554 !-------------------------------------------------------------------------------
2558 !-------------------------------------------------------------------------------
2559 SUBROUTINE setmatrixrowcolu( globrow, Uj, j )
2560  !-----------------------------------------------------------------------------
2561  ! Formal arguments
2562  !-----------------------------------------------------------------------------
2563  INTEGER, INTENT(IN) :: globrow
2564  REAL(dp), INTENT(IN) :: Uj(:)
2565  INTEGER, INTENT(IN) :: j
2566  !-----------------------------------------------------------------------------
2567  ! Local variables
2568  !-----------------------------------------------------------------------------
2569  REAL(dp) :: val
2570  INTEGER :: i, globrowoff
2571 
2572  !-------------------------------------------
2573  ! Sanity checks on globrow
2574  !-------------------------------------------
2575  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2576  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2577  stop "U 1"
2578  END IF
2579  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2580  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2581  stop "U 2"
2582  END IF
2583  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2584  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2585  stop "U 3"
2586  END IF
2587 
2588  globrowoff = globrow-startglobrow+1
2589 
2590  !-------------------------------------------
2591  ! Copy given U column into allocated matrix
2592  !-------------------------------------------
2593  DO i = 1, m
2594  IF ( globrow .EQ. n ) THEN
2595  val = 0.0
2596  ELSE
2597  val = uj(i)
2598  END IF
2599  lelement(1, globrowoff)%U(i,j) = val
2600  END DO
2601 
2602  !-------------------------------------------
2603  !Save a copy of this original problem
2604  !-------------------------------------------
2605  orig(globrowoff)%U(:,j) = lelement(1,globrowoff)%U(:,j)
2606 
2607  !-------------------------------------------
2608  matdirtied = .true.
2609 
2610 END SUBROUTINE setmatrixrowcolu
2611 
2612 !-------------------------------------------------------------------------------
2616 !-------------------------------------------------------------------------------
2617 SUBROUTINE setmatrixrhs( globrow, b )
2618  !-----------------------------------------------------------------------------
2619  ! Formal arguments
2620  !-----------------------------------------------------------------------------
2621  INTEGER, INTENT(IN) :: globrow
2622  REAL(dp), INTENT(IN) :: b(:)
2623  !-----------------------------------------------------------------------------
2624  ! Local variables
2625  !-----------------------------------------------------------------------------
2626  REAL(dp) :: val
2627  INTEGER :: i, globrowoff
2628 
2629  !-------------------------------------------
2630  ! Sanity checks on globrow
2631  !-------------------------------------------
2632  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2633  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2634  stop
2635  END IF
2636  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2637  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2638  stop
2639  END IF
2640 
2641  globrowoff = globrow-startglobrow+1
2642 
2643  !-------------------------------------------
2644  ! Copy given values into allocated RHS
2645  !-------------------------------------------
2646  DO i = 1, m
2647  val = b(i)
2648  lelement(1, globrowoff)%b(i,1) = val
2649  END DO
2650 
2651  !-------------------------------------------
2652  !Save a copy of this original problem
2653  !-------------------------------------------
2654  orig(globrowoff)%b = lelement(1,globrowoff)%b
2655 
2656  !-------------------------------------------
2657  rhsdirtied = .true.
2658 
2659 END SUBROUTINE setmatrixrhs
2660 
2661 !-------------------------------------------------------------------------------
2665 !-------------------------------------------------------------------------------
2666 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2667  !-----------------------------------------------------------------------------
2668  ! Formal arguments
2669  !-----------------------------------------------------------------------------
2670  INTEGER, INTENT(IN) :: globrow
2671  REAL(dp), INTENT(OUT) :: Lj(:)
2672  INTEGER, INTENT(IN) :: j
2673  !-----------------------------------------------------------------------------
2674  ! Local variables
2675  !-----------------------------------------------------------------------------
2676  REAL(dp) :: val
2677  INTEGER :: i, globrowoff
2678 
2679  !-------------------------------------------
2680  ! Sanity checks on globrow
2681  !-------------------------------------------
2682  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2683  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2684  stop
2685  END IF
2686  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2687  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2688  stop
2689  END IF
2690  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2691  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2692  stop
2693  END IF
2694 
2695  globrowoff = globrow-startglobrow+1
2696 
2697  !-------------------------------------------
2698  ! Copy L from matrix
2699  !-------------------------------------------
2700  DO i = 1, m
2701  IF ( globrow .EQ. 1 ) THEN
2702  val = 0.0
2703  ELSE
2704  val = lelement(1, globrowoff)%L(i,j)
2705  END IF
2706  lj(i) = val
2707  END DO
2708 
2709 END SUBROUTINE getmatrixrowcoll
2710 
2711 !-------------------------------------------------------------------------------
2715 !-------------------------------------------------------------------------------
2716 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2717  !-----------------------------------------------------------------------------
2718  ! Formal arguments
2719  !-----------------------------------------------------------------------------
2720  INTEGER, INTENT(IN) :: globrow
2721  REAL(dp), INTENT(OUT) :: Dj(:)
2722  INTEGER, INTENT(IN) :: j
2723  !-----------------------------------------------------------------------------
2724  ! Local variables
2725  !-----------------------------------------------------------------------------
2726  REAL(dp) :: val
2727  INTEGER :: i, globrowoff
2728 
2729  !-------------------------------------------
2730  ! Sanity checks on globrow
2731  !-------------------------------------------
2732  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2733  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2734  stop
2735  END IF
2736  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2737  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2738  stop
2739  END IF
2740  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2741  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad j column ',j; CALL fl(ofu)
2742  stop
2743  END IF
2744 
2745  globrowoff = globrow-startglobrow+1
2746 
2747  !-------------------------------------------
2748  ! Copy D from matrix
2749  !-------------------------------------------
2750  DO i = 1, m
2751  val = lelement(1, globrowoff)%D(i,j)
2752  dj(i) = val
2753  END DO
2754 
2755 END SUBROUTINE getmatrixrowcold
2756 
2757 !-------------------------------------------------------------------------------
2761 !-------------------------------------------------------------------------------
2762 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2763  !-----------------------------------------------------------------------------
2764  ! Formal arguments
2765  !-----------------------------------------------------------------------------
2766  INTEGER, INTENT(IN) :: globrow
2767  REAL(dp), INTENT(OUT) :: Uj(:)
2768  INTEGER, INTENT(IN) :: j
2769  !-----------------------------------------------------------------------------
2770  ! Local variables
2771  !-----------------------------------------------------------------------------
2772  REAL(dp) :: val
2773  INTEGER :: i, globrowoff
2774 
2775  !-------------------------------------------
2776  ! Sanity checks on globrow
2777  !-------------------------------------------
2778  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2779  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2780  stop
2781  END IF
2782  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2783  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2784  stop
2785  END IF
2786  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2787  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2788  stop
2789  END IF
2790 
2791  globrowoff = globrow-startglobrow+1
2792 
2793  !-------------------------------------------
2794  ! Copy U from matrix
2795  !-------------------------------------------
2796  DO i = 1, m
2797  IF ( globrow .EQ. n ) THEN
2798  val = 0.0
2799  ELSE
2800  val = lelement(1, globrowoff)%U(i,j)
2801  END IF
2802  uj(i) = val
2803  END DO
2804 
2805 END SUBROUTINE getmatrixrowcolu
2806 
2807 !-------------------------------------------------------------------------------
2811 !-------------------------------------------------------------------------------
2812 SUBROUTINE getmatrixrhs( globrow, b )
2813  !-----------------------------------------------------------------------------
2814  ! Formal arguments
2815  !-----------------------------------------------------------------------------
2816  INTEGER, INTENT(IN) :: globrow
2817  REAL(dp), INTENT(OUT) :: b(:)
2818  !-----------------------------------------------------------------------------
2819  ! Local variables
2820  !-----------------------------------------------------------------------------
2821  REAL(dp) :: val
2822  INTEGER :: i, globrowoff
2823 
2824  !-------------------------------------------
2825  ! Sanity checks on globrow
2826  !-------------------------------------------
2827  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2828  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2829  stop
2830  END IF
2831  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2832  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2833  stop
2834  END IF
2835 
2836  globrowoff = globrow-startglobrow+1
2837 
2838  !-------------------------------------------
2839  ! Copy RHS
2840  !-------------------------------------------
2841  DO i = 1, m
2842  val = lelement(1, globrowoff)%b(i,1)
2843  b(i) = val
2844  END DO
2845 
2846 END SUBROUTINE getmatrixrhs
2847 
2848 !-------------------------------------------------------------------------------
2852 !-------------------------------------------------------------------------------
2853 SUBROUTINE getsolutionvector( globrow, x )
2854  !-----------------------------------------------------------------------------
2855  ! Formal arguments
2856  !-----------------------------------------------------------------------------
2857  INTEGER, INTENT(IN) :: globrow
2858  REAL(dp), INTENT(OUT) :: x(:)
2859  !-----------------------------------------------------------------------------
2860  ! Local variables
2861  !-----------------------------------------------------------------------------
2862  REAL(dp) :: val
2863  INTEGER :: i
2864 
2865  !-------------------------------------------
2866  ! Sanity checks on globrow
2867  !-------------------------------------------
2868  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2869  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Bad input globrow ',globrow; CALL fl(ofu)
2870  stop
2871  END IF
2872  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2873  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Non-local globrow ',globrow; CALL fl(ofu)
2874  stop
2875  END IF
2876 
2877  !-------------------------------------------
2878  ! Copy solution into given vector
2879  !-------------------------------------------
2880  DO i = 1, m
2881  val = selement(globrow)%x(i)
2882  x(i) = val
2883  END DO
2884 
2885 END SUBROUTINE getsolutionvector
2886 
2887 !-------------------------------------------------------------------------------
2891 !-------------------------------------------------------------------------------
2892 SUBROUTINE setidentitytestcase
2893  !-----------------------------------------------------------------------------
2894  ! Local variables
2895  !-----------------------------------------------------------------------------
2896  INTEGER :: i, j, globrow, globrowoff
2897 
2898  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case ------'; CALL fl(ofu)
2899 
2900  !-----------------------------------------
2901  !Initialize the arrays with identity
2902  !-----------------------------------------
2903  DO globrow = startglobrow, endglobrow, 1
2904  globrowoff = globrow-startglobrow+1
2905  DO i = 1, m
2906  DO j = 1, m
2907  lelement(1, globrowoff)%L(i,j) = zero
2908  IF ( i .EQ. j ) THEN
2909  lelement(1, globrowoff)%D(i,j) = one
2910  ELSE
2911  lelement(1, globrowoff)%D(i,j) = zero
2912  END IF
2913  lelement(1, globrowoff)%U(i,j) = zero
2914  END DO
2915  lelement(1, globrowoff)%b(i,1) = one
2916  END DO
2917  !-------------------------------------------
2918  !Save a copy of this original problem
2919  !-------------------------------------------
2920  orig(globrowoff)%L = lelement(1,globrowoff)%L
2921  orig(globrowoff)%D = lelement(1,globrowoff)%D
2922  orig(globrowoff)%U = lelement(1,globrowoff)%U
2923  orig(globrowoff)%b = lelement(1,globrowoff)%b
2924  END DO
2925 
2926  !-------------------------------------------
2927  matdirtied = .true.
2928  rhsdirtied = .true.
2929 
2930 END SUBROUTINE setidentitytestcase
2931 
2932 !-------------------------------------------------------------------------------
2937 !-------------------------------------------------------------------------------
2938 SUBROUTINE setidentityrhs
2939  !-----------------------------------------------------------------------------
2940  ! Local variables
2941  !-----------------------------------------------------------------------------
2942  INTEGER :: i, j, globrow, globrowoff
2943 
2944  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case RHS ------'; CALL fl(ofu)
2945 
2946  !-----------------------------------------
2947  !Initialize the arrays with identity
2948  !-----------------------------------------
2949  DO globrow = startglobrow, endglobrow, 1
2950  globrowoff = globrow-startglobrow+1
2951  DO i = 1, m
2952  lelement(1, globrowoff)%b(i,1) = one
2953  END DO
2954  !-------------------------------------------
2955  !Save a copy of this original RHS
2956  !-------------------------------------------
2957  orig(globrowoff)%b = lelement(1,globrowoff)%b
2958  END DO
2959 
2960  !-------------------------------------------
2961  rhsdirtied = .true.
2962 
2963 END SUBROUTINE setidentityrhs
2964 
2965 !-------------------------------------------------------------------------------
2969 !-------------------------------------------------------------------------------
2970 SUBROUTINE setrandomtestcase
2971  !-----------------------------------------------------------------------------
2972  ! Local variables
2973  !-----------------------------------------------------------------------------
2974  REAL(dp) :: randval, rmin, rmax
2975  INTEGER :: rngwidth
2976  INTEGER, ALLOCATABLE :: seeds(:)
2977  INTEGER :: i, j, globrow, globrowoff
2978 
2979  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case ------'; CALL fl(ofu)
2980 
2981  !-----------------------------------------
2982  !Initialize random number seeds
2983  !-----------------------------------------
2984  CALL random_seed( size=rngwidth)
2985  ALLOCATE( seeds(rngwidth) )
2986  DO i = 1, rngwidth
2987  seeds(i) = i*(rank+100)*p
2988  END DO
2989  CALL random_seed( put=seeds(1:rngwidth) )
2990  DEALLOCATE( seeds )
2991 
2992  !-----------------------------------------
2993  !Write the header information for problem
2994  !-----------------------------------------
2995  IF( writeproblemfile ) THEN !Write the dimensions N, NZ
2996  IF(kpdbg) WRITE(pfu,*) n*m; CALL fl(pfu)
2997  IF(kpdbg) WRITE(pfu,*) (3*n-2)*m*m; CALL fl(pfu)
2998  END IF
2999 
3000  !-----------------------------------------
3001  !Initialize the arrays with random values
3002  !-----------------------------------------
3003  rmin = 0.01
3004  rmax = 1.00
3005  DO globrow = startglobrow, endglobrow, 1
3006  globrowoff = globrow-startglobrow+1
3007  DO i = 1, m
3008  DO j = 1, m
3009  IF ( globrow .EQ. 1 ) THEN
3010  randval = 0.0
3011  ELSE
3012  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3013  IF( writeproblemfile ) THEN
3014  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
3015  END IF
3016  END IF
3017  lelement(1, globrowoff)%L(i,j) = randval
3018 
3019  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3020  lelement(1, globrowoff)%D(i,j) = randval
3021  IF( writeproblemfile ) THEN
3022  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
3023  END IF
3024 
3025  IF ( globrow .EQ. n ) THEN
3026  randval = 0.0
3027  ELSE
3028  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3029  IF( writeproblemfile ) THEN
3030  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
3031  END IF
3032  END IF
3033  lelement(1, globrowoff)%U(i,j) = randval
3034  END DO
3035  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3036  lelement(1, globrowoff)%b(i,1) = randval
3037  END DO
3038 
3039  !-------------------------------------------
3040  !Save a copy of this original problem
3041  !-------------------------------------------
3042  orig(globrowoff)%L = lelement(1,globrowoff)%L
3043  orig(globrowoff)%D = lelement(1,globrowoff)%D
3044  orig(globrowoff)%U = lelement(1,globrowoff)%U
3045  orig(globrowoff)%b = lelement(1,globrowoff)%b
3046  END DO
3047 
3048  IF (writeproblemfile) THEN
3049  DO globrow = startglobrow, endglobrow, 1
3050  globrowoff = globrow-startglobrow+1
3051  DO i = 1, m
3052  IF(kpdbg) WRITE(pfu,*) lelement(1,globrowoff)%b(i,1); CALL fl(pfu)
3053  END DO
3054  END DO
3055  END IF
3056 
3057  !-------------------------------------------
3058  matdirtied = .true.
3059  rhsdirtied = .true.
3060 
3061 END SUBROUTINE setrandomtestcase
3062 
3063 !-------------------------------------------------------------------------------
3068 !-------------------------------------------------------------------------------
3069 SUBROUTINE setrandomrhs( randseedoff )
3070  !-----------------------------------------------------------------------------
3071  ! Formal arguments
3072  !-----------------------------------------------------------------------------
3073  INTEGER, INTENT(IN) :: randseedoff
3074  !-----------------------------------------------------------------------------
3075  ! Local variables
3076  !-----------------------------------------------------------------------------
3077  REAL(dp) :: randval, rmin, rmax
3078  INTEGER :: rngwidth
3079  INTEGER, ALLOCATABLE :: seeds(:)
3080  INTEGER :: i, j, globrow, globrowoff
3081 
3082  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case RHS ------'; CALL fl(ofu)
3083 
3084  !-----------------------------------------
3085  !Initialize random number seeds
3086  !-----------------------------------------
3087  CALL random_seed( size=rngwidth)
3088  ALLOCATE( seeds(rngwidth) )
3089  DO i = 1, rngwidth
3090  seeds(i) = i*(rank+100+34+randseedoff)*p
3091  END DO
3092  CALL random_seed( put=seeds(1:rngwidth) )
3093  DEALLOCATE( seeds )
3094 
3095  !-----------------------------------------
3096  !Initialize the arrays with random values
3097  !-----------------------------------------
3098  rmin = 0.01
3099  rmax = 1.00
3100  DO globrow = startglobrow, endglobrow, 1
3101  globrowoff = globrow-startglobrow+1
3102  DO i = 1, m
3103  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3104  lelement(1, globrowoff)%b(i,1) = randval
3105  END DO
3106 
3107  !-------------------------------------------
3108  !Save a copy of this original RHS
3109  !-------------------------------------------
3110  orig(globrowoff)%b = lelement(1,globrowoff)%b
3111  END DO
3112 
3113  !-------------------------------------------
3114  rhsdirtied = .true.
3115 
3116 END SUBROUTINE setrandomrhs
3117 
3118 !-------------------------------------------------------------------------------
3122 !-------------------------------------------------------------------------------
3123 SUBROUTINE finalize( do_mpifinalize )
3124  !-----------------------------------------------------------------------------
3125  ! Formal arguments
3126  !-----------------------------------------------------------------------------
3127  LOGICAL, INTENT(IN) :: do_mpifinalize
3128  !-----------------------------------------------------------------------------
3129  ! Local variables
3130  !-----------------------------------------------------------------------------
3131  INTEGER :: level
3132  INTEGER :: globrow
3133  INTEGER :: mpierr
3134  INTEGER :: tempinvcount, tempmulcount, tempsolcount
3135 
3136  !-----------------------------------------------------------------------------
3137  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3138  IF(kpdbg) WRITE(ofu,*) '------ Finalizing start ------'; CALL fl(ofu)
3139 
3140  !-----------------------------------------------------------------------------
3141  CALL plbfinalize()
3142 
3143  !-----------------------------------------------------------------------------
3144  ! Deallocate mats and hats, if any
3145  !-----------------------------------------------------------------------------
3146  IF ( ALLOCATED(lelement) ) THEN
3147  DO level = lbound(lelement,1), ubound(lelement,1)
3148  DO globrow = lbound(lelement,2), ubound(lelement,2)
3149  IF ( ALLOCATED(lelement(level,globrow)%L) ) THEN
3150  DEALLOCATE( lelement(level, globrow)%L )
3151  END IF
3152  IF ( ALLOCATED(lelement(level,globrow)%D) ) THEN
3153  DEALLOCATE( lelement(level, globrow)%D )
3154  END IF
3155  IF ( ALLOCATED(lelement(level,globrow)%U) ) THEN
3156  DEALLOCATE( lelement(level, globrow)%U )
3157  END IF
3158  IF ( ALLOCATED(lelement(level,globrow)%b) ) THEN
3159  DEALLOCATE( lelement(level, globrow)%b )
3160  END IF
3161  IF ( ALLOCATED(lelement(level,globrow)%pivot) ) THEN
3162  DEALLOCATE( lelement(level, globrow)%pivot )
3163  END IF
3164  END DO
3165  END DO
3166  DEALLOCATE(lelement)
3167  END IF
3168  IF ( ALLOCATED(orig) ) THEN
3169  DO globrow = lbound(orig,1), ubound(orig,1)
3170  IF ( ALLOCATED(orig(globrow)%L) ) THEN
3171  DEALLOCATE( orig(globrow)%L )
3172  END IF
3173  IF ( ALLOCATED(orig(globrow)%D) ) THEN
3174  DEALLOCATE( orig(globrow)%D )
3175  END IF
3176  IF ( ALLOCATED(orig(globrow)%U) ) THEN
3177  DEALLOCATE( orig(globrow)%U )
3178  END IF
3179  IF ( ALLOCATED(orig(globrow)%b) ) THEN
3180  DEALLOCATE( orig(globrow)%b )
3181  END IF
3182  IF ( ALLOCATED(orig(globrow)%pivot) ) THEN
3183  DEALLOCATE( orig(globrow)%pivot )
3184  END IF
3185  END DO
3186  DEALLOCATE(orig)
3187  END IF
3188 
3189  IF ( ALLOCATED(selement) ) THEN
3190  DO globrow = 1, n, 1
3191  IF ( ALLOCATED(selement(globrow)%x) ) THEN
3192  DEALLOCATE( selement(globrow)%x )
3193  END IF
3194  END DO
3195  DEALLOCATE(selement)
3196  END IF
3197 
3198  !-----------------------------------------------------------------------------
3199 #if defined(MPI_OPT)
3200  IF ( usebarriers ) THEN
3201  IF(kpdbg) WRITE(ofu,*) 'Barrier in finalize'; CALL fl(ofu)
3202  CALL mpi_barrier( ns_comm, mpierr )
3203  IF(kpdbg) WRITE(ofu,*) 'Done barrier in finalize'; CALL fl(ofu)
3204  END IF
3205  !-----------------------------------------------------------------------------
3206  IF ( do_mpifinalize ) THEN
3207  CALL mpi_finalize( mpierr )
3208  END IF
3209 #endif
3210 
3211  !-----------------------------------------------------------------------------
3212  tempmulcount=totmatmulcount; IF ( tempmulcount .LE. 0 ) tempmulcount = 1
3213  tempinvcount=totinvcount; IF ( tempinvcount .LE. 0 ) tempinvcount = 1
3214  tempsolcount=totmatsolcount; IF ( tempsolcount .LE. 0 ) tempsolcount = 1
3215  IF(kpdbg) WRITE(ofu,*) 'N=', n, ' M=', m, ' P=', p, ' rank=', rank
3216  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory ', membytes/1e6, ' MB'
3217  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Computation ', tottime-totcommtime,' sec'
3218  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Communication ', totcommtime,' sec'
3219  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix inv ',totinvcount, ' * ', &
3220  totinvtime/tempinvcount,' sec = ', totinvtime, ' sec'
3221  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix mul ',totmatmulcount, ' * ', &
3222  totmatmultime/tempmulcount, ' sec = ', totmatmultime, ' sec'
3223  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix sol ',totmatsolcount, ' * ', &
3224  totmatsoltime/tempsolcount, ' sec = ', totmatsoltime, ' sec'
3225  IF(kpdbg) WRITE(ofu,*) 'Finalized rank ', rank
3226  IF(kpdbg) WRITE(ofu,*) '------ Finalization end ------'; CALL fl(ofu)
3227 END SUBROUTINE finalize
3228 
3229 !-------------------------------------------------------------------------------
3233 !-------------------------------------------------------------------------------
3234 LOGICAL FUNCTION iseven( num )
3235  INTEGER, INTENT(IN) :: num
3236  iseven = mod(num,2) .EQ. 0
3237 END FUNCTION iseven
3238 
3239 !-------------------------------------------------------------------------------
3243 !-------------------------------------------------------------------------------
3244 LOGICAL FUNCTION isodd( num )
3245  INTEGER, INTENT(IN) :: num
3246  isodd = (.NOT. iseven(num))
3247 END FUNCTION isodd
3248 
3249 !-------------------------------------------------------------------------------
3253 !-------------------------------------------------------------------------------
3254 FUNCTION lr2gr( locrow, level ) result ( globrow )
3255  !-----------------------------------------------------------------------------
3256  ! Formal arguments
3257  !-----------------------------------------------------------------------------
3258  INTEGER, INTENT(IN) :: locrow
3259  INTEGER, INTENT(IN) :: level
3260  INTEGER :: globrow
3261 
3262  !-----------------------------------------------------------------------------
3263  ! Local variables
3264  !-----------------------------------------------------------------------------
3265  INTEGER :: i
3266 
3267  !-----------------------------------------------------------------------------
3268  ! Invert the rule r=(r+1)/2 backward for all levels
3269  !-----------------------------------------------------------------------------
3270  globrow = locrow
3271  levelloop: DO i = level-1, 1, -1
3272  globrow = 2*globrow - 1
3273  END DO levelloop
3274 
3275  !-----------------------------------------------------------------------------
3276  ! DEBUGGING: Just double-check
3277  !-----------------------------------------------------------------------------
3278  IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow ) THEN
3279  stop !Consistency check failed
3280  END IF
3281 
3282  RETURN
3283 END FUNCTION lr2gr
3284 
3285 !-------------------------------------------------------------------------------
3291 !-------------------------------------------------------------------------------
3292 FUNCTION gr2lr( globrow, level ) result ( locrow )
3293  !-----------------------------------------------------------------------------
3294  ! Formal arguments
3295  !-----------------------------------------------------------------------------
3296  INTEGER, INTENT(IN) :: globrow
3297  INTEGER, INTENT(IN) :: level
3298  INTEGER :: locrow
3299 
3300  !-----------------------------------------------------------------------------
3301  ! Local variables
3302  !-----------------------------------------------------------------------------
3303  INTEGER :: i
3304 
3305  !-----------------------------------------------------------------------------
3306  ! Apply the rule r=(r+1)/2 for all odd-numbered rows
3307  ! Any time r becomes even, give up
3308  !-----------------------------------------------------------------------------
3309  locrow = globrow
3310  levelloop: DO i = 1, level-1, 1
3311  IF ( iseven(locrow) ) THEN !Even-numbered rows don't go any further in level
3312  locrow = 0
3313  EXIT levelloop !Stop looping
3314  END IF
3315  locrow = (locrow+1) / 2
3316  END DO levelloop
3317 
3318  RETURN
3319 END FUNCTION gr2lr
3320 
3321 !-------------------------------------------------------------------------------
3325 !-------------------------------------------------------------------------------
3326 FUNCTION gr2rank( globrow ) result ( outrank )
3327  !-----------------------------------------------------------------------------
3328  ! Formal arguments
3329  !-----------------------------------------------------------------------------
3330  INTEGER, INTENT(IN) :: globrow
3331  INTEGER :: outrank
3332 
3333  !-----------------------------------------------------------------------------
3334  ! Local variables
3335  !-----------------------------------------------------------------------------
3336  INTEGER :: m !Average integral number of rows per processor
3337  INTEGER :: spill !Spill over beyond prefect loading of rows to processors
3338 
3339  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
3340  outrank = -1
3341  ELSE
3342  m = n / p
3343  spill = mod( n, p )
3344  IF ( globrow .LE. (m+1)*spill ) THEN
3345  outrank = (globrow-1)/(m+1)
3346  ELSE
3347  outrank = (globrow-1 - (m+1)*spill)/m + spill
3348  END IF
3349  END IF
3350 
3351  RETURN
3352 END FUNCTION gr2rank
3353 
3354 !-------------------------------------------------------------------------------
3358 !-------------------------------------------------------------------------------
3359 FUNCTION lr2rank( locrow, level ) result ( outrank )
3360  !-----------------------------------------------------------------------------
3361  ! Formal arguments
3362  !-----------------------------------------------------------------------------
3363  INTEGER, INTENT(IN) :: locrow
3364  INTEGER, INTENT(IN) :: level
3365  INTEGER :: outrank
3366 
3367  !-----------------------------------------------------------------------------
3368  ! Local variables
3369  !-----------------------------------------------------------------------------
3370  INTEGER :: globrow !Global row number of given locrow
3371 
3372  globrow = lr2gr( locrow, level )
3373  outrank = gr2rank( globrow )
3374 
3375  RETURN
3376 END FUNCTION lr2rank
3377 
3378 !-------------------------------------------------------------------------------
3384 !-------------------------------------------------------------------------------
3385 #if defined(MPI_OPT)
3386 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
3387  !-----------------------------------------------------------------------------
3388  ! Formal arguments
3389  !-----------------------------------------------------------------------------
3390  INTEGER, INTENT(IN) :: locrow
3391  INTEGER, INTENT(IN) :: level
3392  INTEGER, INTENT(IN) :: startlocrow
3393  INTEGER, INTENT(IN) :: endlocrow
3394  LOGICAL, INTENT(IN) :: bonly
3395  !-----------------------------------------------------------------------------
3396  ! Local variables
3397  !-----------------------------------------------------------------------------
3398  INTEGER :: globrow
3399  INTEGER :: globrowoff
3400  INTEGER :: prevglobrowoff
3401  INTEGER :: nextglobrowoff
3402 
3403  !-------------------------------------------------------------
3404  ! Nothing to do if last level
3405  !-------------------------------------------------------------
3406  IF ( level .GE. nlevels ) THEN
3407  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: no op '; CALL fl(ofu)
3408  IF ( rank .NE. 0 ) THEN
3409  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: impossible ',rank; CALL fl(ofu)
3410  stop
3411  END IF
3412  RETURN
3413  END IF
3414 
3415  !-------------------------------------------------------------
3416  ! Sanity check
3417  !-------------------------------------------------------------
3418  IF ( iseven( locrow ) ) THEN
3419  IF(kpdbg) WRITE(ofu,*) 'mat-mul: odd only ',globrow,' ',locrow; CALL fl(ofu); stop
3420  END IF
3421  IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) ) THEN
3422  IF(kpdbg) WRITE(ofu,*) 'mat-mul: locrow prob ',globrow,' ',locrow;CALL fl(ofu);stop
3423  END IF
3424 
3425  !-------------------------------------------------------------
3426  globrow = lr2gr( locrow, level )
3427  globrowoff = globrow - startglobrow + 1
3428  IF(kpdbg) WRITE(ofu,*) ' Compute mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3429 
3430  !-------------------------------------------------------------
3431  ! Ensure there are mats at (thislevel, thisrow)
3432  !-------------------------------------------------------------
3433  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D ) ) THEN
3434  IF(kpdbg) WRITE(ofu,*) ' Copying bad D'; CALL fl(ofu); stop
3435  END IF
3436  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L ) ) THEN
3437  IF(kpdbg) WRITE(ofu,*) ' Copying bad L'; CALL fl(ofu); stop
3438  END IF
3439  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U ) ) THEN
3440  IF(kpdbg) WRITE(ofu,*) ' Copying bad U'; CALL fl(ofu); stop
3441  END IF
3442  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b ) ) THEN
3443  IF(kpdbg) WRITE(ofu,*) ' Copying bad b'; CALL fl(ofu); stop
3444  END IF
3445 
3446  !-------------------------------------------------------------
3447  ! Ensure there is space at (nextlevel, thisrow)
3448  !-------------------------------------------------------------
3449  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%D ) ) THEN
3450  IF(kpdbg) WRITE(ofu,*) ' Copying bad D +1'; CALL fl(ofu); stop
3451  END IF
3452  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%L ) ) THEN
3453  IF(kpdbg) WRITE(ofu,*) ' Copying bad L +1'; CALL fl(ofu); stop
3454  END IF
3455  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%U ) ) THEN
3456  IF(kpdbg) WRITE(ofu,*) ' Copying bad U +1'; CALL fl(ofu); stop
3457  END IF
3458  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%b ) ) THEN
3459  IF(kpdbg) WRITE(ofu,*) ' Copying bad b +1'; CALL fl(ofu); stop
3460  END IF
3461 
3462  !-------------------------------------------------------------
3463  ! Copy over the D and b as is first
3464  !-------------------------------------------------------------
3465  IF ( .NOT. bonly ) THEN
3466  lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3467  END IF
3468  lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3469 
3470  !-------------------------------------------------------------
3471  ! Check if this row has a top neighbor at this level
3472  !-------------------------------------------------------------
3473  IF ( lr2rank(locrow-1,level) .LT. 0 ) THEN
3474  ! No rank before ours
3475  IF ( .NOT. bonly ) THEN
3476  lelement(level+1,globrowoff)%U = 0
3477  IF(kpdbg) WRITE(ofu,*) ' ZERO L'; CALL fl(ofu)
3478  END IF
3479  ELSE
3480  IF ( locrow .EQ. startlocrow ) THEN
3481  prevglobrowoff = 0
3482  ELSE IF ( locrow .GT. startlocrow ) THEN
3483  prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3484  ELSE
3485  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible prev'; CALL fl(ofu); stop
3486  END IF
3487 
3488  IF ( .NOT. bonly ) THEN
3489  !------------------------------------------------
3490  ! D = D - L*Uhat(prev)
3491  !------------------------------------------------
3492  CALL bsystemclock(mattimer1)
3493  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3494  lelement(level,prevglobrowoff)%U, &
3495  one, lelement(level+1,globrowoff)%D )
3496  CALL bsystemclock(mattimer2)
3497  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3498  IF(kpdbg) WRITE(ofu,*) ' Dtop'; CALL fl(ofu)
3499 
3500  !------------------------------------------------
3501  ! L = -L*Lhat(prev)
3502  !------------------------------------------------
3503  CALL bsystemclock(mattimer1)
3504  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3505  lelement(level,prevglobrowoff)%L, &
3506  zero, lelement(level+1,globrowoff)%L )
3507  CALL bsystemclock(mattimer2)
3508  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3509  IF(kpdbg) WRITE(ofu,*) ' L'; CALL fl(ofu)
3510  END IF
3511 
3512  !------------------------------------------------
3513  ! b = b - L*bhat(prev)
3514  !------------------------------------------------
3515  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3516  lelement(level,prevglobrowoff)%b(:,1), &
3517  one, lelement(level+1,globrowoff)%b(:,1) )
3518  IF(kpdbg) WRITE(ofu,*) ' btop'; CALL fl(ofu)
3519  END IF
3520 
3521  !-------------------------------------------------------------
3522  ! Check if this row has a bottom neighbor at this level
3523  !-------------------------------------------------------------
3524  IF ( lr2rank(locrow+1,level) .LT. 0 ) THEN
3525  ! No rank after ours
3526  IF ( .NOT. bonly ) THEN
3527  lelement(level+1,globrowoff)%U = 0
3528  IF(kpdbg) WRITE(ofu,*) ' ZERO U'; CALL fl(ofu)
3529  END IF
3530  ELSE
3531  IF ( locrow .EQ. endlocrow ) THEN
3532  nextglobrowoff = endglobrow - startglobrow + 2
3533  ELSE IF ( locrow .LT. endlocrow ) THEN
3534  nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3535  ELSE
3536  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible next'; CALL fl(ofu); stop
3537  END IF
3538 
3539  IF ( .NOT. bonly ) THEN
3540  !------------------------------------------------
3541  ! D = D - U*Lhat(next)
3542  !------------------------------------------------
3543  CALL bsystemclock(mattimer1)
3544  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3545  lelement(level,nextglobrowoff)%L, &
3546  one, lelement(level+1,globrowoff)%D )
3547  CALL bsystemclock(mattimer2)
3548  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3549  IF(kpdbg) WRITE(ofu,*) ' Dbot'; CALL fl(ofu)
3550 
3551  !------------------------------------------------
3552  ! U = -U*Uhat(next)
3553  !------------------------------------------------
3554  CALL bsystemclock(mattimer1)
3555  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3556  lelement(level,nextglobrowoff)%U, &
3557  zero, lelement(level+1,globrowoff)%U )
3558  CALL bsystemclock(mattimer2)
3559  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3560  IF(kpdbg) WRITE(ofu,*) ' U'; CALL fl(ofu)
3561  END IF
3562 
3563  !------------------------------------------------
3564  ! b = b - U*bhat(next)
3565  !------------------------------------------------
3566  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3567  lelement(level,nextglobrowoff)%b(:,1), &
3568  one, lelement(level+1,globrowoff)%b(:,1) )
3569  IF(kpdbg) WRITE(ofu,*) ' bbot'; CALL fl(ofu)
3570  END IF
3571 END SUBROUTINE computeforwardoddrowhats
3572 
3573 !-------------------------------------------------------------------------------
3577 !-------------------------------------------------------------------------------
3578 SUBROUTINE forwardsolve
3579  !-----------------------------------------------------------------------------
3580  ! Local variables
3581  !-----------------------------------------------------------------------------
3582  INTEGER :: level
3583  INTEGER :: locrow
3584  INTEGER :: globrow
3585  INTEGER :: globrowoff
3586  INTEGER :: startlocrow
3587  INTEGER :: endlocrow
3588  INTEGER :: rowoffset
3589  INTEGER :: nbrrank
3590  INTEGER :: nbrmpireqindx
3591  INTEGER :: nbrmpireqcnt
3592  INTEGER :: nbrmpinirecvs
3593  INTEGER :: nbrmpinisends
3594  INTEGER :: nbrmpireq(6)
3595  INTEGER :: nbrmpierr(6)
3596  INTEGER :: mpiwaiterr
3597  INTEGER :: mpierr
3598  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3599  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3600  INTEGER :: waitmatchindex
3601  INTEGER :: stag
3602  INTEGER :: reqi
3603  INTEGER :: blaserr
3604 
3605  CALL bsystemclock(globtimer1)
3606 
3607  !-----------------------------------------------------------------------------
3608  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3609  IF(kpdbg) WRITE(ofu,*) '------ Forward solve start ------'; CALL fl(ofu)
3610  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3611 
3612  !-----------------------------------------------------------------------------
3613  CALL plbforwardinitialize()
3614 
3615  !-----------------------------------------------------------------------------
3616  ! Forward solving loop in which even rows compute inverses and odd rows
3617  ! compute matrix-matrix products for new terms
3618  !-----------------------------------------------------------------------------
3619  forwardsolveloop: DO level = 1, nlevels, 1
3620  !---------------------------------------------------------------------------
3621  IF ( usebarriers ) THEN
3622  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward level ', level; CALL fl(ofu)
3623  CALL mpi_barrier( ns_comm, mpierr )
3624  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward level ', level; CALL fl(ofu)
3625  END IF
3626 
3627  !---------------------------------------------------------------------------
3628  !Determine start and end local rows at current level
3629  !---------------------------------------------------------------------------
3630  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
3631  endlocrow = -1 !Start with an invalid value (less than startlocrow)
3632  DO globrow = startglobrow, endglobrow, 1
3633  locrow = gr2lr( globrow, level ) !globrow's place at current level
3634  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
3635  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
3636  startlocrow = locrow
3637  END IF
3638  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
3639  endlocrow = locrow
3640  END IF
3641  END IF
3642  END DO
3643 
3644  !---------------------------------------------------------------------------
3645  !We may have run out of work; see if we have a valid range of rows left
3646  !---------------------------------------------------------------------------
3647  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
3648  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward level ', level; CALL fl(ofu)
3649  CALL plbforwardinitializelevel( level, .false. )
3650  CALL plbforwardfinalizelevel( level, .false. )
3651  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
3652  ELSE
3653  CALL plbforwardinitializelevel( level, .true. )
3654  END IF
3655 
3656  IF(kpdbg) WRITE(ofu,*) '**** Forward level ', level, ' ****'; CALL fl(ofu)
3657 
3658  !---------------------------------------------------------------------------
3659  !Allocate memory for incoming nbr values, if any, at this level
3660  !---------------------------------------------------------------------------
3661  IF ( isodd(startlocrow) .AND. &
3662  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
3663  globrowoff = 0
3664  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3665  ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3666  ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3667  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3668  CALL chargememory( (2*m*m+m)*dpsz )
3669  END IF
3670  lelement(level, globrowoff)%L = 0
3671  lelement(level, globrowoff)%U = 0
3672  lelement(level, globrowoff)%b = 0
3673  END IF
3674  IF ( isodd(endlocrow) .AND. &
3675  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
3676  globrowoff = endglobrow-startglobrow+2
3677  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3678  ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3679  ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3680  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3681  CALL chargememory( (2*m*m+m)*dpsz )
3682  END IF
3683  lelement(level, globrowoff)%L = 0
3684  lelement(level, globrowoff)%U = 0
3685  lelement(level, globrowoff)%b = 0
3686  END IF
3687 
3688  !---------------------------------------------------------------------------
3689  !Allocate memory at next level for those rows that are odd at current level
3690  !---------------------------------------------------------------------------
3691  IF ( level .LT. nlevels ) THEN
3692  DO locrow = startlocrow, endlocrow, 1
3693  IF ( isodd(locrow) ) THEN
3694  globrow = lr2gr( locrow, level )
3695  globrowoff = globrow-startglobrow+1
3696  IF ( .NOT. ALLOCATED( lelement(level+1, globrowoff)%L ) ) THEN
3697  ALLOCATE( lelement(level+1, globrowoff)%L(m,m) )
3698  ALLOCATE( lelement(level+1, globrowoff)%D(m,m) )
3699  ALLOCATE( lelement(level+1, globrowoff)%U(m,m) )
3700  ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3701  CALL chargememory( (2*m*m+m)*dpsz )
3702  END IF
3703  lelement(level+1, globrowoff)%L = 0
3704  lelement(level+1, globrowoff)%D = 0
3705  lelement(level+1, globrowoff)%U = 0
3706  lelement(level+1, globrowoff)%b = 0
3707  END IF
3708  END DO
3709  END IF
3710 
3711  !---------------------------------------------------------------------------
3712  !Reset requests
3713  !---------------------------------------------------------------------------
3714  DO nbrmpireqindx = 1, 6
3715  nbrmpireq(nbrmpireqindx) = mpi_request_null
3716  END DO
3717 
3718  !---------------------------------------------------------------------------
3719  !Pre-post the expected MPI receives via a non-blocking recv primitive
3720  !Pre-posting can help overlap communication with computing inverses
3721  !---------------------------------------------------------------------------
3722  nbrmpireqindx = 1
3723  nbrmpinirecvs = 0
3724  nbrrank = lr2rank(startlocrow-1,level)
3725  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
3726  !-------------------------------------------------------------------------
3727  !Our top row at this level is odd and top row's previous is valid
3728  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
3729  !-------------------------------------------------------------------------
3730  globrowoff = 0
3731  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
3732 
3733  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3734  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); stop
3735  END IF
3736  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3737  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); stop
3738  END IF
3739  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3740  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); stop
3741  END IF
3742 
3743  CALL bsystemclock(loctimer1)
3744  CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3745  nbrrank, 1, ns_comm, nbrmpireq(nbrmpireqindx), &
3746  nbrmpierr(nbrmpireqindx) )
3747  nbrmpireqindx = nbrmpireqindx + 1
3748  nbrmpinirecvs = nbrmpinirecvs + 1
3749  IF(kpdbg) WRITE(ofu,*) ' Irecv 1 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3750 
3751  CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3752  nbrrank, 2, ns_comm, nbrmpireq(nbrmpireqindx), &
3753  nbrmpierr(nbrmpireqindx))
3754  nbrmpireqindx = nbrmpireqindx + 1
3755  nbrmpinirecvs = nbrmpinirecvs + 1
3756  IF(kpdbg) WRITE(ofu,*) ' Irecv 2 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3757 
3758  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3759  nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3760  nbrmpierr(nbrmpireqindx))
3761  nbrmpireqindx = nbrmpireqindx + 1
3762  nbrmpinirecvs = nbrmpinirecvs + 1
3763  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3764  CALL bsystemclock(loctimer2)
3765  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3766  END IF
3767 
3768  nbrrank = lr2rank(endlocrow+1,level)
3769  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
3770  !-------------------------------------------------------------------------
3771  !Our bottom row at this level is odd and bottom row's next is valid
3772  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
3773  !-------------------------------------------------------------------------
3774  globrowoff = endglobrow-startglobrow+2
3775  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
3776 
3777  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3778  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); stop
3779  END IF
3780  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3781  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); stop
3782  END IF
3783  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3784  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); stop
3785  END IF
3786 
3787  CALL bsystemclock(loctimer1)
3788  CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3789  nbrrank, 4, ns_comm, nbrmpireq(nbrmpireqindx), &
3790  nbrmpierr(nbrmpireqindx))
3791  nbrmpireqindx = nbrmpireqindx + 1
3792  nbrmpinirecvs = nbrmpinirecvs + 1
3793  IF(kpdbg) WRITE(ofu,*) ' Irecv 4 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3794 
3795  CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3796  nbrrank, 5, ns_comm, nbrmpireq(nbrmpireqindx), &
3797  nbrmpierr(nbrmpireqindx))
3798  nbrmpireqindx = nbrmpireqindx + 1
3799  nbrmpinirecvs = nbrmpinirecvs + 1
3800  IF(kpdbg) WRITE(ofu,*) ' Irecv 5 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3801 
3802  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3803  nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3804  nbrmpierr(nbrmpireqindx))
3805  nbrmpireqindx = nbrmpireqindx + 1
3806  nbrmpinirecvs = nbrmpinirecvs + 1
3807  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3808  CALL bsystemclock(loctimer2)
3809  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3810  END IF
3811 
3812  !---------------------------------------------------------------------------
3813  !Compute inverses for even-numbered rows at this level
3814  !---------------------------------------------------------------------------
3815  nbrmpinisends = 0
3816  DO locrow = startlocrow, endlocrow, 1
3817  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
3818  globrow = lr2gr( locrow, level )
3819  globrowoff = globrow-startglobrow+1
3820 
3821  IF(kpdbg) WRITE(ofu,*) ' Compute even hats ',globrow,' ', locrow; CALL fl(ofu)
3822 
3823  !-----------------------------------------------------------------------
3824  !Sanity checks
3825  !-----------------------------------------------------------------------
3826  IF ( level .EQ. nlevels ) THEN
3827  IF ( (rank .NE. 0) .OR. &
3828  (startlocrow .NE. endlocrow) .OR. &
3829  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
3830  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
3831  stop
3832  END IF
3833  END IF
3834  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
3835  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); stop
3836  END IF
3837  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
3838  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); stop
3839  END IF
3840  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3841  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); stop
3842  END IF
3843  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3844  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); stop
3845  END IF
3846  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3847  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); stop
3848  END IF
3849 
3850  !-----------------------------------------------------------------------
3851  !Do LU factorization of D (in place into D itself)
3852  !-----------------------------------------------------------------------
3853  CALL bsystemclock(mattimer1)
3854  CALL plbdgetrf( lelement(level,globrowoff)%D, &
3855  lelement(1,globrowoff)%pivot, blaserr )
3856  CALL bsystemclock(mattimer2)
3857  CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3858  IF (blaserr .NE. 0) THEN
3859  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRF failed ', blaserr; CALL fl(ofu); stop
3860  END IF
3861 
3862  !-----------------------------------------------------------------------
3863  !Compute hats D^(-1)L and D^(-1)U if not last level
3864  !-----------------------------------------------------------------------
3865  IF ( level .LT. nlevels ) THEN
3866  CALL bsystemclock(mattimer1)
3867  CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3868  lelement(1,globrowoff)%pivot, &
3869  lelement(level,globrowoff)%L, blaserr )
3870  CALL bsystemclock(mattimer2)
3871  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3872  IF (blaserr .NE. 0) THEN
3873  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS L failed'; CALL fl(ofu); stop
3874  END IF
3875  CALL bsystemclock(mattimer1)
3876  CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3877  lelement(1,globrowoff)%pivot, &
3878  lelement(level,globrowoff)%U, blaserr )
3879  CALL bsystemclock(mattimer2)
3880  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3881  IF (blaserr .NE. 0) THEN
3882  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS U failed'; CALL fl(ofu); stop
3883  END IF
3884  END IF
3885 
3886  !-----------------------------------------------------------------------
3887  !Compute b-hats D^(-1)b
3888  !-----------------------------------------------------------------------
3889  CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
3890  lelement(1,globrowoff)%pivot, &
3891  lelement(level,globrowoff)%b, blaserr )
3892  IF (blaserr .NE. 0) THEN
3893  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); stop
3894  END IF
3895 
3896  !-----------------------------------------------------------------------
3897  !Send my Lhats, Uhats and bhats to neighbor row if that neighbor exists
3898  !and that neighbor row is hosted outside this rank
3899  !-----------------------------------------------------------------------
3900  DO rowoffset = -1, 1, 2
3901  nbrrank = lr2rank(locrow+rowoffset, level)
3902  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
3903  CALL bsystemclock(loctimer1)
3904  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3905  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); stop
3906  END IF
3907  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3908  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); stop
3909  END IF
3910  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3911  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); stop
3912  END IF
3913 
3914  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
3915  globrowoff = globrow-startglobrow+1
3916  stag = (((1-rowoffset))/2)*3
3917  CALL mpi_isend( lelement(level,globrowoff)%L, m*m, mpi_real8, &
3918  nbrrank, stag+1, ns_comm, &
3919  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3920  nbrmpireqindx = nbrmpireqindx + 1
3921  nbrmpinisends = nbrmpinisends + 1
3922  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+1,' ',globrowoff; CALL fl(ofu)
3923 
3924  CALL mpi_isend( lelement(level,globrowoff)%U, m*m, mpi_real8, &
3925  nbrrank, stag+2, ns_comm, &
3926  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3927  nbrmpireqindx = nbrmpireqindx + 1
3928  nbrmpinisends = nbrmpinisends + 1
3929  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+2,' ',globrowoff; CALL fl(ofu)
3930 
3931  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3932  nbrrank, stag+3, ns_comm, &
3933  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3934  nbrmpireqindx = nbrmpireqindx + 1
3935  nbrmpinisends = nbrmpinisends + 1
3936  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
3937  CALL bsystemclock(loctimer2)
3938  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3939  END IF
3940  END DO
3941  END IF
3942  END DO
3943 
3944  !---------------------------------------------------------------------------
3945  !Compute the matrix-matrix multiplications for non-boundary odd rows
3946  !---------------------------------------------------------------------------
3947  DO locrow = startlocrow, endlocrow, 1
3948  globrow = lr2gr( locrow, level )
3949  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
3950  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
3951  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3952  CALL computeforwardoddrowhats( locrow, level, &
3953  startlocrow, endlocrow, .false. )
3954  END IF
3955  END IF
3956  END DO
3957 
3958  !---------------------------------------------------------------------------
3959  !We have to wait for incoming even-numbered neighboring rows, before
3960  !computing inverses for the odd boundaries, if any.
3961  !Complete the send-receive of inverses, by doing a "wait all"
3962  !---------------------------------------------------------------------------
3963  nbrmpireqcnt = nbrmpireqindx - 1
3964  IF ( nbrmpireqcnt .GT. 0 ) THEN
3965  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
3966  CALL bsystemclock(loctimer1)
3967  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3968  CALL bsystemclock(loctimer2)
3969  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3970  END IF
3971 
3972  !---------------------------------------------------------------------------
3973  !Now we can compute inverses for odd boundaries, if any
3974  !---------------------------------------------------------------------------
3975  IF ( isodd(startlocrow) ) THEN
3976  globrow = lr2gr( startlocrow, level )
3977  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
3978  CALL computeforwardoddrowhats( startlocrow, level, &
3979  startlocrow, endlocrow, .false. )
3980  END IF
3981  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
3982  globrow = lr2gr( endlocrow, level )
3983  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
3984  CALL computeforwardoddrowhats( endlocrow, level, &
3985  startlocrow, endlocrow, .false. )
3986  END IF
3987  CALL plbforwardfinalizelevel( level, .true. )
3988  END DO forwardsolveloop
3989 
3990  !-----------------------------------------------------------------------------
3991  CALL plbforwardfinalize()
3992 
3993  !-----------------------------------------------------------------------------
3994  matdirtied = .false.
3995  rhsdirtied = .false.
3996 
3997  !-----------------------------------------------------------------------------
3998  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
3999  IF(kpdbg) WRITE(ofu,*) '------ Forward solve end ------'; CALL fl(ofu)
4000 
4001  CALL bsystemclock(globtimer2)
4002  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4003 
4004 END SUBROUTINE forwardsolve
4005 
4006 !-------------------------------------------------------------------------------
4011 !-------------------------------------------------------------------------------
4012 SUBROUTINE forwardupdateb
4013  !-----------------------------------------------------------------------------
4014  ! Local variables
4015  !-----------------------------------------------------------------------------
4016  INTEGER :: level
4017  INTEGER :: locrow
4018  INTEGER :: globrow
4019  INTEGER :: globrowoff
4020  INTEGER :: startlocrow
4021  INTEGER :: endlocrow
4022  INTEGER :: rowoffset
4023  INTEGER :: nbrrank
4024  INTEGER :: nbrmpireqindx
4025  INTEGER :: nbrmpireqcnt
4026  INTEGER :: nbrmpinirecvs
4027  INTEGER :: nbrmpinisends
4028  INTEGER :: nbrmpireq(6)
4029  INTEGER :: nbrmpierr(6)
4030  INTEGER :: mpiwaiterr
4031  INTEGER :: mpierr
4032  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
4033  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4034  INTEGER :: waitmatchindex
4035  INTEGER :: stag
4036  INTEGER :: reqi
4037  INTEGER :: blaserr
4038 
4039  !-----------------------------------------------------------------------------
4040  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4041  IF(kpdbg) WRITE(ofu,*) '------ Forward updateb start ------'; CALL fl(ofu)
4042  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4043 
4044  !-----------------------------------------------------------------------------
4045  IF ( matdirtied ) THEN
4046  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4047  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix ------'; CALL fl(ofu)
4048  IF(kpdbg) WRITE(ofu,*) ' ------ Forward solve, not updateb... ------'; CALL fl(ofu)
4049  CALL forwardsolve()
4050  RETURN
4051  END IF
4052 
4053  CALL bsystemclock(globtimer1)
4054 
4055  !-----------------------------------------------------------------------------
4056  CALL plbforwardinitialize()
4057 
4058  !-----------------------------------------------------------------------------
4059  ! Forward solving loop in which even rows have already computed inverses and
4060  ! odd rows recompute matrix-vector products with new rhs
4061  !-----------------------------------------------------------------------------
4062  forwardsolveloop: DO level = 1, nlevels, 1
4063  !---------------------------------------------------------------------------
4064  IF ( usebarriers ) THEN
4065  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward updateb level ', level; CALL fl(ofu)
4066  CALL mpi_barrier( ns_comm, mpierr )
4067  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward updateb level ', level; CALL fl(ofu)
4068  END IF
4069 
4070  !---------------------------------------------------------------------------
4071  !Determine start and end local rows at current level
4072  !---------------------------------------------------------------------------
4073  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
4074  endlocrow = -1 !Start with an invalid value (less than startlocrow)
4075  DO globrow = startglobrow, endglobrow, 1
4076  locrow = gr2lr( globrow, level ) !globrow's place at current level
4077  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
4078  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
4079  startlocrow = locrow
4080  END IF
4081  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
4082  endlocrow = locrow
4083  END IF
4084  END IF
4085  END DO
4086 
4087  !---------------------------------------------------------------------------
4088  !We may have run out of work; see if we have a valid range of rows left
4089  !---------------------------------------------------------------------------
4090  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
4091  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward updateb level ',level; CALL fl(ofu)
4092  CALL plbforwardinitializelevel( level, .false. )
4093  CALL plbforwardfinalizelevel( level, .false. )
4094  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
4095  ELSE
4096  CALL plbforwardinitializelevel( level, .true. )
4097  END IF
4098 
4099  IF(kpdbg) WRITE(ofu,*) '**** Forward updateb level ', level, ' ****'; CALL fl(ofu)
4100 
4101  !---------------------------------------------------------------------------
4102  !Memory has already been allocated in ForwardSolve for incoming nbr values,
4103  !if any, at this level
4104  !---------------------------------------------------------------------------
4105  IF ( isodd(startlocrow) .AND. &
4106  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
4107  globrowoff = 0
4108  IF( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4109  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4110  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
4111  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +0 memory error'; CALL fl(ofu); stop
4112  END IF
4113  lelement(level, globrowoff)%b = 0
4114  END IF
4115  IF ( isodd(endlocrow) .AND. &
4116  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
4117  globrowoff = endglobrow-startglobrow+2
4118  IF ( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4119  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4120  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
4121  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +2 memory error'; CALL fl(ofu); stop
4122  END IF
4123  lelement(level, globrowoff)%b = 0
4124  END IF
4125 
4126  !---------------------------------------------------------------------------
4127  !Memory has already been allocated in ForwardSolve at next level for those
4128  !rows that are odd at current level
4129  !---------------------------------------------------------------------------
4130  IF ( level .LT. nlevels ) THEN
4131  DO locrow = startlocrow, endlocrow, 1
4132  IF ( isodd(locrow) ) THEN
4133  globrow = lr2gr( locrow, level )
4134  globrowoff = globrow-startglobrow+1
4135  IF ( .NOT. ( ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
4136  ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
4137  ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
4138  ALLOCATED( lelement(level+1, globrowoff)%b ) ) ) THEN
4139  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +1 memory error'; CALL fl(ofu); stop
4140  END IF
4141  lelement(level+1, globrowoff)%b = 0
4142  END IF
4143  END DO
4144  END IF
4145 
4146  !---------------------------------------------------------------------------
4147  !Reset requests
4148  !---------------------------------------------------------------------------
4149  DO nbrmpireqindx = 1, 6
4150  nbrmpireq(nbrmpireqindx) = mpi_request_null
4151  END DO
4152 
4153  !---------------------------------------------------------------------------
4154  !Pre-post the expected MPI receives via a non-blocking recv primitive
4155  !Pre-posting can help overlap communication with computing inverses
4156  !---------------------------------------------------------------------------
4157  nbrmpireqindx = 1
4158  nbrmpinirecvs = 0
4159  nbrrank = lr2rank(startlocrow-1,level)
4160  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
4161  !-------------------------------------------------------------------------
4162  !Our top row at this level is odd and top row's previous is valid
4163  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
4164  !-------------------------------------------------------------------------
4165  globrowoff = 0
4166  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
4167 
4168  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4169  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); stop
4170  END IF
4171  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4172  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); stop
4173  END IF
4174  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4175  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); stop
4176  END IF
4177 
4178  CALL bsystemclock(loctimer1)
4179  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4180  nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
4181  nbrmpierr(nbrmpireqindx))
4182  nbrmpireqindx = nbrmpireqindx + 1
4183  nbrmpinirecvs = nbrmpinirecvs + 1
4184  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
4185  CALL bsystemclock(loctimer2)
4186  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4187  END IF
4188 
4189  nbrrank = lr2rank(endlocrow+1,level)
4190  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
4191  !-------------------------------------------------------------------------
4192  !Our bottom row at this level is odd and bottom row's next is valid
4193  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
4194  !-------------------------------------------------------------------------
4195  globrowoff = endglobrow-startglobrow+2
4196  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
4197 
4198  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4199  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); stop
4200  END IF
4201  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4202  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); stop
4203  END IF
4204  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4205  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); stop
4206  END IF
4207 
4208  CALL bsystemclock(loctimer1)
4209  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4210  nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
4211  nbrmpierr(nbrmpireqindx))
4212  nbrmpireqindx = nbrmpireqindx + 1
4213  nbrmpinirecvs = nbrmpinirecvs + 1
4214  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
4215  CALL bsystemclock(loctimer2)
4216  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4217  END IF
4218 
4219  !---------------------------------------------------------------------------
4220  !Inverses have already been computed for even-numbered rows at this level
4221  !---------------------------------------------------------------------------
4222  nbrmpinisends = 0
4223  DO locrow = startlocrow, endlocrow, 1
4224  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
4225  globrow = lr2gr( locrow, level )
4226  globrowoff = globrow-startglobrow+1
4227 
4228  IF(kpdbg) WRITE(ofu,*) ' Computed even hats ',globrow,' ', locrow; CALL fl(ofu)
4229 
4230  !-----------------------------------------------------------------------
4231  !Sanity checks
4232  !-----------------------------------------------------------------------
4233  IF ( level .EQ. nlevels ) THEN
4234  IF ( (rank .NE. 0) .OR. &
4235  (startlocrow .NE. endlocrow) .OR. &
4236  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
4237  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
4238  stop
4239  END IF
4240  END IF
4241  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
4242  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); stop
4243  END IF
4244  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
4245  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); stop
4246  END IF
4247  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4248  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); stop
4249  END IF
4250  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4251  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); stop
4252  END IF
4253  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4254  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); stop
4255  END IF
4256 
4257  !-----------------------------------------------------------------------
4258  !LU factorization of D has already been performed in place into D itself
4259  !-----------------------------------------------------------------------
4260 
4261  !-----------------------------------------------------------------------
4262  !Compute b-hats D^(-1)b
4263  !-----------------------------------------------------------------------
4264  CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
4265  lelement(1,globrowoff)%pivot, &
4266  lelement(level,globrowoff)%b, blaserr )
4267  IF (blaserr .NE. 0) THEN
4268  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); stop
4269  END IF
4270 
4271  !-----------------------------------------------------------------------
4272  !Send my bhats to neighbor row if that neighbor exists
4273  !and that neighbor row is hosted outside this rank
4274  !-----------------------------------------------------------------------
4275  DO rowoffset = -1, 1, 2
4276  nbrrank = lr2rank(locrow+rowoffset, level)
4277  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
4278  CALL bsystemclock(loctimer1)
4279  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4280  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); stop
4281  END IF
4282  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4283  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); stop
4284  END IF
4285  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4286  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); stop
4287  END IF
4288 
4289  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
4290  globrowoff = globrow-startglobrow+1
4291  stag = (((1-rowoffset))/2)*3
4292  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
4293  nbrrank, stag+3, ns_comm, &
4294  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
4295  nbrmpireqindx = nbrmpireqindx + 1
4296  nbrmpinisends = nbrmpinisends + 1
4297  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
4298  CALL bsystemclock(loctimer2)
4299  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4300  END IF
4301  END DO
4302  END IF
4303  END DO
4304 
4305  !---------------------------------------------------------------------------
4306  !Compute the matrix-vector multiplications for non-boundary odd rows
4307  !---------------------------------------------------------------------------
4308  DO locrow = startlocrow, endlocrow, 1
4309  globrow = lr2gr( locrow, level )
4310  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
4311  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
4312  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
4313  CALL computeforwardoddrowhats( locrow, level, &
4314  startlocrow, endlocrow, .true. )
4315  END IF
4316  END IF
4317  END DO
4318 
4319  !---------------------------------------------------------------------------
4320  !We have to wait for incoming even-numbered neighboring rows, before
4321  !computing inverses for the odd boundaries, if any.
4322  !Complete the send-receive of inverses, by doing a "wait all"
4323  !---------------------------------------------------------------------------
4324  nbrmpireqcnt = nbrmpireqindx - 1
4325  IF ( nbrmpireqcnt .GT. 0 ) THEN
4326  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4327  CALL bsystemclock(loctimer1)
4328  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4329  CALL bsystemclock(loctimer2)
4330  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4331  END IF
4332 
4333  !---------------------------------------------------------------------------
4334  !Now we can compute inverses for odd boundaries, if any
4335  !---------------------------------------------------------------------------
4336  IF ( isodd(startlocrow) ) THEN
4337  globrow = lr2gr( startlocrow, level )
4338  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
4339  CALL computeforwardoddrowhats( startlocrow, level, &
4340  startlocrow, endlocrow, .true. )
4341  END IF
4342  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
4343  globrow = lr2gr( endlocrow, level )
4344  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
4345  CALL computeforwardoddrowhats( endlocrow, level, &
4346  startlocrow, endlocrow, .true. )
4347  END IF
4348  CALL plbforwardfinalizelevel( level, .true. )
4349  END DO forwardsolveloop
4350 
4351  !-----------------------------------------------------------------------------
4352  CALL plbforwardfinalize()
4353 
4354  !-----------------------------------------------------------------------------
4355  rhsdirtied = .false.
4356 
4357  !-----------------------------------------------------------------------------
4358  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4359  IF(kpdbg) WRITE(ofu,*) '------ updateb solve end ------'; CALL fl(ofu)
4360 
4361  CALL bsystemclock(globtimer2)
4362  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4363 
4364 END SUBROUTINE forwardupdateb
4365 
4366 !-------------------------------------------------------------------------------
4370 !-------------------------------------------------------------------------------
4371 SUBROUTINE backwardsolve
4372  !-----------------------------------------------------------------------------
4373  ! Local variables
4374  !-----------------------------------------------------------------------------
4375  INTEGER :: level
4376  INTEGER :: locrow
4377  INTEGER :: globrow
4378  INTEGER :: globrowoff
4379  INTEGER :: prevglobrow
4380  INTEGER :: nextglobrow
4381  INTEGER :: startlocrow
4382  INTEGER :: endlocrow
4383  INTEGER :: rowoffset
4384  INTEGER :: nbrrank
4385  INTEGER :: nbrmpireqindx
4386  INTEGER :: nbrmpireqcnt
4387  INTEGER :: nbrmpinirecvs
4388  INTEGER :: nbrmpinisends
4389  INTEGER :: nbrmpireq(2)
4390  INTEGER :: nbrmpierr(2)
4391  INTEGER :: mpiwaiterr
4392  INTEGER :: mpierr
4393  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4394  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4395  INTEGER :: waitmatchindex
4396  INTEGER :: stag
4397  INTEGER :: reqi
4398  INTEGER :: blaserr
4399 
4400  CALL bsystemclock(globtimer1)
4401 
4402  !-----------------------------------------------------------------------------
4403  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4404  IF(kpdbg) WRITE(ofu,*) '------ Backward solve start ------'; CALL fl(ofu)
4405 
4406  !-----------------------------------------------------------------------------
4407  IF ( matdirtied ) THEN
4408  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4409  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix; updating... ------'; CALL fl(ofu)
4410  CALL forwardsolve()
4411  END IF
4412  IF ( rhsdirtied ) THEN
4413  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4414  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied RHS; updating... ------'; CALL fl(ofu)
4415  CALL forwardupdateb()
4416  END IF
4417 
4418  !-----------------------------------------------------------------------------
4419  CALL plbbackwardinitialize()
4420 
4421  !-----------------------------------------------------------------------------
4422  ! Make sure we have the forward solve data
4423  !-----------------------------------------------------------------------------
4424  IF ( (.NOT. ALLOCATED(lelement)) .OR. (.NOT. ALLOCATED(selement)) ) THEN
4425  IF(kpdbg) WRITE(ofu,*) 'Forward not called before backward?'; CALL fl(ofu)
4426  RETURN
4427  END IF
4428 
4429  !-----------------------------------------------------------------------------
4430  ! Backward solving loop in which odd rows communicate their solution to even
4431  ! rows and even rows compute their solution
4432  !-----------------------------------------------------------------------------
4433  backwardsolveloop: DO level = nlevels, 1, -1
4434  !---------------------------------------------------------------------------
4435  IF ( usebarriers ) THEN
4436  IF(kpdbg) WRITE(ofu,*) 'Barrier at backward level ', level; CALL fl(ofu)
4437  CALL mpi_barrier( ns_comm, mpierr )
4438  IF(kpdbg) WRITE(ofu,*) 'Done barrier at backward level ', level; CALL fl(ofu)
4439  END IF
4440 
4441  !---------------------------------------------------------------------------
4442  !Determine start and end local rows at current level
4443  !---------------------------------------------------------------------------
4444  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
4445  endlocrow = -1 !Start with an invalid value (less than startlocrow)
4446  DO globrow = startglobrow, endglobrow, 1
4447  locrow = gr2lr( globrow, level ) !globrow's place at current level
4448  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
4449  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
4450  startlocrow = locrow
4451  END IF
4452  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
4453  endlocrow = locrow
4454  END IF
4455  END IF
4456  END DO
4457 
4458  !---------------------------------------------------------------------------
4459  !We may have nothing to do at this level; if so, just go back another level
4460  !---------------------------------------------------------------------------
4461  IF ( startlocrow .GT. endlocrow ) THEN !No more rows left at this level
4462  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at backward level', level; CALL fl(ofu)
4463  CALL plbbackwardinitializelevel( level, .false. )
4464  CALL plbbackwardfinalizelevel( level, .false. )
4465  cycle backwardsolveloop !Move on to previous level
4466  ELSE
4467  CALL plbbackwardinitializelevel( level, .true. )
4468  END IF
4469 
4470  IF(kpdbg) WRITE(ofu,*) '**** Backward level ', level, ' ****'; CALL fl(ofu)
4471 
4472  !---------------------------------------------------------------------------
4473  !Reset requests
4474  !---------------------------------------------------------------------------
4475  DO nbrmpireqindx = 1, 2
4476  nbrmpireq(nbrmpireqindx) = mpi_request_null
4477  END DO
4478 
4479  !---------------------------------------------------------------------------
4480  !Pre-post the expected MPI receives via a non-blocking recv primitive
4481  !Pre-posting can help overlap communication with computing inverses
4482  !---------------------------------------------------------------------------
4483  nbrmpireqindx = 1
4484  nbrmpinirecvs = 0
4485  nbrrank = lr2rank(startlocrow-1,level)
4486  IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
4487  !-------------------------------------------------------------------------
4488  !Our top row at this level is even and top row's previous is valid
4489  !We will get the previous (odd-numbered) row's solution
4490  !-------------------------------------------------------------------------
4491  stag = 1
4492  globrow = lr2gr(startlocrow-1,level)
4493  IF(kpdbg) WRITE(ofu,*)' Irecv ',startlocrow-1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4494  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4495  ALLOCATE( selement(globrow)%x(m) )
4496  CALL chargememory( m*dpsz )
4497  END IF
4498  CALL bsystemclock(loctimer1)
4499  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4500  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4501  nbrmpireqindx = nbrmpireqindx + 1
4502  nbrmpinirecvs = nbrmpinirecvs + 1
4503  CALL bsystemclock(loctimer2)
4504  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4505  END IF
4506  nbrrank = lr2rank(endlocrow+1,level)
4507  IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
4508  !-------------------------------------------------------------------------
4509  !Our bottom row at this level is even and bottom row's next is valid
4510  !We will get the next (odd-numbered) row's solution
4511  !-------------------------------------------------------------------------
4512  stag = 2
4513  globrow = lr2gr(endlocrow+1,level)
4514  IF(kpdbg) WRITE(ofu,*)' Irecv ',endlocrow+1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4515  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4516  ALLOCATE( selement(globrow)%x(m) )
4517  CALL chargememory( m*dpsz )
4518  END IF
4519  CALL bsystemclock(loctimer1)
4520  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4521  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4522  nbrmpireqindx = nbrmpireqindx + 1
4523  nbrmpinirecvs = nbrmpinirecvs + 1
4524  CALL bsystemclock(loctimer2)
4525  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4526  END IF
4527 
4528  !---------------------------------------------------------------------------
4529  !Send our odd-numbered rows' solutions
4530  !---------------------------------------------------------------------------
4531  nbrmpinisends = 0
4532  DO locrow = startlocrow, endlocrow, 1
4533  IF ( isodd( locrow ) ) THEN
4534  !-----------------------------------------------------------------------
4535  !Send my solution to neighbor row if that neighbor exists
4536  !and is hosted outside this rank; use non-blocking sends
4537  !-----------------------------------------------------------------------
4538  DO rowoffset = -1, 1, 2
4539  nbrrank = lr2rank(locrow+rowoffset, level)
4540  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
4541  IF(kpdbg) WRITE(ofu,*) ' Isend ', locrow, ' ', nbrrank; CALL fl(ofu)
4542  globrow = lr2gr(locrow,level)
4543  IF ( .NOT. ALLOCATED(selement(globrow)%x) ) THEN
4544  IF(kpdbg) WRITE(ofu,*) 'ERROR BISEND AT LEVEL', level; CALL fl(ofu)
4545  IF(kpdbg) WRITE(ofu,*) locrow, ' ', globrow, ' ', nbrrank; CALL fl(ofu)
4546  stop
4547  END IF
4548  stag = (1-rowoffset)/2+1
4549  CALL bsystemclock(loctimer1)
4550  CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4551  nbrrank, stag, ns_comm, nbrmpireq(nbrmpireqindx), &
4552  nbrmpierr(nbrmpireqindx) )
4553  nbrmpireqindx = nbrmpireqindx + 1
4554  nbrmpinisends = nbrmpinisends + 1
4555  CALL bsystemclock(loctimer2)
4556  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4557  END IF
4558  END DO
4559  END IF
4560  END DO
4561 
4562  !---------------------------------------------------------------------------
4563  !Complete the send-receive of solutions, by doing an MPI "wait all"
4564  !---------------------------------------------------------------------------
4565  nbrmpireqcnt = nbrmpireqindx - 1
4566  IF ( nbrmpireqcnt .GT. 0 ) THEN
4567  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4568  CALL bsystemclock(loctimer1)
4569  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4570  CALL bsystemclock(loctimer2)
4571  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4572  END IF
4573 
4574  !---------------------------------------------------------------------------
4575  !Compute the solution for even rows at this level
4576  !---------------------------------------------------------------------------
4577  DO locrow = startlocrow, endlocrow, 1
4578  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
4579  globrow = lr2gr( locrow, level )
4580  globrowoff = globrow - startglobrow + 1
4581  IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) ) THEN
4582  IF(kpdbg) WRITE(ofu,*) 'ERROR ',level, ' ',globrow, ' ', locrow; CALL fl(ofu)
4583  stop
4584  END IF
4585 
4586  IF(kpdbg) WRITE(ofu,*) ' Compute solution ', globrow, ' ', locrow; CALL fl(ofu)
4587 
4588  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4589  ALLOCATE( selement(globrow)%x(m) )
4590  CALL chargememory( m*dpsz )
4591  END IF
4592 
4593  !----------------------------------------------------
4594  ! x = b-hat
4595  !----------------------------------------------------
4596  selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4597 
4598  !----------------------------------------------------
4599  ! x = x - L-hat*x(prev)
4600  !----------------------------------------------------
4601  nbrrank = lr2rank(locrow-1,level)
4602  IF ( nbrrank .GE. 0 ) THEN
4603  prevglobrow = lr2gr(locrow-1,level)
4604  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4605  selement(prevglobrow)%x, &
4606  one, selement(globrow)%x )
4607  END IF
4608 
4609  !----------------------------------------------------
4610  ! x = x - U-hat*x(next)
4611  !----------------------------------------------------
4612  nbrrank = lr2rank(locrow+1,level)
4613  IF ( nbrrank .GE. 0 ) THEN
4614  nextglobrow = lr2gr(locrow+1,level)
4615  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4616  selement(nextglobrow)%x, &
4617  one, selement(globrow)%x )
4618  END IF
4619 
4620  !----------------------------------------------------
4621  !Print the solution vector, if desired
4622  !----------------------------------------------------
4623  IF ( .false. ) THEN
4624  IF(kpdbg) WRITE(ofu,*) ' x[',globrow,']=',selement(globrow)%x; CALL fl(ofu)
4625  END IF
4626  END IF
4627  END DO
4628  CALL plbbackwardfinalizelevel( level, .false. )
4629  END DO backwardsolveloop
4630 
4631  !-----------------------------------------------------------------------------
4632  CALL plbbackwardfinalize()
4633 
4634  !-----------------------------------------------------------------------------
4635  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4636  IF(kpdbg) WRITE(ofu,*) '------ Backward solve end ------'; CALL fl(ofu)
4637 
4638  CALL bsystemclock(globtimer2)
4639  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4640 
4641 END SUBROUTINE backwardsolve
4642 
4643 !-------------------------------------------------------------------------------
4647 !-------------------------------------------------------------------------------
4648 SUBROUTINE verifysolution
4649  !-----------------------------------------------------------------------------
4650  ! Local variables
4651  !-----------------------------------------------------------------------------
4652  INTEGER :: i, k, globrow, globrowoff
4653  REAL(dp) :: term, totrmserr
4654  INTEGER :: nbrrank
4655  INTEGER :: nbrmpireqindx
4656  INTEGER :: nbrmpireqcnt
4657  INTEGER :: nbrmpinirecvs
4658  INTEGER :: nbrmpinisends
4659  INTEGER :: nbrmpireq(2)
4660  INTEGER :: nbrmpierr(2)
4661  INTEGER :: mpiwaiterr
4662  INTEGER :: mpierr
4663  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4664  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4665  INTEGER :: waitmatchindex
4666 
4667  !-----------------------------------------------------------------------------
4668  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4669  IF(kpdbg) WRITE(ofu,*) '------ Verifying solution ------'; CALL fl(ofu)
4670 
4671  !-----------------------------------------------------------------------------
4672  !Alocate memory, do Irecvs/Isends, for boundary odd/even rows resp.
4673  !-----------------------------------------------------------------------------
4674  nbrmpireqindx = 1
4675  nbrmpinirecvs = 0
4676  nbrmpinisends = 0
4677  nbrrank = gr2rank(startglobrow-1)
4678  IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4679  IF ( .NOT. ALLOCATED( selement(startglobrow-1)%x ) ) THEN
4680  ALLOCATE( selement(startglobrow-1)%x(m) )
4681  END IF
4682  CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4683  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4684  nbrmpireqindx = nbrmpireqindx + 1
4685  nbrmpinirecvs = nbrmpinirecvs + 1
4686  END IF
4687  nbrrank = gr2rank(endglobrow+1)
4688  IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4689  IF ( .NOT. ALLOCATED( selement(endglobrow+1)%x ) ) THEN
4690  ALLOCATE( selement(endglobrow+1)%x(m) )
4691  END IF
4692  CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4693  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4694  nbrmpireqindx = nbrmpireqindx + 1
4695  nbrmpinirecvs = nbrmpinirecvs + 1
4696  END IF
4697  nbrrank = gr2rank(startglobrow-1)
4698  IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4699  CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4700  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4701  nbrmpireqindx = nbrmpireqindx + 1
4702  nbrmpinisends = nbrmpinisends + 1
4703  END IF
4704  nbrrank = gr2rank(endglobrow+1)
4705  IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4706  CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4707  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4708  nbrmpireqindx = nbrmpireqindx + 1
4709  nbrmpinisends = nbrmpinisends + 1
4710  END IF
4711  nbrmpireqcnt = nbrmpireqindx - 1
4712  IF ( nbrmpireqcnt .GT. 0 ) THEN
4713  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4714  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4715  END IF
4716 
4717  !-----------------------------------------------------------------------------
4718  totrmserr = 0.0
4719  DO globrow = startglobrow, endglobrow, 1
4720  globrowoff = globrow - startglobrow + 1
4721  DO i = 1, m, 1
4722  term = 0.0
4723  IF ( globrow .GT. 1 ) THEN
4724  term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4725  END IF
4726 
4727  term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4728 
4729  IF (writesolution) THEN
4730  IF( i .EQ. 1 ) THEN
4731  DO k = 1, m
4732  IF(kpdbg) WRITE(ofu,*) 'X[',(globrow-1)*m+k,']=', selement(globrow)%x(k);CALL fl(ofu)
4733  END DO
4734  END IF
4735  END IF
4736 
4737  IF ( globrow .LT. n ) THEN
4738  term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4739  END IF
4740 
4741  totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4742  END DO
4743  END DO
4744 
4745  IF ( endglobrow .LT. startglobrow ) THEN
4746  totrmserr = 0
4747  ELSE
4748  totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4749  END IF
4750  IF(kpdbg) WRITE(ofu,'(A,E15.8E3)') 'TOTAL RMS ERROR = ', totrmserr; CALL fl(ofu)
4751  IF(kpdbg) WRITE(ofu,*) '------ Solution verified ------'; CALL fl(ofu)
4752 END SUBROUTINE verifysolution
4753 !-------------------------------------------------------------------------------
4754 
4755 !-------------------------------------------------------------------------------
4756 SUBROUTINE checksymmetry (asymIndx)
4757 IMPLICIT NONE
4758 
4759 REAL(dp), INTENT (OUT) :: asymIndx
4760 REAL(dp) :: diff, partdiff, norm, eij, eji, tdiff, tnorm, summ
4761 REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: sendBuf, bottomLeftMatrix
4762 
4763 INTEGER :: topNeighbor, bottomNeighbor
4764 
4765 INTEGER :: mpistatus(MPI_STATUS_SIZE), mpi_err
4766 INTEGER :: blkrow,blktype,i,j,k
4767 
4768 IF (.NOT.ALLOCATED(bottomleftmatrix)) THEN
4769  ALLOCATE(bottomleftmatrix(m,m))
4770  bottomleftmatrix=0.0
4771 END IF
4772 
4773 IF (.NOT.ALLOCATED(sendbuf)) THEN
4774  ALLOCATE(sendbuf(m,m))
4775  sendbuf=0.0
4776 END IF
4777 
4778 IF(nranks.EQ.1) THEN
4779  topneighbor=mpi_proc_null
4780  bottomneighbor=mpi_proc_null
4781 ELSE
4782  IF (rank.EQ.0) THEN
4783  topneighbor=mpi_proc_null
4784  bottomneighbor=rank+1
4785  ELSE IF (rank.EQ.nranks-1) THEN
4786  topneighbor=rank-1
4787  bottomneighbor=mpi_proc_null
4788  sendbuf=lelement(1,1)%L
4789  ELSE
4790  topneighbor=rank-1
4791  bottomneighbor=rank+1
4792  sendbuf=lelement(1,1)%L
4793  END IF
4794 END IF
4795 
4796 CALL mpi_sendrecv(sendbuf,m*m,mpi_real8,topneighbor,1,&
4797 &bottomleftmatrix,m*m,mpi_real8,bottomneighbor,1,ns_comm,&
4798 &mpistatus,mpi_err)
4799 
4800 IF (ALLOCATED(sendbuf)) DEALLOCATE(sendbuf)
4801 IF (rank.EQ.nranks-1) bottomleftmatrix=0.0
4802 
4803 summ=0.0; diff=0.0; norm=0.0
4804 DO blkrow=1,endglobrow-startglobrow+1
4805  ! Compute contributions from the main diagonal blocks
4806  partdiff=0.0
4807  DO i=1,m
4808  DO j=i,m
4809  eij=lelement(1,blkrow)%D(i,j)
4810  eji=lelement(1,blkrow)%D(j,i)
4811  partdiff = partdiff+(eij-eji)*(eij-eji)
4812  IF (i.NE.j) THEN
4813  ! Contribution from the off-diagonal elements of
4814  ! this main diagonal block
4815  norm = norm + eij*eij+eji*eji
4816  ELSE
4817  ! Contributions to the norm from just the diagonal
4818  ! elements of this block
4819  norm = norm + eij*eij
4820  END IF
4821  END DO
4822  END DO
4823  diff=diff+2*partdiff
4824 
4825  ! Compute contributions from off diagonal blocks
4826  DO i=1,m
4827  DO j=1,m
4828  IF (blkrow.NE.endglobrow-startglobrow+1) THEN
4829  eij=lelement(1,blkrow)%U(i,j)
4830  eji=lelement(1,blkrow+1)%L(j,i)
4831  diff = diff+(eij-eji)*(eij-eji)
4832  norm = norm+eij*eij+eji*eji
4833  ELSE !Local bottom block row
4834  IF (rank.NE.nranks-1) THEN
4835  eij=lelement(1,blkrow)%U(i,j)
4836  eji=bottomleftmatrix(j,i)
4837  diff = diff+(eij-eji)*(eij-eji)
4838  norm = norm+eij*eij+eji*eji
4839  END IF
4840  END IF
4841  END DO
4842  END DO
4843 END DO
4844 
4845 IF (ALLOCATED(bottomleftmatrix)) DEALLOCATE(bottomleftmatrix)
4846 
4847 CALL mpi_reduce(diff,tdiff,1,mpi_real8,mpi_sum,0,ns_comm,mpi_err)
4848 CALL mpi_reduce(norm,tnorm,1,mpi_real8,mpi_sum,0,ns_comm,mpi_err)
4849 IF (rank.EQ.0) THEN
4850  asymindx=sqrt(tdiff/tnorm)
4851  !print *,'New asymIndx:',asymIndx
4852 END IF
4853 
4854 END SUBROUTINE checksymmetry
4855 !-------------------------------------------------------------------------------
4856 
4857 !-------------------------------------------------------------------------------
4858 SUBROUTINE storediagonal(blkrow,colnum,buf)
4859 IMPLICIT NONE
4860 
4861 INTEGER, INTENT(IN) :: blkrow, colnum
4862 REAL(dp), DIMENSION(1:M), INTENT(IN) :: buf
4863 INTEGER :: indx
4864 
4865 IF(blkrow.LT.startglobrow) THEN
4866  IF(kpdbg) WRITE(ofu,*) 'Error in indexing global block row index'; CALL fl(ofu)
4867 END IF
4868 
4869 indx=(blkrow-startglobrow)*m+colnum
4870 origdiagelement(indx)=buf(colnum)
4871 
4872 END SUBROUTINE storediagonal
4873 !-------------------------------------------------------------------------------
4874 
4875 
4876 !-------------------------------------------------------------------------------
4877 SUBROUTINE writeblocks(flag)
4878 IMPLICIT NONE
4879 
4880 INTEGER, INTENT(IN) :: flag
4881 !LOGICAL, INTENT(IN) :: flag
4882 INTEGER :: row, col, localblkrow, globalrow
4883 
4884 CHARACTER(100) :: fname
4885 CHARACTER(50) :: ciam, cnprocs
4886 INTEGER :: WOFU, istat
4887 
4888 WRITE(ciam,*) rank; WRITE(cnprocs,*) nranks
4889 ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
4890 wofu = 8*nranks+rank
4891 
4892 fname='block-'//trim(ciam)//'-P-'//trim(cnprocs)//'.txt'
4893 OPEN(unit=wofu, file=fname, status="REPLACE", action="WRITE",&
4894 &form="FORMATTED",position="APPEND", iostat=istat)
4895 !OPEN(UNIT=WOFU, FILE=fname, STATUS="REPLACE", ACTION="WRITE",&
4896 !&POSITION="REWIND", IOSTAT=istat)
4897 
4898 IF(.false.) THEN
4899  DO row=1, (endglobrow-startglobrow+1)*m
4900  WRITE(wofu,*) 'SavedDiag:',row,origdiagelement(row), flag
4901  CALL fl(wofu)
4902  END DO
4903 END IF
4904 
4905 IF(.true.) THEN
4906  DO globalrow=startglobrow,endglobrow
4907  localblkrow=globalrow-startglobrow+1
4908 
4909 ! WRITE(WOFU,*)
4910  CALL fl(wofu)
4911  IF (globalrow.GT.1) THEN
4912  DO row=1,m
4913  DO col=1,m
4914  WRITE(wofu,*) globalrow, 'L', row, col, orig(localblkrow)%L(row,col), flag
4915  CALL fl(wofu)
4916  END DO
4917  END DO
4918  END IF
4919 
4920 #if defined(NOSKS)
4921  WRITE(wofu,*)
4922  CALL fl(wofu)
4923  DO row=1,m
4924  DO col=1,m
4925  WRITE(wofu,*) globalrow, 'D', row, col, orig(localblkrow)%D(row,col), flag
4926  CALL fl(wofu)
4927  END DO
4928  END DO
4929 
4930  WRITE(wofu,*)
4931  CALL fl(wofu)
4932  DO row=1,m
4933  DO col=1,m
4934  WRITE(wofu,*) globalrow, 'U', row, col, orig(localblkrow)%U(row,col), flag
4935  CALL fl(wofu)
4936  END DO
4937  END DO
4938  WRITE(wofu,*)'--------------------------------------------------------'
4939  CALL fl(wofu)
4940 #endif
4941 
4942  END DO
4943 END IF
4944 
4945 END SUBROUTINE writeblocks
4946 !-------------------------------------------------------------------------------
4947 
4948 
4949 !-------------------------------------------------------------------------------
4950 SUBROUTINE displayblocks(flag)
4951 IMPLICIT NONE
4952 
4953 INTEGER, INTENT(IN) :: flag
4954 INTEGER :: row, localblkrow, globalrow
4955 
4956 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 1; CALL fl(ofu)
4957 
4958 IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4959 DO row=1, (endglobrow-startglobrow+1)*m
4960  IF(kpdbg) WRITE(ofu,*) 'OrigDiagElement:',origdiagelement(row),'-',flag
4961  CALL fl(ofu)
4962 END DO
4963 IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4964 
4965 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 2; CALL fl(ofu)
4966 
4967 
4968 DO globalrow=startglobrow,endglobrow
4969  localblkrow=globalrow-startglobrow+1
4970 
4971  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4972 
4973  DO row=1,m
4974  IF(kpdbg) WRITE(ofu,*) 'orig-L:',orig(localblkrow)%L(row,1:m),'-',flag
4975  CALL fl(ofu)
4976  END DO
4977 
4978  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4979  DO row=1,m
4980  IF(kpdbg) WRITE(ofu,*) 'orig-D:',orig(localblkrow)%D(row,1:m),'-',flag
4981  CALL fl(ofu)
4982  END DO
4983 
4984  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4985  DO row=1,m
4986  IF(kpdbg) WRITE(ofu,*) 'orig-U:',orig(localblkrow)%U(row,1:m),'-',flag
4987  CALL fl(ofu)
4988  END DO
4989  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
4990 END DO
4991 
4992 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 3; CALL fl(ofu)
4993 
4994 
4995 END SUBROUTINE displayblocks
4996 !-------------------------------------------------------------------------------
4997 
4998 
4999 !-------------------------------------------------------------------------------
5000 SUBROUTINE parmatvec (invec,outvec,outveclength)
5001 IMPLICIT NONE
5002 
5003 INTEGER,INTENT(IN) :: outveclength
5004 REAL(dp), INTENT(IN) :: invec(1:N*M)
5005 REAL(dp), INTENT(OUT) :: outvec(1:outveclength)
5006 
5007 INTEGER :: col, row, offset
5008 INTEGER :: globalrow, localblkrow, cnt, dcnt
5009 REAL(dp) :: melement, velement, total
5010 
5011 outvec=0; cnt=0; dcnt=0
5012 DO globalrow=startglobrow,endglobrow
5013 
5014  localblkrow=globalrow-startglobrow+1
5015 
5016  IF (globalrow.EQ.1) THEN
5017  offset=0
5018  ELSE
5019  offset=(globalrow-2)*m
5020  END IF
5021 
5022  IF (globalrow.EQ.1) THEN
5023  DO row=1,m
5024  total=0
5025  DO col=1,2*m
5026  velement=invec(offset+col)
5027  IF (col.LE.m) THEN
5028  IF(row.EQ.col) THEN
5029  dcnt=dcnt+1
5030  melement=origdiagelement(dcnt)
5031  ELSE
5032  melement=orig(localblkrow)%D(row,col)
5033  END IF
5034  ELSE
5035  melement=orig(localblkrow)%U(row,col-m)
5036  END IF
5037  total=total+melement*velement
5038  END DO
5039  cnt=cnt+1
5040  outvec(cnt)=total
5041  END DO
5042  ELSE IF (globalrow.EQ.n) THEN
5043  DO row=1,m
5044  total=0
5045  DO col=1,2*m
5046  velement=invec(offset+col)
5047  IF (col.LE.m) THEN
5048  melement=orig(localblkrow)%L(row,col)
5049  ELSE
5050  IF (row.EQ.col-m) THEN
5051  dcnt=dcnt+1
5052  melement=origdiagelement(dcnt)
5053  ELSE
5054  melement=orig(localblkrow)%D(row,col-m)
5055  ENDIF
5056  END IF
5057  total=total+melement*velement
5058  END DO
5059  cnt=cnt+1
5060  outvec(cnt)=total
5061  END DO
5062  ELSE
5063  DO row=1,m
5064  total=0
5065  DO col=1,3*m
5066  velement=invec(offset+col)
5067  IF (col.LE.m) THEN
5068  melement=orig(localblkrow)%L(row,col)
5069  ELSE IF (col.GT.m.AND.col.LE.2*m) THEN
5070  IF (row.EQ.col-m) THEN
5071  dcnt=dcnt+1
5072  melement=origdiagelement(dcnt)
5073  ELSE
5074  melement=orig(localblkrow)%D(row,col-m)
5075  END IF
5076  ELSE
5077  melement=orig(localblkrow)%U(row,col-2*m)
5078  END IF
5079  total=total+melement*velement
5080  IF (globalrow.EQ.2.AND.row.EQ.1) THEN
5081  END IF
5082  END DO
5083  cnt=cnt+1
5084  outvec(cnt)=total
5085  END DO
5086  END IF
5087 END DO
5088 
5089 END SUBROUTINE parmatvec
5090 !------------------------------------------------------------------------------
5091 
5092 !------------------------------------------------
5093 SUBROUTINE setblockrowcol( globrow, colnum, buf, opt)
5094  INTEGER :: globrow
5095  REAL(dp), INTENT(IN), DIMENSION(M) :: buf
5096  INTEGER :: colnum, opt
5097  IF (opt.EQ.1) THEN
5098  CALL setmatrixrowcolu( globrow, buf, colnum )
5099  ELSE IF (opt.EQ.2) THEN
5100  CALL setmatrixrowcold( globrow, buf, colnum )
5101  ELSE IF (opt.EQ.3) THEN
5102  CALL setmatrixrowcoll( globrow, buf, colnum )
5103  ELSE
5104  WRITE(*,*) 'Error in diagonal type option'
5105  END IF
5106 END SUBROUTINE setblockrowcol
5107 
5108 !------------------------------------------------
5109 !SPH 7.19.17: ADD COLUMN SCALING ROUTINES
5110 !------------------------------------------------
5111 
5112 
5113 !-------------------------------------------------------------------------------
5114 SUBROUTINE getcolsum(cs)
5115 REAL(dp), DIMENSION(M,startglobrow:endglobrow), INTENT(OUT) :: cs
5116 REAL(dp), PARAMETER :: minScale = 1.e-6_dp
5117 REAL(dp) :: eps, eps0, s1(1), r1(1)
5118 INTEGER :: js
5119 
5120 CALL initscalefactors
5121 DO js = startglobrow, endglobrow
5122  CALL getscalefactors(js,cs(:,js))
5123 END DO
5124 
5125 !FILTER SMALL (OR ZERO) COLUMN SUMS
5126 eps0 = maxval(cs)
5127 
5128 s1(1)=eps0
5129 CALL mpi_allreduce(s1,r1,1,mpi_real8, mpi_max, ns_comm,mpi_err)
5130 eps0 = minscale*r1(1)
5131 
5132 !FILTER COLS OF CS SEPARATELY
5133 DO js = startglobrow, endglobrow
5134  eps = minscale*maxval(cs(:,js))
5135  IF (eps .EQ. zero) eps = eps0
5136  WHERE (cs(:,js) .LT. eps) cs(:,js) = eps
5137 END DO
5138 
5139 cs = one/sqrt(cs)
5140 
5141 CALL finalizescalefactors
5142 
5143 END SUBROUTINE getcolsum
5144 !-------------------------------------------------------------------------------
5145 
5146 !-------------------------------------------------------------------------------
5147 SUBROUTINE initscalefactors
5148 
5149 INTEGER :: top, bot, MPI_ERR, ic, j, k
5150 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5151 REAL(dp), DIMENSION(:), ALLOCATABLE :: tmp1, tmp2
5152 INTEGER :: numBlockRows
5153 
5154 numblockrows = endglobrow-startglobrow+1
5155 
5156 top=rank-1; IF(rank.EQ.0) top=mpi_proc_null
5157 bot=rank+1; IF(rank.EQ.nranks-1) bot=mpi_proc_null
5158 
5159 ALLOCATE (topscalefac(m),botscalefac(m))
5160 ALLOCATE (tmp1(m),tmp2(m))
5161 
5162 DO ic=1, m
5163  tmp1(ic)=sum(orig(1)%L(:,ic)**2)
5164  tmp2(ic)=sum(orig(numblockrows)%U(:,ic)**2)
5165 END DO
5166 
5167 CALL mpi_sendrecv(tmp1,m,mpi_real8,top,1,&
5168  botscalefac,m,mpi_real8,bot,1,ns_comm,mpi_stat,mpi_err)
5169 
5170 CALL mpi_sendrecv(tmp2,m,mpi_real8,bot,1,&
5171  topscalefac,m,mpi_real8,top,1,ns_comm,mpi_stat,mpi_err)
5172 
5173 DEALLOCATE(tmp1, tmp2)
5174 
5175 END SUBROUTINE initscalefactors
5176 !-------------------------------------------------------------------------------
5177 
5178 !-------------------------------------------------------------------------------
5179 SUBROUTINE getscalefactors(js,scalevector)
5180 IMPLICIT NONE
5181 
5182 INTEGER, INTENT(IN) :: js
5183 REAL(dp), INTENT(OUT) :: scalevector(M)
5184 REAL(dp) :: eps
5185 INTEGER :: ic, ir, numBlockRows
5186 
5187 numblockrows = endglobrow-startglobrow+1
5188 
5189 IF (js.EQ.startglobrow) THEN
5190  IF (js.EQ.1) THEN
5191  DO ic=1, m
5192  scalevector(ic)=sum(orig(1)%D(:,ic)*orig(1)%D(:,ic)) + &
5193  sum(orig(2)%L(:,ic)*orig(2)%L(:,ic))
5194  END DO
5195  ELSE
5196  ir = js-startglobrow+1
5197  DO ic=1, m
5198  scalevector(ic)=sum(orig(ir)%D(:,ic)*orig(ir)%D(:,ic)) + &
5199  topscalefac(ic) + sum(orig(ir+1)%L(:,ic)*orig(ir+1)%L(:,ic))
5200  END DO
5201  END IF
5202 ELSE IF (js.EQ.endglobrow) THEN
5203  IF (js.EQ.n) THEN
5204  DO ic=1, m
5205  scalevector(ic)=sum(orig(numblockrows)%D(:,ic)*orig(numblockrows)%D(:,ic)) + &
5206  sum(orig(numblockrows-1)%U(:,ic)*orig(numblockrows-1)%U(:,ic))
5207  END DO
5208  ELSE
5209  DO ic=1, m
5210  scalevector(ic)=sum(orig(numblockrows)%D(:,ic)*orig(numblockrows)%D(:,ic)) + &
5211  sum(orig(numblockrows-1)%U(:,ic)*orig(numblockrows-1)%U(:,ic)) + botscalefac(ic)
5212  END DO
5213  END IF
5214 ELSE
5215  ir = js-startglobrow+1
5216  DO ic=1, m
5217  scalevector(ic)=sum(orig(ir)%D(:,ic)*orig(ir)%D(:,ic)) + &
5218  sum(orig(ir-1)%U(:,ic)*orig(ir-1)%U(:,ic)) + &
5219  sum(orig(ir+1)%L(:,ic)*orig(ir+1)%L(:,ic))
5220  END DO
5221 END IF
5222 
5223 IF (any(scalevector .LT. zero)) stop 'SCALEVECTOR < 0!'
5224 scalevector = sqrt(scalevector)
5225 
5226 END SUBROUTINE getscalefactors
5227 !-------------------------------------------------------------------------------
5228 
5229 !-------------------------------------------------------------------------------
5230 SUBROUTINE finalizescalefactors
5231  DEALLOCATE (topscalefac,botscalefac)
5232 END SUBROUTINE finalizescalefactors
5233 !-------------------------------------------------------------------------------
5234 
5235 !-------------------------------------------------------------------------------
5236 SUBROUTINE parallelscaling(lmp,colsum)
5237 
5238 REAL(dp), INTENT(IN) :: lmp
5239 REAL(dp), DIMENSION(M,startglobrow:endglobrow), INTENT(IN) :: colsum
5240 
5241 INTEGER :: js, icol, ir, top, bot, MPI_ERR, i4, i3, dcnt
5242 REAL(dp) :: eps, s1(1), r1(1)
5243 REAL(dp), DIMENSION(:), ALLOCATABLE :: toprow, botrow
5244 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5245 
5246 top=rank-1; IF(rank.EQ.0) top=mpi_proc_null
5247 bot=rank+1; IF(rank.EQ.nranks-1) bot=mpi_proc_null
5248 ALLOCATE (toprow(m),botrow(m))
5249 
5250 l_colscale = .true.
5251 
5252 CALL mpi_sendrecv(colsum(:,endglobrow),m,mpi_real8,bot,1,&
5253  toprow,m,mpi_real8,top,1,ns_comm,mpi_stat,mpi_err)
5254 
5255 CALL mpi_sendrecv(colsum(:,startglobrow),m,mpi_real8,top,1,&
5256  botrow,m,mpi_real8,bot,1,ns_comm,mpi_stat,mpi_err)
5257 
5258 dcnt = 0
5259 DO js = startglobrow, endglobrow
5260  ir=js-startglobrow+1
5261  DO icol = 1, m
5262  lelement(1,ir)%D(:,icol) = colsum(:,js)*orig(ir)%D(:,icol)*colsum(icol,js)
5263  orig(ir)%D(:,icol) = lelement(1,ir)%D(:,icol)
5264  dcnt=dcnt+1
5265  origdiagelement(dcnt) = orig(ir)%D(icol,icol)
5266  IF (js .GT. 1) THEN
5267  IF (ir.EQ.1) THEN
5268  lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*toprow(icol)
5269  ELSE
5270  lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*colsum(icol,js-1)
5271  END IF
5272  orig(ir)%L(:,icol) = lelement(1,ir)%L(:,icol)
5273  END IF
5274  IF (js .LT. n) THEN
5275  IF (ir.EQ.(endglobrow-startglobrow+1)) THEN
5276  lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*botrow(icol)
5277  ELSE
5278  lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*colsum(icol,js+1)
5279  END IF
5280  orig(ir)%U(:,icol) = lelement(1,ir)%U(:,icol)
5281  END IF
5282  END DO
5283 END DO
5284 DEALLOCATE (toprow,botrow)
5285 
5286 !Find DMIN_TRI, MAXEIGEN_TRI used for scaling
5287 CALL findminmax_tri(lmp)
5288 
5289 END SUBROUTINE parallelscaling
5290 !-------------------------------------------------------------------------------
5291 
5292 
5293 !-------------------------------------------------------------------------------
5294 SUBROUTINE findminmax_tri(lmp)
5295 !SPH 091616: Find min diagonal element and max eigenvalue (Gerschgorin Theorem)
5296 REAL(dp), PARAMETER :: minThreshold=1.e-6_dp
5297 REAL(dp), INTENT(IN) :: lmp
5298 INTEGER :: js, icol, ir, dcnt
5299 REAL(dp) :: eps, s1(2), r1(2), minDiag
5300 
5301 dcnt=0
5302 maxeigen_tri = 0
5303 dmin_tri = huge(dmin_tri)
5304 DO js = startglobrow, endglobrow
5305  ir=js-startglobrow+1
5306 
5307 !Minimum allowable diagonal element after scaling
5308  mindiag = minthreshold*maxval(abs(origdiagelement(dcnt+1:dcnt+m)))
5309  IF (mindiag .EQ. 0) mindiag = minthreshold
5310 
5311  DO icol = 1, m
5312  dcnt = dcnt+1
5313  eps = origdiagelement(dcnt)
5314 
5315  IF (abs(eps) .LE. mindiag) THEN !Can happen for lcolscale=FALSE
5316  eps = -mindiag
5317  origdiagelement(dcnt) = eps
5318  orig(ir)%D(icol,icol) = eps
5319  lelement(1,ir)%D(icol,icol) = eps
5320  ELSE
5321  IF (js .LT. n) dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
5322  END IF
5323 
5324  maxeigen_tri = max(maxeigen_tri, sum(abs(orig(ir)%L(icol,:)) + abs(orig(ir)%D(icol,:)) &
5325  + abs(orig(ir)%U(icol,:))))
5326  END DO
5327 END DO
5328 
5329 s1(1)=-dmin_tri; s1(2)=maxeigen_tri
5330 CALL mpi_allreduce(s1,r1,2,mpi_real8, mpi_max, ns_comm,mpi_err)
5331 dmin_tri=sign(-r1(1), eps)
5332 maxeigen_tri=r1(2)
5333 
5334 !ADD LEVENBERG PARAMETER LM*DMIN TO DIAGONAL [OR D*(1+LM) IF NO COL-SCALING]
5335 CALL resetdiagonal(lmp, .false.)
5336 
5337 END SUBROUTINE findminmax_tri
5338 !-------------------------------------------------------------------------------
5339 
5340 
5341 !-------------------------------------------------------------------------------
5342 SUBROUTINE resetdiagonal(lmp, bReset)
5343 REAL(dp), INTENT(IN) :: lmp
5344 LOGICAL, INTENT(IN) :: bReset
5345 REAL(dp) :: DminL, eps
5346 INTEGER :: dcnt, js, ir, icol
5347 
5348 !ADD CONSTANT (SCALED BY DMIN) LEVENBERG PARAMETER TO DIAGONAL [NOT D*(1+LM)]
5349 dminl = dmin_tri*lmp
5350 dcnt = 0
5351 DO js = startglobrow, endglobrow
5352  ir=js-startglobrow+1
5353  DO icol = 1, m
5354  dcnt=dcnt+1
5355  eps = origdiagelement(dcnt)
5356 !SPH 11-14-16: remove redundancy of edge force K^s=0? Works better without this
5357 ! IF (js.EQ.N .AND. icol.GT.(M/3)) DminL = Dmin_TRI*MAX(ABS(lmp),1.E-7_dp)
5358  IF (l_colscale) THEN
5359  lelement(1,ir)%D(icol,icol) = eps + sign(dminl,eps)
5360  ELSE
5361  lelement(1,ir)%D(icol,icol) = eps*(1 + abs(lmp))
5362  END IF
5363  orig(ir)%D(icol,icol) = lelement(1,ir)%D(icol,icol)
5364  END DO
5365 
5366  IF (breset) THEN
5367  lelement(1,ir)%L = orig(ir)%L
5368  lelement(1,ir)%D = orig(ir)%D
5369  lelement(1,ir)%U = orig(ir)%U
5370  lelement(1,ir)%pivot = 0
5371  END IF
5372 END DO
5373 
5374 END SUBROUTINE resetdiagonal
5375 
5376 !-------------------------------------------------------------------------------
5377 
5378 #endif
5379 
5380 END MODULE blocktridiagonalsolver
5381 !-------------------------------------------------------------------------------
blocktridiagonalsolver::blacsparameters
BLACS/PBLAS information.
Definition: blocktridiagonalsolver.f90:156
blocktridiagonalsolver::mastertoslavemapping
Master-to-slave mapping.
Definition: blocktridiagonalsolver.f90:172
blocktridiagonalsolver::levelelement
Data associated with each row at each level.
Definition: blocktridiagonalsolver.f90:31
blocktridiagonalsolver::pblasstats
Definition: blocktridiagonalsolver.f90:218
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11
blocktridiagonalsolver::timecount
Statistics (timing, etc.)
Definition: blocktridiagonalsolver.f90:213
blocktridiagonalsolver
Solver for block tri-diagonal matrices. [Kalyan S. Perumalla, ORNL, 2009-2011].
Definition: blocktridiagonalsolver.f90:8
blocktridiagonalsolver::solutionelement
Solution of selected rows of interest to this rank.
Definition: blocktridiagonalsolver.f90:41
blocktridiagonalsolver::pblaslevelparameters
Level-specific PBLAS information.
Definition: blocktridiagonalsolver.f90:183
blocktridiagonalsolver::pblastemparray
Definition: blocktridiagonalsolver.f90:228
blocktridiagonalsolver::blacsprocessgrid
BLACS/PBLAS process grid information.
Definition: blocktridiagonalsolver.f90:144