V3FIT
blocktridiagonalsolver_bst.f90
1 !-------------------------------------------------------------------------------
2 MODULE blocktridiagonalsolver_bst
3 USE mpi_inc
4 USE parallel_include_module, ONLY: stopmpi
5 USE parallel_include_module, ONLY: tofu
6 USE parallel_include_module, ONLY: ns_comm
7 USE parallel_include_module, ONLY: grank, gnranks
8 USE parallel_include_module, ONLY: rank, nranks
9 USE parallel_include_module, ONLY: ns_resltn
10 
11 USE parallel_vmec_module, ONLY: bcyclic_comp_time
12 USE parallel_vmec_module, ONLY: bcyclic_comm_time
13 USE parallel_vmec_module, ONLY: waitall_time
14 USE parallel_vmec_module, ONLY: dgemm_time
15 USE parallel_vmec_module, ONLY: dgemv_time
16 USE parallel_vmec_module, ONLY: dgetrf_time
17 USE parallel_vmec_module, ONLY: dgetrs_time
18 USE parallel_vmec_module, ONLY: forwardsolveloop_time
19 USE parallel_vmec_module, ONLY: t
20 IMPLICIT NONE
21 !-------------------------------------------------------------------------------
22 ! Precision settings
23 !-------------------------------------------------------------------------------
24 INTEGER, PARAMETER :: rprec = selected_real_kind(15,300)
25 INTEGER, PARAMETER :: iprec = selected_int_kind(8)
26 INTEGER, PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
27 INTEGER, PARAMETER :: dp = rprec
28 
29 !-------------------------------------------------------------------------------
33 !-------------------------------------------------------------------------------
35  REAL(dp), ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
36  INTEGER, ALLOCATABLE :: pivot(:)
37 END TYPE levelelement
38 
39 !-------------------------------------------------------------------------------
43 !-------------------------------------------------------------------------------
45  REAL(dp), ALLOCATABLE :: x(:) !Vector of size M
46 END TYPE solutionelement
47 
48 !-------------------------------------------------------------------------------
55 !-------------------------------------------------------------------------------
56 TYPE (LevelElement), ALLOCATABLE :: lelement(:,:)
57 
58 !-------------------------------------------------------------------------------
63 !-------------------------------------------------------------------------------
64 TYPE (LevelElement), ALLOCATABLE :: orig(:)
65 
66 !-------------------------------------------------------------------------------
71 !-------------------------------------------------------------------------------
72 TYPE (SolutionElement), ALLOCATABLE :: selement(:)
73 
74 !-------------------------------------------------------------------------------
75 !INTEGER :: rank !<This MPI task's rank
76 !INTEGER :: nranks !<Num of MPI tasks
77 INTEGER :: P
78 INTEGER :: N
79 INTEGER :: M
80 INTEGER :: startglobrow
81 INTEGER :: endglobrow
82 INTEGER :: nlevels
83 LOGICAL :: matdirtied
84 LOGICAL :: rhsdirtied
85 
86 !-------------------------------------------------------------------------------
87 CHARACTER*100 :: kenvvar
88 CHARACTER*100 :: kenvval
89 LOGICAL :: KPDBG
90 INTEGER :: OFU
91 INTEGER :: PFU
92 LOGICAL :: writeproblemfile
93 LOGICAL :: writesolution
94 LOGICAL :: usebarriers
95 REAL :: membytes
96 REAL :: dpsz
97 REAL :: intsz
98 REAL :: ptrsz
99 REAL(dp) :: ONE
100 REAL(dp) :: ZERO
101 REAL(dp) :: skston, skstoff
102 
103 
104 !-------------------------------------------------------------------------------
105 LOGICAL :: use_mpiwtime
106 DOUBLE PRECISION :: loctimer1, loctimer2
107 DOUBLE PRECISION :: mattimer1, mattimer2
108 DOUBLE PRECISION :: globtimer1, globtimer2
109 DOUBLE PRECISION :: timerfreq
110 DOUBLE PRECISION :: tottime
111 INTEGER :: totcount
112 DOUBLE PRECISION :: totcommtime
113 INTEGER :: totcommcount
114 DOUBLE PRECISION :: totinvtime
115 INTEGER :: totinvcount
116 DOUBLE PRECISION :: totmatmultime
117 INTEGER :: totmatmulcount
118 DOUBLE PRECISION :: totmatsoltime
119 INTEGER :: totmatsolcount
120 !-------------------------------------------------------------------------------
121 
122 !-------------------------------------------------------------------------------
123 !-------------------------------------------------------------------------------
127 !-------------------------------------------------------------------------------
128 LOGICAL :: doblasonly
129 LOGICAL :: doblacscomm
130 
131 !-------------------------------------------------------------------------------
135 !-------------------------------------------------------------------------------
137  INTEGER :: myrow, mycol
138  INTEGER :: nrows, ncols
139  INTEGER :: blockszrows, blockszcols
140  INTEGER, ALLOCATABLE :: map(:,:)
141 END TYPE blacsprocessgrid
142 
143 !-------------------------------------------------------------------------------
147 !-------------------------------------------------------------------------------
149  INTEGER :: iam
150  INTEGER :: nprocs
151  INTEGER :: maincontext
152  INTEGER :: levelcontext
153  TYPE(BlacsProcessGrid) :: pgrid
154  INTEGER :: nbpp
155 END TYPE blacsparameters
156 
157 TYPE(blacsparameters) :: blacs
158 
159 !-------------------------------------------------------------------------------
163 !-------------------------------------------------------------------------------
165  INTEGER :: masterrank
166  INTEGER :: nslaves
167  INTEGER, ALLOCATABLE :: slaveranks(:)
168 END TYPE mastertoslavemapping
169 
170 !-------------------------------------------------------------------------------
174 !-------------------------------------------------------------------------------
176  LOGICAL :: ammaster
177  TYPE(MasterToSlaveMapping) :: msmap
178 
179  INTEGER :: mpicomm, mpitag, mpierr
180 #if defined(MPI_OPT)
181  INTEGER :: mpistatus(MPI_STATUS_SIZE)
182 #endif
183 
184 END TYPE pblaslevelparameters
185 
186 TYPE(PBLASLevelParameters) :: pblas
187 
188 !-------------------------------------------------------------------------------
192 !-------------------------------------------------------------------------------
193 INTEGER, PARAMETER :: OP_NONE = 0
194 INTEGER, PARAMETER :: OP_DONE = 1
195 INTEGER, PARAMETER :: OP_DGEMM = 2
196 INTEGER, PARAMETER :: OP_DGEMV = 3
197 INTEGER, PARAMETER :: OP_DGETRF = 4
198 INTEGER, PARAMETER :: OP_DGETRS = 5
199 
200 !-------------------------------------------------------------------------------
204 !-------------------------------------------------------------------------------
206  DOUBLE PRECISION :: tm
207  INTEGER :: cnt
208  DOUBLE PRECISION :: t1, t2
209 END TYPE timecount
211  TYPE(TimeCount) :: wait, comm, comp
212  TYPE(TimeCount) :: mm, trf, pmm, ptrf
213  TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
214  TYPE(TimeCount) :: extract, waitall
215 END TYPE pblasstats
216 
217 TYPE(PBLASStats) :: pstats
218 
219 !-------------------------------------------------------------------------------
221  DOUBLE PRECISION, ALLOCATABLE :: temparray(:)
222 END TYPE pblastemparray
223 
224 !-------------------------------------------------------------------------------
225 
226 CONTAINS
227 
228 !-------------------------------------------------------------------------------
232 !-------------------------------------------------------------------------------
233 SUBROUTINE bclockinit()
234  !-----------------------------------------------------------------------------
235  ! Local variables
236  !-----------------------------------------------------------------------------
237  INTEGER :: tempint
238 
239  IF ( use_mpiwtime ) THEN
240  timerfreq = 1.0
241  ELSE
242  CALL system_clock(count_rate=tempint)
243  timerfreq = tempint
244  END IF
245 END SUBROUTINE bclockinit
246 
247 !-------------------------------------------------------------------------------
251 !-------------------------------------------------------------------------------
252 SUBROUTINE bsystemclock( ts )
253  !-----------------------------------------------------------------------------
254  ! Formal arguments
255  !-----------------------------------------------------------------------------
256  DOUBLE PRECISION, INTENT(INOUT) :: ts
257 
258  !-----------------------------------------------------------------------------
259  ! Local variables
260  !-----------------------------------------------------------------------------
261  INTEGER :: tempint
262  DOUBLE PRECISION :: tempdbl
263 
264  IF ( use_mpiwtime ) THEN
265 #if defined(MPI_OPT)
266  tempdbl = mpi_wtime()
267  ts = tempdbl
268 #endif
269  ELSE
270  CALL system_clock( tempint )
271  ts = tempint
272  END IF
273 END SUBROUTINE bsystemclock
274 
275 !-------------------------------------------------------------------------------
279 !-------------------------------------------------------------------------------
280 SUBROUTINE fl( u )
281  !-----------------------------------------------------------------------------
282  ! Formal arguments
283  !-----------------------------------------------------------------------------
284  INTEGER, INTENT(IN) :: u
285 
286  CALL flush( u )
287 END SUBROUTINE fl
288 
289 !-------------------------------------------------------------------------------
293 !-------------------------------------------------------------------------------
294 SUBROUTINE chargememory( bytes )
295  !-----------------------------------------------------------------------------
296  ! Formal arguments
297  !-----------------------------------------------------------------------------
298  REAL, INTENT(IN) :: bytes
299 
300  !-----------------------------------------------------------------------------
301  membytes = membytes + bytes
302 END SUBROUTINE chargememory
303 
304 !-------------------------------------------------------------------------------
308 !-------------------------------------------------------------------------------
309 SUBROUTINE chargetime( tot, t2, t1, cnt )
310  !-----------------------------------------------------------------------------
311  ! Formal arguments
312  !-----------------------------------------------------------------------------
313  DOUBLE PRECISION, INTENT(INOUT) :: tot
314  DOUBLE PRECISION, INTENT(IN) :: t2
315  DOUBLE PRECISION, INTENT(IN) :: t1
316  INTEGER, INTENT(INOUT) :: cnt
317 
318  !-----------------------------------------------------------------------------
319  tot = tot + (real(t2-t1))/timerfreq
320  cnt = cnt + 1
321 END SUBROUTINE chargetime
322 
323 !-------------------------------------------------------------------------------
324 !-------------------------------------------------------------------------------
328 !-------------------------------------------------------------------------------
329 SUBROUTINE timecountinit( tc )
330  !----------------------------------------------
331  ! Formal arguments
332  !----------------------------------------------
333  TYPE(TimeCount), INTENT(INOUT) :: tc
334 
335  !----------------------------------------------
336  tc%tm = 0
337  tc%cnt = 0
338 END SUBROUTINE timecountinit
339 
340 !-------------------------------------------------------------------------------
344 !-------------------------------------------------------------------------------
345 SUBROUTINE timecountprint( tc, msg )
346  !----------------------------------------------
347  ! Formal arguments
348  !----------------------------------------------
349  TYPE(TimeCount), INTENT(INOUT) :: tc
350  CHARACTER(*), INTENT(IN) :: msg
351  !----------------------------------------------
352  ! Local Variables
353  !----------------------------------------------
354  DOUBLE PRECISION :: avg
355 
356  !----------------------------------------------
357  avg = 0
358  IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
359  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,' * ',avg,' sec = ',tc%tm,' sec'
360 END SUBROUTINE timecountprint
361 
362 !-------------------------------------------------------------------------------
366 !-------------------------------------------------------------------------------
367 SUBROUTINE plbinitstats()
368  CALL timecountinit( pstats%wait )
369  CALL timecountinit( pstats%comm )
370  CALL timecountinit( pstats%comp )
371  CALL timecountinit( pstats%mm )
372  CALL timecountinit( pstats%trf )
373  CALL timecountinit( pstats%pmm )
374  CALL timecountinit( pstats%ptrf )
375 
376  CALL timecountinit( pstats%mma )
377  CALL timecountinit( pstats%mmb )
378  CALL timecountinit( pstats%mmc )
379  CALL timecountinit( pstats%mmalpha )
380  CALL timecountinit( pstats%mmbeta )
381  CALL timecountinit( pstats%mmrc )
382  CALL timecountinit( pstats%extract )
383  CALL timecountinit( pstats%waitall )
384 END SUBROUTINE plbinitstats
385 
386 !-------------------------------------------------------------------------------
390 !-------------------------------------------------------------------------------
391 SUBROUTINE plbprintstats()
392  CALL timecountprint( pstats%wait, 'PBLAS Wait ' )
393  CALL timecountprint( pstats%comm, 'PBLAS Comm ' )
394  CALL timecountprint( pstats%comp, 'PBLAS Comp ' )
395  CALL timecountprint( pstats%mm, 'PBLAS MM ' )
396  CALL timecountprint( pstats%trf, 'PBLAS TRF ' )
397  CALL timecountprint( pstats%pmm, 'PBLAS PMM ' )
398  CALL timecountprint( pstats%ptrf, 'PBLAS PTRF ' )
399 
400  CALL timecountprint( pstats%mma, 'PBLAS MMA ' )
401  CALL timecountprint( pstats%mmb, 'PBLAS MMB ' )
402  CALL timecountprint( pstats%mmc, 'PBLAS MMC ' )
403  CALL timecountprint( pstats%mmalpha, 'PBLAS MMalpha ' )
404  CALL timecountprint( pstats%mmbeta, 'PBLAS MMbeta ' )
405  CALL timecountprint( pstats%mmrc, 'PBLAS MMRC ' )
406  CALL timecountprint( pstats%extract, 'PBLAS Extract ' )
407  CALL timecountprint( pstats%waitall, 'PBLAS Waitall ' )
408 END SUBROUTINE plbprintstats
409 
410 !-------------------------------------------------------------------------------
414 !-------------------------------------------------------------------------------
415 SUBROUTINE plbinitialize
416  !----------------------------------------------
417  ! Local Variables
418  !----------------------------------------------
419  CHARACTER*100 :: envvar
420  CHARACTER*100 :: envval
421 
422  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Started'; CALL fl(ofu)
423 
424  !----------------------------------------------
425  doblasonly = .true.
426  IF ( m .GE. 2048 ) THEN
427  doblasonly = .true.
428  envvar = 'BLOCKTRI_BLASONLY'
429  CALL getenv( envvar, envval )
430  IF ( envval .EQ. 'TRUE' ) THEN
431  doblasonly = .true.
432  IF(kpdbg) WRITE(ofu,*) 'BLAS ONLY -- obeying env var ', envvar; CALL fl(ofu)
433  END IF
434  END IF
435  IF(kpdbg) WRITE(ofu,*) 'doblasonly = ', doblasonly; CALL fl(ofu)
436 
437  !----------------------------------------------
438  doblacscomm = .false.
439  envvar = 'BLOCKTRI_BLACSCOMM'
440  CALL getenv( envvar, envval )
441  IF ( envval .EQ. 'TRUE' ) THEN
442  doblacscomm = .true.
443  IF(kpdbg) WRITE(ofu,*) 'BLACS COMM -- obeying env var ', envvar; CALL fl(ofu)
444  END IF
445  IF(kpdbg) WRITE(ofu,*) 'doblacscomm = ', doblacscomm; CALL fl(ofu)
446 
447  !----------------------------------------------
448  blacs%nbpp = 1
449  envvar = 'BLOCKTRI_NBPP'
450  CALL getenv( envvar, envval )
451  IF ( envval .NE. '' ) THEN
452  READ( envval, *) blacs%nbpp
453  IF(kpdbg) WRITE(ofu,*) 'NBPP -- obeying env var ', envvar; CALL fl(ofu)
454  END IF
455  IF(kpdbg) WRITE(ofu,*) 'NBPP = ', blacs%nbpp; CALL fl(ofu)
456 
457  !----------------------------------------------
458  CALL plbinitstats()
459 
460  !----------------------------------------------
461  IF (doblasonly) THEN
462  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
463  ELSE
464 #if defined(MPI_OPT)
465  CALL blacs_pinfo( blacs%iam, blacs%nprocs )
466  IF(kpdbg) WRITE(ofu,*) 'BLACS_PINFO ', blacs%iam, ' ', blacs%nprocs; CALL fl(ofu)
467  CALL blacs_get( 0, 0, blacs%maincontext )
468  IF(kpdbg) WRITE(ofu,*) 'BLACS_GET ', blacs%maincontext; CALL fl(ofu)
469  CALL blacs_gridinit( blacs%maincontext, 'R', 1, blacs%nprocs )
470  IF(kpdbg) WRITE(ofu,*) 'BLACS_GRIDINIT'; CALL fl(ofu)
471  CALL blacs_barrier( blacs%maincontext, 'All' )
472 #endif
473  END IF
474 
475  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Done'; CALL fl(ofu)
476 END SUBROUTINE plbinitialize
477 
478 !-------------------------------------------------------------------------------
482 !-------------------------------------------------------------------------------
483 SUBROUTINE plbfinalize
484 
485  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Started'; CALL fl(ofu)
486  IF (doblasonly) THEN
487  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
488  ELSE
489 #if defined(MPI_OPT)
490  CALL blacs_barrier( blacs%maincontext, 'All' )
491  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
492  CALL blacs_exit(1)
493  IF(kpdbg) WRITE(ofu,*) 'BLACS_EXIT nprocs= ', blacs%nprocs; CALL fl(ofu)
494  CALL plbprintstats()
495 #endif
496  END IF
497  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Done'; CALL fl(ofu)
498 END SUBROUTINE plbfinalize
499 !-------------------------------------------------------------------------------
500 
501 
502 !-------------------------------------------------------------------------------
506 !-------------------------------------------------------------------------------
507 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
508  !----------------------------------------------
509  ! Formal arguments
510  !----------------------------------------------
511  INTEGER :: lvl
512  LOGICAL :: ammaster
513  !----------------------------------------------
514  ! Local Variables
515  !----------------------------------------------
516  INTEGER :: I, J, K
517  INTEGER :: maxslaves, actualslaves
518 
519  !----------------------------------------------
520  !Short-circuit PBLAS to BLAS
521  IF ( doblasonly ) THEN
522  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel BLAS only'; CALL fl(ofu)
523  RETURN
524  END IF
525 
526  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Started', ammaster; CALL fl(ofu)
527 
528 #if defined(MPI_OPT)
529  !----------------------------------------------
530  pblas%ammaster = ammaster
531  pblas%msmap%masterrank = -1
532  pblas%msmap%nslaves = 0
533  CALL determinemasterslaveranks()
534 
535  !----------------------------------------------
536  !Set up BLACS process grid and a new context
537  blacs%levelcontext = -1
538  CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
539  IF(kpdbg) WRITE(ofu,*) 'Created new context'; CALL fl(ofu)
540  IF(kpdbg) WRITE(ofu,*) 'Nslaves ',pblas%msmap%nslaves; CALL fl(ofu)
541 
542  blacs%pgrid%blockszrows = 64
543  IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
544  blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
545  IF(kpdbg) WRITE(ofu,*) 'Block NR=',blacs%pgrid%blockszrows; CALL fl(ofu)
546  IF(kpdbg) WRITE(ofu,*) 'Block NC=',blacs%pgrid%blockszcols; CALL fl(ofu)
547 
548  maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
549  blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
550  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
551  IF ( maxslaves .LT. 1 ) maxslaves = 1
552  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
553  IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
554  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
555  actualslaves = maxslaves
556  IF(kpdbg) WRITE(ofu,*) ' Actual slaves ', actualslaves; CALL fl(ofu)
557 
558  blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
559  IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
560  blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
561  IF(kpdbg) WRITE(ofu,*) 'NR=',blacs%pgrid%nrows,' NC=',blacs%pgrid%ncols; CALL fl(ofu)
562  ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
563  k = 0
564  DO i = 1, blacs%pgrid%nrows
565  DO j = 1, blacs%pgrid%ncols
566  blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
567  k = k + 1
568  END DO
569  END DO
570  IF(kpdbg) WRITE(ofu,*) 'NR*NC=',k; CALL fl(ofu)
571  CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
572  blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
573  IF(kpdbg) WRITE(ofu,*) 'GridMap done'; CALL fl(ofu)
574  CALL blacs_gridinfo( blacs%levelcontext, &
575  blacs%pgrid%nrows, blacs%pgrid%ncols, &
576  blacs%pgrid%myrow, blacs%pgrid%mycol )
577  IF(kpdbg) WRITE(ofu,*) 'GridInfo done'; CALL fl(ofu)
578  IF(kpdbg) WRITE(ofu,*) 'Myrowcol ',blacs%pgrid%myrow,' ',blacs%pgrid%mycol; CALL fl(ofu)
579 
580  !----------------------------------------------
581  pblas%mpicomm = ns_comm
582  pblas%mpitag = 1234
583 
584  !----------------------------------------------
585  CALL blacs_barrier( blacs%maincontext, 'All' )
586  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
587 
588  !----------------------------------------------
589  IF ( ammaster ) THEN
590  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Master'; CALL fl(ofu)
591  ELSE
592  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Slave'; CALL fl(ofu)
593  CALL slaveservice()
594  END IF
595 
596  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Done', ammaster; CALL fl(ofu)
597 #endif
598 
599 END SUBROUTINE plbforwardinitializelevel
600 
601 !-------------------------------------------------------------------------------
605 !-------------------------------------------------------------------------------
606 #if defined(MPI_OPT)
607 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
608  !----------------------------------------------
609  ! Formal arguments
610  !----------------------------------------------
611  INTEGER :: lvl
612  LOGICAL :: ammaster
613 
614  !----------------------------------------------
615  !Short-circuit PBLAS to BLAS
616  IF ( doblasonly ) THEN
617  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel BLAS only'; CALL fl(ofu)
618  RETURN
619  END IF
620  !----------------------------------------------
621  !If I'm master, tell slaves that they're done
622  IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) ) THEN
623  CALL masterbcastnextop( op_done )
624  END IF
625 
626  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
627  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
628  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel pariah !level-barrier';CALL fl(ofu)
629  ELSE
630  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level-barrier'; CALL fl(ofu)
631  CALL blacs_barrier( blacs%levelcontext, 'All' )
632  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level grid exit'; CALL fl(ofu)
633  CALL blacs_gridexit( blacs%levelcontext )
634  END IF
635 
636  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel main-barrier'; CALL fl(ofu)
637  CALL blacs_barrier( blacs%maincontext, 'All' )
638 
639  !----------------------------------------------
640  pblas%msmap%masterrank = -1
641  pblas%msmap%nslaves = 0
642  DEALLOCATE( pblas%msmap%slaveranks )
643  DEALLOCATE( blacs%pgrid%map )
644 
645  !----------------------------------------------
646  CALL plbprintstats()
647 
648  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel ', ammaster; CALL fl(ofu)
649 
650 END SUBROUTINE plbforwardfinalizelevel
651 #endif
652 
653 !-------------------------------------------------------------------------------
657 !-------------------------------------------------------------------------------
658 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
659  !----------------------------------------------
660  ! Formal arguments
661  !----------------------------------------------
662  INTEGER :: lvl
663  LOGICAL :: ammaster
664 
665  !----------------------------------------------
666  ! Nothing to do
667 END SUBROUTINE plbbackwardinitializelevel
668 
669 !-------------------------------------------------------------------------------
673 !-------------------------------------------------------------------------------
674 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
675  !----------------------------------------------
676  ! Formal arguments
677  !----------------------------------------------
678  INTEGER :: lvl
679  LOGICAL :: ammaster
680 
681  !----------------------------------------------
682  !Nothing to do
683 END SUBROUTINE plbbackwardfinalizelevel
684 
685 !-------------------------------------------------------------------------------
689 !-------------------------------------------------------------------------------
690 #if defined(MPI_OPT)
691 SUBROUTINE determinemasterslaveranks()
692  !----------------------------------------------
693  ! Local Variables
694  !----------------------------------------------
695  LOGICAL, ALLOCATABLE :: send_ammaster(:), recv_ammaster(:), assigned(:)
696  TYPE(MasterToSlaveMapping), ALLOCATABLE :: allmsmaps(:)
697  INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
698  INTEGER :: mpi_err, I, J, masterindex
699 
700  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks started'; CALL fl(ofu)
701 
702  !----------------------------------------------
703  !Initialize temporary data structures used for mapping
704  ALLOCATE( send_ammaster(1:1) )
705  ALLOCATE( recv_ammaster(1:nranks) )
706  send_ammaster(1) = pblas%ammaster
707  ALLOCATE( assigned(1:nranks) )
708  DO i = 1, nranks
709  assigned( i ) = .false.
710  END DO
711  ALLOCATE( allmsmaps(1:nranks) )
712  DO i = 1, nranks
713  allmsmaps(i)%masterrank = -1
714  allmsmaps(i)%nslaves = 0
715  END DO
716 
717  !----------------------------------------------
718  CALL mpi_allgather( send_ammaster, 1, mpi_logical, &
719  recv_ammaster, 1, mpi_logical, &
720  ns_comm, mpi_err )
721 
722  !----------------------------------------------
723  !Figure out total number of masters
724  totmasters = 0
725  DO i = 1, nranks
726  IF(kpdbg) WRITE(ofu,*) ' recv_ammaster ',i,' ',recv_ammaster(i); CALL fl(ofu)
727  IF ( recv_ammaster(i) ) THEN
728  totmasters = totmasters + 1
729  END IF
730  END DO
731 
732  IF ( totmasters .LE. 0 ) THEN
733  IF(kpdbg) WRITE(ofu,*) 'Total masters must be greater than 0'; CALL fl(ofu)
734  stop
735  END IF
736 
737  nslavespermaster = (nranks / totmasters)
738  IF(kpdbg) WRITE(ofu,*) 'Avg nslavespermaster',nslavespermaster; CALL fl(ofu)
739 
740  !----------------------------------------------
741  !Assign slaves to each master
742  tempmaster = 0
743  masterloop: DO i = 1, nranks
744  IF ( recv_ammaster(i) ) THEN
745 
746  tempmaster = tempmaster + 1
747 
748  !-------------------------------------------------------------
749  !Give one more slave to earlier ranks, if not evenly divisible
750  adjustednslavespermaster = nslavespermaster
751  IF ( tempmaster .LE. mod( nranks, totmasters ) ) THEN
752  adjustednslavespermaster = adjustednslavespermaster + 1
753  END IF
754 
755  IF(kpdbg) WRITE(ofu,*) 'Adjusted nslavespm',adjustednslavespermaster; CALL fl(ofu)
756  allmsmaps(i)%masterrank = i-1
757  ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
758 
759  !-------------------------------------------------------------
760  !Add the master as first in its own slave rank set
761  assigned(i) = .true.
762  allmsmaps(i)%nslaves = 1
763  allmsmaps(i)%slaveranks(1) = i-1
764 
765  !-------------------------------------------------------------
766  !Assign the next block of unassigned slaves
767  slaveloop: DO j = 1, nranks
768  IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster ) THEN
769  EXIT slaveloop
770  END IF
771  IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) ) THEN
772  assigned(j) = .true.
773  allmsmaps(j)%masterrank = i-1
774  allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
775  allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
776  END IF
777  END DO slaveloop
778  END IF
779  END DO masterloop
780  IF(kpdbg) WRITE(ofu,*) 'Computed all mappings'; CALL fl(ofu)
781 
782  !----------------------------------------------
783  !Copy the mapping relevant to us (if I'm master, my own, else, my master's)
784  masterindex = rank+1
785  IF ( allmsmaps(masterindex)%masterrank .NE. rank ) THEN
786  masterindex = allmsmaps(masterindex)%masterrank+1
787  END IF
788  pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
789  pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
790  ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
791  pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
792 
793  IF(kpdbg) WRITE(ofu,*) 'Extracted my mappings'; CALL fl(ofu)
794  IF(kpdbg) WRITE(ofu,*) 'My master rank',masterindex-1; CALL fl(ofu)
795  IF(kpdbg) WRITE(ofu,*) 'Nslaves in my set',allmsmaps(masterindex)%nslaves; CALL fl(ofu)
796  DO j = 1, allmsmaps(masterindex)%nslaves
797  IF(kpdbg) WRITE(ofu,*) 'Slave',j,' ',allmsmaps(masterindex)%slaveranks(j);CALL fl(ofu)
798  END DO
799 
800  !----------------------------------------------
801  !Deallocations of temporary space
802  DO i = 1, nranks
803  IF ( recv_ammaster(i) ) THEN
804  DEALLOCATE( allmsmaps(i)%slaveranks )
805  END IF
806  END DO
807  DEALLOCATE( assigned )
808  DEALLOCATE( allmsmaps )
809  DEALLOCATE( send_ammaster )
810  DEALLOCATE( recv_ammaster )
811 
812  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks done'; CALL fl(ofu)
813 END SUBROUTINE determinemasterslaveranks
814 #endif
815 
816 !-------------------------------------------------------------------------------
820 !-------------------------------------------------------------------------------
821 SUBROUTINE masterbcastnextop( nextop )
822  !----------------------------------------------
823  ! Formal arguments
824  !----------------------------------------------
825  INTEGER, INTENT(IN) :: nextop
826  !----------------------------------------------
827  ! Local variables
828  !----------------------------------------------
829  INTEGER :: K, destrank
830  REAL(dp) :: realnextop
831 
832  realnextop = real(nextop)
833  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp started ', nextop; CALL fl(ofu)
834  CALL masterbcastvalue( realnextop )
835  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp done ', nextop; CALL fl(ofu)
836 END SUBROUTINE masterbcastnextop
837 
838 !-------------------------------------------------------------------------------
842 !-------------------------------------------------------------------------------
843 SUBROUTINE slavegetnextop( nextop )
844  !----------------------------------------------
845  ! Formal arguments
846  !----------------------------------------------
847  REAL(dp) :: realnextop
848  INTEGER, INTENT(INOUT) :: nextop
849 
850  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp started'; CALL fl(ofu)
851  CALL slavereceivevalue( realnextop )
852  nextop = int( realnextop )
853  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp done ', nextop; CALL fl(ofu)
854 END SUBROUTINE slavegetnextop
855 
856 !-------------------------------------------------------------------------------
860 !-------------------------------------------------------------------------------
861 SUBROUTINE slaveservice()
862  !----------------------------------------------
863  ! Local variables
864  !----------------------------------------------
865  INTEGER :: nextop
866 
867  !----------------------------------------------
868  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
869  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
870  IF(kpdbg) WRITE(ofu,*) 'SlaveService pariah falling out '; CALL fl(ofu)
871  ELSE
872  IF(kpdbg) WRITE(ofu,*) 'SlaveService started '; CALL fl(ofu)
873  oploop: DO WHILE (.true.)
874  nextop = op_none
875  CALL slavegetnextop( nextop )
876  IF ( nextop .EQ. op_done ) THEN
877  EXIT oploop
878  ELSE IF ( nextop .EQ. op_dgemm ) THEN
879  CALL slavedgemm()
880  ELSE IF ( nextop .EQ. op_dgetrf ) THEN
881  CALL slavedgetrf()
882  ELSE IF ( nextop .EQ. op_dgetrs ) THEN
883  CALL slavedgetrs()
884  ELSE
885  IF(kpdbg) WRITE(ofu,*) 'Bad Next Op', nextop; CALL fl(ofu)
886  stop
887  END IF
888  END DO oploop
889  IF(kpdbg) WRITE(ofu,*) 'SlaveService done '; CALL fl(ofu)
890  END IF
891 END SUBROUTINE slaveservice
892 
893 !-------------------------------------------------------------------------------
897 !-------------------------------------------------------------------------------
898 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
899  A, nrows, ncols, subA, subnrows, subncols )
900  !----------------------------------------------
901  ! Formal arguments
902  !----------------------------------------------
903  INTEGER, INTENT(IN) :: bszr, bszc
904  INTEGER, INTENT(IN) :: pnr, pnc
905  INTEGER, INTENT(IN) :: pi, pj
906  REAL(dp), INTENT(IN) :: A(:,:)
907  INTEGER, INTENT(IN) :: nrows, ncols
908  REAL(dp), INTENT(OUT) :: subA(:)
909  INTEGER, INTENT(IN) :: subnrows, subncols
910  !----------------------------------------------
911  ! Local variables
912  !----------------------------------------------
913  INTEGER :: I, J, K, Q, R
914  INTEGER :: thisblkr, thisblkc
915 
916  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
917 
918  CALL bsystemclock(pstats%extract%t1)
919 
920  k = 0
921  DO j = 1, ncols, bszc
922  thisblkc = (j-1) / bszc
923  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
924  DO r = 1, bszc
925  IF ( j+r-1 .LE. ncols ) THEN
926  DO i = 1, nrows, bszr
927  thisblkr = (i-1) / bszr
928  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
929  DO q = 1, bszr
930  IF ( i+q-1 .LE. nrows ) THEN
931  k = k + 1
932  suba(k) = a(i+q-1,j+r-1)
933  END IF
934  END DO
935  END IF
936  END DO
937  END IF
938  END DO
939  END IF
940  END DO
941 
942  !----------------------------------------------
943  IF ( k .NE. subnrows*subncols ) THEN
944  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
945  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
946  stop
947  END IF
948 
949  CALL bsystemclock(pstats%extract%t2)
950  CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
951 
952  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix done K', k; CALL fl(ofu)
953 
954 END SUBROUTINE extractsubmatrix
955 
956 !-------------------------------------------------------------------------------
962 !-------------------------------------------------------------------------------
963 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
964  !----------------------------------------------
965  ! Formal arguments
966  !----------------------------------------------
967  REAL(dp), INTENT(IN) :: A(:,:)
968  INTEGER, INTENT(IN) :: nrows, ncols
969  REAL(dp), INTENT(OUT) :: ssubA(:)
970  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
971  !----------------------------------------------
972  ! Local variables
973  !----------------------------------------------
974  INTEGER :: I, J, K
975  INTEGER :: aslaverank
976  TYPE(PBLASTempArray), ALLOCATABLE :: subA(:,:)
977  INTEGER :: subnrows, subncols
978  INTEGER :: maxsubnrows, maxsubncols
979  INTEGER :: bszr, bszc
980  INTEGER :: pnr, pnc
981 #if defined(MPI_OPT)
982  INTEGER, EXTERNAL :: NUMROC
983  INTEGER :: mpistatus(MPI_STATUS_SIZE)
984 #endif
985 
986  !----------------------------------------------
987  INTEGER :: mpinisends
988  INTEGER, ALLOCATABLE :: mpireqs(:)
989  INTEGER :: mpierr
990  INTEGER, ALLOCATABLE :: mpistatuses(:,:)
991  INTEGER :: waitmatchindex
992 
993  !----------------------------------------------
994  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix started'; CALL fl(ofu)
995  CALL bsystemclock(pstats%comm%t1)
996 
997 #if defined(MPI_OPT)
998  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
999  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1000 
1001  !----------------------------------------------
1002  mpinisends = 0
1003  ALLOCATE( mpireqs( pnr * pnc ) )
1004  ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1005 
1006  !----------------------------------------------
1007  maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1008  maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1009  ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1010 
1011  DO i = 1, pnr
1012  DO j = 1, pnc
1013  aslaverank = blacs%pgrid%map(i,j)
1014 
1015  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1016  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1017 
1018  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix to ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1019 
1020  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1021  !Self (local)
1022  IF ( aslaverank .NE. rank ) THEN
1023  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1024  stop
1025  END IF
1026  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1027  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1028  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1029  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1030  stop
1031  END IF
1032 
1033  !Return a copy of this submatrix (will be like sent+received to/by self)
1034  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting self submatrix'; CALL fl(ofu)
1035  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1036  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1037  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix kept self submatrix'; CALL fl(ofu)
1038  ELSE
1039  !A remote slave; send its corresponding matrix fragment
1040  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting submatrix',i,j; CALL fl(ofu)
1041  ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1042  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1043  i, j, a, nrows, ncols, suba(i,j)%temparray, &
1044  subnrows,subncols)
1045  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracted submatrix'; CALL fl(ofu)
1046  IF ( doblacscomm ) THEN
1047  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1048  suba(i,j)%temparray, subnrows, i-1, j-1 )
1049  ELSE
1050  mpinisends = mpinisends + 1
1051  CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1052  aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1053  END IF
1054  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix sent slave submatrix'; CALL fl(ofu)
1055  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix deallocated temp submatrix'; CALL fl(ofu)
1056  END IF
1057  END DO
1058  END DO
1059 
1060  CALL bsystemclock(pstats%waitall%t1)
1061  CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1062  CALL bsystemclock(pstats%waitall%t2)
1063  waitall_time=waitall_time+(pstats%waitall%t2-pstats%waitall%t1)
1064  CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1065 
1066  DO i = 1, pnr
1067  DO j = 1, pnc
1068  IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) ) THEN
1069  DEALLOCATE( suba(i,j)%temparray )
1070  END IF
1071  END DO
1072  END DO
1073  DEALLOCATE( suba )
1074  DEALLOCATE( mpireqs )
1075  DEALLOCATE( mpistatuses )
1076 
1077  CALL bsystemclock(pstats%comm%t2)
1078  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1079 #endif
1080  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix done'; CALL fl(ofu)
1081 
1082 END SUBROUTINE mastersendmatrix
1083 
1084 !-------------------------------------------------------------------------------
1088 !-------------------------------------------------------------------------------
1089 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1090  !----------------------------------------------
1091  ! Formal arguments
1092  !----------------------------------------------
1093 #if defined(MPI_OPT)
1094  INTEGER :: mpistatus(MPI_STATUS_SIZE)
1095 #endif
1096  INTEGER, INTENT(IN) :: nrows, ncols
1097  REAL(dp), INTENT(OUT) :: subA(:)
1098  INTEGER, INTENT(IN) :: subnrows, subncols
1099  !----------------------------------------------
1100  ! Local variables
1101  !----------------------------------------------
1102  INTEGER :: masterrank
1103  INTEGER :: mpierr
1104 
1105  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1106 
1107 #if defined(MPI_OPT)
1108  masterrank = blacs%pgrid%map(1,1)
1109 
1110  CALL bsystemclock(pstats%comm%t1)
1111 
1112  IF ( doblacscomm ) THEN
1113  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1114  ELSE
1115  CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1116  masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1117  END IF
1118 
1119  CALL bsystemclock(pstats%comm%t2)
1120  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1121 #endif
1122 
1123  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix done'; CALL fl(ofu)
1124 END SUBROUTINE slavereceivematrix
1125 
1126 !-------------------------------------------------------------------------------
1130 !-------------------------------------------------------------------------------
1131 SUBROUTINE masterbcastvalue( val )
1132 IMPLICIT NONE
1133  !----------------------------------------------
1134  ! Formal arguments
1135  !----------------------------------------------
1136  REAL(dp), INTENT(IN) :: val
1137 
1138 #if defined(MPI_OPT)
1139  CALL dgebs2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1 );
1140 #endif
1141  IF(kpdbg) WRITE(ofu,*) 'MasterBcastValue bcast to slaves'; CALL fl(ofu)
1142 
1143 END SUBROUTINE masterbcastvalue
1144 
1145 !-------------------------------------------------------------------------------
1149 !-------------------------------------------------------------------------------
1150 SUBROUTINE slavereceivevalue( val )
1151 IMPLICIT NONE
1152  !----------------------------------------------
1153  ! Formal arguments
1154  !----------------------------------------------
1155  REAL(dp), INTENT(OUT) :: val
1156 
1157 #if defined(MPI_OPT)
1158  CALL dgebr2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1, 0, 0 )
1159 #endif
1160  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveValue bcast from master'
1161  CALL fl(ofu)
1162 
1163 END SUBROUTINE slavereceivevalue
1164 
1165 !-------------------------------------------------------------------------------
1169 !-------------------------------------------------------------------------------
1170 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1171  A, nrows, ncols, subA, subnrows, subncols )
1172  !----------------------------------------------
1173  ! Formal arguments
1174  !----------------------------------------------
1175  INTEGER, INTENT(IN) :: bszr, bszc
1176  INTEGER, INTENT(IN) :: pnr, pnc
1177  INTEGER, INTENT(IN) :: pi, pj
1178  REAL(dp), INTENT(OUT) :: A(:,:)
1179  INTEGER, INTENT(IN) :: nrows, ncols
1180  REAL(dp), INTENT(IN) :: subA(:)
1181  INTEGER, INTENT(IN) :: subnrows, subncols
1182  !----------------------------------------------
1183  ! Local variables
1184  !----------------------------------------------
1185  INTEGER :: I, J, K, Q, R
1186  INTEGER :: thisblkr, thisblkc
1187 
1188  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
1189 
1190  k = 0
1191  DO j = 1, ncols, bszc
1192  thisblkc = (j-1) / bszc
1193  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
1194  DO r = 1, bszc
1195  IF ( j+r-1 .LE. ncols ) THEN
1196  DO i = 1, nrows, bszr
1197  thisblkr = (i-1) / bszr
1198  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1199  DO q = 1, bszr
1200  IF ( i+q-1 .LE. nrows ) THEN
1201  k = k + 1
1202  a(i+q-1,j+r-1) = suba(k)
1203  END IF
1204  END DO
1205  END IF
1206  END DO
1207  END IF
1208  END DO
1209  END IF
1210  END DO
1211 
1212  !----------------------------------------------
1213  IF ( k .NE. subnrows*subncols ) THEN
1214  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1215  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
1216  stop
1217  END IF
1218 
1219  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix done K', k; CALL fl(ofu)
1220 
1221 END SUBROUTINE injectsubmatrix
1222 
1223 !-------------------------------------------------------------------------------
1227 !-------------------------------------------------------------------------------
1228 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1229  !----------------------------------------------
1230  ! Formal arguments
1231  !----------------------------------------------
1232  INTEGER, INTENT(IN) :: bszr
1233  INTEGER, INTENT(IN) :: pnr
1234  INTEGER, INTENT(IN) :: pi
1235  INTEGER, INTENT(OUT) :: V(:)
1236  INTEGER, INTENT(IN) :: nrows
1237  INTEGER, INTENT(IN) :: subV(:)
1238  INTEGER, INTENT(IN) :: subnrows
1239  !----------------------------------------------
1240  ! Local variables
1241  !----------------------------------------------
1242  INTEGER :: I, K, Q
1243  INTEGER :: thisblkr
1244 
1245  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector NR=', subnrows; CALL fl(ofu)
1246 
1247  k = 0
1248  DO i = 1, nrows, bszr
1249  thisblkr = (i-1) / bszr
1250  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1251  DO q = 1, bszr
1252  IF ( i+q-1 .LE. nrows ) THEN
1253  k = k + 1
1254  v(i+q-1) = subv(k)
1255  END IF
1256  END DO
1257  END IF
1258  END DO
1259 
1260  !----------------------------------------------
1261  IF ( k .NE. subnrows ) THEN
1262  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1263  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows; CALL fl(ofu)
1264  stop
1265  END IF
1266 
1267  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector done K', k; CALL fl(ofu)
1268 
1269 END SUBROUTINE injectsubvector
1270 
1271 !-------------------------------------------------------------------------------
1277 !-------------------------------------------------------------------------------
1278 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1279  !----------------------------------------------
1280  ! Formal arguments
1281  !----------------------------------------------
1282  REAL(dp), INTENT(OUT) :: A(:,:)
1283  INTEGER, INTENT(IN) :: nrows, ncols
1284  REAL(dp), INTENT(IN) :: ssubA(:)
1285  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
1286  !----------------------------------------------
1287  ! Local variables
1288  !----------------------------------------------
1289  INTEGER :: I, J, K
1290  INTEGER :: aslaverank
1291  REAL(dp), ALLOCATABLE :: subA(:)
1292  INTEGER :: subnrows, subncols
1293  INTEGER :: bszr, bszc
1294  INTEGER :: pnr, pnc
1295 #if defined(MPI_OPT)
1296  INTEGER, EXTERNAL :: NUMROC
1297 #endif
1298 
1299  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix started'; CALL fl(ofu)
1300  CALL bsystemclock(pstats%comm%t1)
1301 
1302 #if defined(MPI_OPT)
1303  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1304  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1305 
1306  DO i = 1, pnr
1307  DO j = 1, pnc
1308  aslaverank = blacs%pgrid%map(i,j)
1309 
1310  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1311  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1312 
1313  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1314 
1315  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1316  !Self (local)
1317  IF ( aslaverank .NE. rank ) THEN
1318  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1319  stop
1320  END IF
1321  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1322  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1323  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1324  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1325  stop
1326  END IF
1327 
1328  !Use the given copy of this submatrix (like sent+received to/by self)
1329  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting self submatrix'; CALL fl(ofu)
1330  CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1331  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1332  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix kept self submatrix'; CALL fl(ofu)
1333  ELSE
1334  !A remote slave; receive its corresponding matrix fragment
1335  ALLOCATE( suba( 1 : subnrows*subncols ) )
1336  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix receiving slave submatrix'; CALL fl(ofu)
1337  CALL bsystemclock(pstats%wait%t1)
1338  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1339  suba, subnrows, i-1, j-1 )
1340  CALL bsystemclock(pstats%wait%t2)
1341  CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1342  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting submatrix',i,j; CALL fl(ofu)
1343  CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1344  i, j, a, nrows, ncols, suba, subnrows, subncols )
1345  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injected submatrix'; CALL fl(ofu)
1346  DEALLOCATE( suba )
1347  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix deallocated temp submatrix'; CALL fl(ofu)
1348  END IF
1349  END DO
1350  END DO
1351 
1352  CALL bsystemclock(pstats%comm%t2)
1353  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1354 #endif
1355 
1356  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix done'; CALL fl(ofu)
1357 
1358 END SUBROUTINE masterrecvmatrix
1359 
1360 !-------------------------------------------------------------------------------
1364 !-------------------------------------------------------------------------------
1365 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1366  !----------------------------------------------
1367  ! Formal arguments
1368  !----------------------------------------------
1369  INTEGER, INTENT(IN) :: nrows, ncols
1370  REAL(dp), INTENT(IN) :: subA(:)
1371  INTEGER, INTENT(IN) :: subnrows, subncols
1372 
1373  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1374 
1375  CALL bsystemclock(pstats%comm%t1)
1376 #if defined(MPI_OPT)
1377  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1378 #endif
1379  CALL bsystemclock(pstats%comm%t2)
1380  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1381 
1382  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix done'; CALL fl(ofu)
1383 END SUBROUTINE slavesendmatrix
1384 
1385 !-------------------------------------------------------------------------------
1391 !-------------------------------------------------------------------------------
1392 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1393  !----------------------------------------------
1394  ! Formal arguments
1395  !----------------------------------------------
1396  INTEGER, INTENT(OUT) :: V(:)
1397  INTEGER, INTENT(IN) :: nrows
1398  INTEGER, INTENT(IN) :: ssubV(:)
1399  INTEGER, INTENT(IN) :: ssubnrows
1400  !----------------------------------------------
1401  ! Local variables
1402  !----------------------------------------------
1403  INTEGER :: I, J, K
1404  INTEGER :: aslaverank
1405  INTEGER, ALLOCATABLE :: subV(:)
1406  INTEGER :: subnrows
1407  INTEGER :: bszr
1408  INTEGER :: pnr
1409 #if defined(MPI_OPT)
1410  INTEGER, EXTERNAL :: NUMROC
1411 #endif
1412 
1413  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector started'; CALL fl(ofu)
1414  CALL bsystemclock(pstats%comm%t1)
1415 
1416 #if defined(MPI_OPT)
1417  bszr = blacs%pgrid%blockszrows
1418  pnr = blacs%pgrid%nrows
1419 
1420  DO i = 1, pnr
1421  DO j = 1, 1
1422  aslaverank = blacs%pgrid%map(i,j)
1423 
1424  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1425 
1426  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1427 
1428  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1429  !Self (local)
1430  IF ( aslaverank .NE. rank ) THEN
1431  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1432  stop
1433  END IF
1434  IF ( ssubnrows .NE. subnrows ) THEN
1435  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1436  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows; CALL fl(ofu)
1437  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows; CALL fl(ofu)
1438  stop
1439  END IF
1440 
1441  !Use the given copy of this subvector (like sent+received to/by self)
1442  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting self subvector'; CALL fl(ofu)
1443  CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1444  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector kept self subvector'; CALL fl(ofu)
1445  ELSE
1446  !A remote slave; receive its corresponding vector fragment
1447  ALLOCATE( subv( 1 : subnrows ) )
1448  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector receiving slave subvector'; CALL fl(ofu)
1449  CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1450  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting subvector',i,j; CALL fl(ofu)
1451  CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1452  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injected subvector'; CALL fl(ofu)
1453  DEALLOCATE( subv )
1454  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector deallocated temp subvector'; CALL fl(ofu)
1455  END IF
1456  END DO
1457  END DO
1458 
1459  CALL bsystemclock(pstats%comm%t2)
1460  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1461 #endif
1462 
1463  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector done'; CALL fl(ofu)
1464 
1465 END SUBROUTINE masterrecvvector
1466 
1467 !-------------------------------------------------------------------------------
1473 !-------------------------------------------------------------------------------
1474 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1475  !----------------------------------------------
1476  ! Formal arguments
1477  !----------------------------------------------
1478  INTEGER, INTENT(IN) :: nrows
1479  INTEGER, INTENT(IN) :: subV(:)
1480  INTEGER, INTENT(IN) :: subnrows
1481  INTEGER :: pj
1482 
1483  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector started ', subnrows; CALL fl(ofu)
1484 
1485  CALL bsystemclock(pstats%comm%t1)
1486 
1487  pj = blacs%pgrid%mycol
1488  IF ( pj .GT. 0 ) THEN
1489  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector skipping since mycol>0 ',pj; CALL fl(ofu)
1490  ELSE
1491 #if defined(MPI_OPT)
1492  CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1493 #endif
1494  END IF
1495 
1496  CALL bsystemclock(pstats%comm%t2)
1497  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1498 
1499  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector done'; CALL fl(ofu)
1500 END SUBROUTINE slavesendvector
1501 
1502 !-------------------------------------------------------------------------------
1506 !-------------------------------------------------------------------------------
1507 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1508  !----------------------------------------------
1509  ! Formal arguments
1510  !----------------------------------------------
1511  REAL(dp), INTENT(IN) :: alpha, beta
1512  REAL(dp), INTENT(IN) :: A(:,:), B(:,:)
1513  REAL(dp), INTENT(INOUT) :: C(:,:)
1514  !----------------------------------------------
1515  ! Local variables
1516  !----------------------------------------------
1517  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1518  INTEGER :: descA(9), descB(9), descC(9)
1519  INTEGER :: lldA, lldB, lldC
1520  INTEGER :: nrows, ncols, subnrows, subncols
1521  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1522  LOGICAL :: useblas
1523  INTEGER :: ctxt, info
1524 #if defined(MPI_OPT)
1525  INTEGER, EXTERNAL :: NUMROC
1526 #endif
1527  REAL(dp) :: ton, toff
1528  INTEGER :: row
1529 
1530  CALL bsystemclock(skston)
1531 
1532  IF(kpdbg) WRITE(ofu,*) 'Using direct diagonal matrix mul'; CALL fl(ofu)
1533  CALL bsystemclock(ton)
1534  DO row = 1, m
1535  c(row,1)=alpha*a(row,1)*b(row,1)+beta*c(row,1)
1536  END DO
1537  CALL bsystemclock(toff)
1538  dgemm_time = dgemm_time + (toff-ton)
1539 
1540  CALL bsystemclock(skstoff)
1541 ! matmul_time = matmul_time + (skstoff - skston) !SPH-THIS IS NOT DEFINED!
1542 END SUBROUTINE plbdgemm
1543 
1544 !-------------------------------------------------------------------------------
1548 !-------------------------------------------------------------------------------
1549 SUBROUTINE slavedgemm()
1550  !----------------------------------------------
1551  ! Local variables
1552  !----------------------------------------------
1553  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1554  REAL(dp) :: alpha, beta
1555  INTEGER :: descA(9), descB(9), descC(9)
1556  INTEGER :: lldA, lldB, lldC
1557  INTEGER :: nrows, ncols, subnrows, subncols
1558  INTEGER :: bszr, bszc, pnr, pnc
1559  INTEGER :: pi, pj
1560  INTEGER :: ctxt, info
1561 #if defined(MPI_OPT)
1562  INTEGER, EXTERNAL :: NUMROC
1563 #endif
1564 
1565  CALL bsystemclock(pstats%mm%t1)
1566 
1567 #if defined(MPI_OPT)
1568  nrows = m; ncols = m
1569  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1570  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1571  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1572 
1573  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1574  subncols = numroc( ncols, bszc, pj, 0, pnc )
1575 
1576  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocating subABC'; CALL fl(ofu)
1577  ALLOCATE( suba(1:subnrows*subncols) )
1578  ALLOCATE( subb(1:subnrows*subncols) )
1579  ALLOCATE( subc(1:subnrows*subncols) )
1580  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocated subABC'; CALL fl(ofu)
1581 
1582  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1583  ctxt = blacs%levelcontext
1584  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1585  lldb = llda; lldc = llda
1586  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1587  CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1588  CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1589  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1590 
1591  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving A'; CALL fl(ofu)
1592  CALL bsystemclock(pstats%mma%t1)
1593  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1594  CALL bsystemclock(pstats%mma%t2)
1595  CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1596 
1597  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving B'; CALL fl(ofu)
1598  CALL bsystemclock(pstats%mmb%t1)
1599  CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1600  CALL bsystemclock(pstats%mmb%t2)
1601  CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1602 
1603  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving C'; CALL fl(ofu)
1604  CALL bsystemclock(pstats%mmc%t1)
1605  CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1606  CALL bsystemclock(pstats%mmc%t2)
1607  CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1608 
1609  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving alpha'; CALL fl(ofu)
1610  CALL bsystemclock(pstats%mmalpha%t1)
1611  CALL slavereceivevalue( alpha )
1612  CALL bsystemclock(pstats%mmalpha%t2)
1613  CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1614 
1615  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving beta'; CALL fl(ofu)
1616  CALL bsystemclock(pstats%mmbeta%t1)
1617  CALL slavereceivevalue( beta )
1618  CALL bsystemclock(pstats%mmbeta%t2)
1619  CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1620 
1621  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM invoking PDGEMM'; CALL fl(ofu)
1622  CALL bsystemclock(pstats%comp%t1)
1623  CALL pdgemm( 'N', 'N', m, m, m, alpha, &
1624  suba, 1, 1, desca, &
1625  subb, 1, 1, descb, &
1626  beta, &
1627  subc, 1, 1, descc )
1628  CALL bsystemclock(pstats%comp%t2)
1629  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1630  CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1631  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM done PDGEMM'; CALL fl(ofu)
1632 
1633  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sending result matrix to master'; CALL fl(ofu)
1634  CALL bsystemclock(pstats%mmrc%t1)
1635  CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1636  CALL bsystemclock(pstats%mmrc%t2)
1637  CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1638  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sent result matrix to master'; CALL fl(ofu)
1639 
1640  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocating subABC'; CALL fl(ofu)
1641  DEALLOCATE( suba )
1642  DEALLOCATE( subb )
1643  DEALLOCATE( subc )
1644  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocated subABC'; CALL fl(ofu)
1645 #endif
1646 
1647  CALL bsystemclock(pstats%mm%t2)
1648  CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1649 
1650 END SUBROUTINE slavedgemm
1651 
1652 !-------------------------------------------------------------------------------
1656 !-------------------------------------------------------------------------------
1657 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1658  !----------------------------------------------
1659  ! Formal arguments
1660  !----------------------------------------------
1661  REAL(dp), INTENT(IN) :: alpha, beta
1662  REAL(dp), INTENT(IN) :: A(:,:)
1663  REAL(dp), INTENT(IN) :: x(:)
1664  REAL(dp), INTENT(INOUT) :: y(:)
1665 
1666  INTEGER :: i
1667  REAL(dp) :: ton, toff
1668 
1669  CALL bsystemclock(ton)
1670  DO i=1, m
1671  y(i) = alpha*a(i,1)*x(i) + beta*y(i)
1672  END DO
1673  CALL bsystemclock(toff)
1674  dgemv_time = dgemv_time + (toff-ton)
1675 END SUBROUTINE plbdgemv
1676 
1677 !-------------------------------------------------------------------------------
1681 !-------------------------------------------------------------------------------
1682 SUBROUTINE plbdgetrf( A, piv, info )
1683  !----------------------------------------------
1684  ! Formal arguments
1685  !----------------------------------------------
1686  REAL(dp), INTENT(INOUT) :: A(:,:)
1687  INTEGER, INTENT(OUT) :: piv(:)
1688  INTEGER, INTENT(INOUT) :: info
1689  !----------------------------------------------
1690  ! Local variables
1691  !----------------------------------------------
1692  REAL(dp), ALLOCATABLE :: subA(:)
1693  INTEGER, ALLOCATABLE :: subpiv(:)
1694  INTEGER :: descA(9)
1695  INTEGER :: lldA
1696  INTEGER :: nrows, ncols, subnrows, subncols
1697  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1698  INTEGER :: ctxt
1699  LOGICAL :: useblas
1700 #if defined(MPI_OPT)
1701  INTEGER, EXTERNAL :: NUMROC
1702 #endif
1703  REAL(dp) :: ton, toff
1704 
1705  info = 0
1706 
1707 END SUBROUTINE plbdgetrf
1708 
1709 !-------------------------------------------------------------------------------
1713 !-------------------------------------------------------------------------------
1714 SUBROUTINE slavedgetrf()
1715  !----------------------------------------------
1716  ! Local variables
1717  !----------------------------------------------
1718  REAL(dp), ALLOCATABLE :: subA(:)
1719  INTEGER, ALLOCATABLE :: subpiv(:)
1720  INTEGER :: descA(9)
1721  INTEGER :: lldA
1722  INTEGER :: nrows, ncols, subnrows, subncols
1723  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1724  INTEGER :: ctxt, info
1725 #if defined(MPI_OPT)
1726  INTEGER, EXTERNAL :: NUMROC
1727 #endif
1728 
1729  CALL bsystemclock(pstats%trf%t1)
1730 
1731 #if defined(MPI_OPT)
1732  nrows = m; ncols = m
1733  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1734  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1735  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1736  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1737  subncols = numroc( ncols, bszc, pj, 0, pnc )
1738 
1739  ctxt = blacs%levelcontext
1740  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1741  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1742 
1743  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocating subAPiv'; CALL fl(ofu)
1744  ALLOCATE( suba(1:subnrows*subncols) )
1745  ALLOCATE( subpiv(1:subnrows+bszr) )
1746  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocated subAPiv'; CALL fl(ofu)
1747 
1748  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF receiving A submatrix'; CALL fl(ofu)
1749  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1750  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF received A submatrix'; CALL fl(ofu)
1751 
1752  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF invoking PDGETRF'; CALL fl(ofu)
1753  CALL bsystemclock(pstats%comp%t1)
1754  CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1755  CALL bsystemclock(pstats%comp%t2)
1756  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1757  CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1758  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done PDGETRF'; CALL fl(ofu)
1759 
1760  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sending result matrix to master'; CALL fl(ofu)
1761  CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1762  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result matrix to master'; CALL fl(ofu)
1763  CALL slavesendvector( nrows, subpiv, subnrows )
1764  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result vector to master'; CALL fl(ofu)
1765 
1766  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocating subAPiv'; CALL fl(ofu)
1767  DEALLOCATE( subpiv )
1768  DEALLOCATE( suba )
1769  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocated subAPiv'; CALL fl(ofu)
1770 
1771 #endif
1772 
1773  CALL bsystemclock(pstats%trf%t2)
1774  CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1775 
1776 END SUBROUTINE slavedgetrf
1777 
1778 !-------------------------------------------------------------------------------
1782 !-------------------------------------------------------------------------------
1783 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1784  !----------------------------------------------
1785  ! Formal arguments
1786  !----------------------------------------------
1787  INTEGER, INTENT(IN) :: nrhs
1788  REAL(dp), INTENT(IN) :: A(:,:)
1789  INTEGER, INTENT(IN) :: piv(:)
1790  REAL(dp), INTENT(INOUT) :: B(:,:)
1791  INTEGER :: info, i, j
1792  REAL(dp) :: ton, toff
1793 
1794 ! DO i=1, M
1795 ! DO j=1,M
1796 ! IF(ABS(A(i,j)).GT.0) WRITE(100+rank,*) i, j, A(i,j)
1797 ! IF (i.NE.j.AND.A(i,j).NE.0) &
1798 ! STOP 'A not diagonal'
1799 ! END DO
1800 ! END DO
1801 
1802  CALL bsystemclock(ton)
1803  DO j = 1, nrhs
1804  DO i = 1, m
1805  IF (a(i,1).EQ.0) stop 'Inverting zero'
1806  b(i,j) = b(i,j)/a(i,1)
1807  END DO
1808  END DO
1809  info = 0
1810  CALL bsystemclock(toff)
1811  dgetrs_time = dgetrs_time + (toff-ton)
1812 END SUBROUTINE plbdgetrs
1813 
1814 !-------------------------------------------------------------------------------
1818 !-------------------------------------------------------------------------------
1819 SUBROUTINE slavedgetrs()
1820 
1821  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRS not implemented'; CALL fl(ofu)
1822  stop !To be completed
1823 
1824 END SUBROUTINE slavedgetrs
1825 !-------------------------------------------------------------------------------
1826 !-------------------------------------------------------------------------------
1827 
1828 !-------------------------------------------------------------------------------
1832 !-------------------------------------------------------------------------------
1833 SUBROUTINE initialize_bst( do_mpiinit, inN, inM )
1834  !-----------------------------------------------------------------------------
1835  ! Formal arguments
1836  !-----------------------------------------------------------------------------
1837  LOGICAL, INTENT(IN) :: do_mpiinit
1838  INTEGER, INTENT(IN) :: inN
1839  INTEGER, INTENT(IN) :: inM
1840 
1841  !-----------------------------------------------------------------------------
1842  ! Local variables
1843  !-----------------------------------------------------------------------------
1844  INTEGER :: nrowsperrank
1845  INTEGER :: nspillrows
1846  INTEGER :: mpierr
1847  INTEGER :: globrow
1848  INTEGER :: globrowoff
1849  REAL :: logNbase2
1850 
1851  n = inn
1852  m = inm
1853 
1854  !-----------------------------------------------------------------------------
1855  use_mpiwtime = .false.
1856 #if defined(MPI_OPT)
1857  IF ( do_mpiinit ) THEN
1858  CALL mpi_init( mpierr )
1859  END IF
1860 ! CALL MPI_Comm_rank( NS_COMM, rank, mpierr )
1861 ! CALL MPI_Comm_size( NS_COMM, nranks, mpierr)
1862  use_mpiwtime = .true.
1863 #endif
1864 
1865  !-----------------------------------------------------------------------------
1866  ! Set output file unit, so we get output of each rank in its own file
1867  ! Extremely useful for debugging -- makes it so much easier to see progress!
1868  !-----------------------------------------------------------------------------
1869  ofu = rank+1000
1870  p = nranks
1871  IF ( p .GT. n ) THEN
1872  IF(kpdbg) WRITE(ofu,*) 'Detected extra ', p-n, ' ranks'; CALL fl(ofu)
1873  p = n
1874  END IF
1875 
1876  !-----------------------------------------------------------------------------
1877  !Set problem output file
1878  !-----------------------------------------------------------------------------
1879  pfu = ofu+nranks
1880 
1881  !-----------------------------------------------------------------------------
1882  ! Check to see if debugging output is to be written : SKS on Nov 9, 2011
1883  !-----------------------------------------------------------------------------
1884  kpdbg=.false.; kenvvar='KPDBG'
1885  CALL getenv(kenvvar,kenvval)
1886  IF (kenvval.NE.'') THEN
1887  IF (kenvval.EQ.'TRUE') THEN
1888  kpdbg=.true.
1889  ELSE
1890  kpdbg=.false.
1891  END IF
1892  END IF
1893 
1894  !-----------------------------------------------------------------------------
1895  IF(kpdbg) WRITE(ofu,*) 'Rank ', rank, ' NRanks ', nranks; CALL fl(ofu)
1896  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
1897  IF(kpdbg) WRITE(ofu,*) '------ Initialization start ------'; CALL fl(ofu)
1898  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
1899 
1900  !-----------------------------------------------------------------------------
1901  IF ( n .LT. 1 ) THEN
1902  IF(kpdbg) WRITE(ofu,*) 'Bad N', n; CALL fl(ofu)
1903  stop
1904  END IF
1905 
1906  !-----------------------------------------------------------------------------
1907  !Determine where we start and end in row list at level 1 on this processor
1908  !This processor's global row numbers span [startglobrow,endglobrow] inclusive
1909  !-----------------------------------------------------------------------------
1910  nrowsperrank = n/p !Number of rows evenly split across processors
1911  nspillrows = mod(n, p) !Some left over after even split
1912  IF ( rank .LT. nspillrows ) THEN !The first few ranks get one extra row each
1913  startglobrow = rank*nrowsperrank + rank + 1
1914  endglobrow = startglobrow + nrowsperrank
1915  ELSE IF ( rank .LT. p ) THEN !The other active ranks
1916  startglobrow = rank*nrowsperrank + nspillrows + 1
1917  endglobrow = startglobrow + nrowsperrank - 1
1918  ELSE !The rest (unnecessary, inactive) ranks
1919  startglobrow = -1
1920  endglobrow = -2
1921  END IF
1922 
1923  !-----------------------------------------------------------------------------
1924  !No. of levels is 1+ceil(lg(N)); e.g., 6 for N=23
1925  !-----------------------------------------------------------------------------
1926  lognbase2 = log(real(n))/log(2.0)
1927  nlevels = 1 + int(lognbase2)
1928  IF ( real(int(lognbase2)) .NE. lognbase2 ) THEN
1929  nlevels = nlevels + 1
1930  END IF
1931  IF(kpdbg) WRITE(ofu,*) 'NLEVELS=', nlevels; CALL fl(ofu)
1932 
1933  !---------------------------------------------------------------------------
1934  matdirtied = .true.
1935  rhsdirtied = .true.
1936 
1937  !---------------------------------------------------------------------------
1938  !Not sure if barriers are needed between levels, but they can be optionally
1939  !turned on by setting usebarriers to TRUE
1940  !---------------------------------------------------------------------------
1941  usebarriers = .false.
1942 
1943  !---------------------------------------------------------------------------
1944  writeproblemfile = .false.
1945  writesolution = .false.
1946 
1947  !---------------------------------------------------------------------------
1948  membytes = 0
1949  dpsz = 8 !8 bytes for double precision
1950  intsz = 4 !4 bytes for a single integer
1951  ptrsz = 8 !Assuming 64-bit addresses in the worst case
1952  one = 1
1953  zero = 0
1954 
1955  tottime = 0
1956  totcommtime = 0
1957  totinvtime = 0
1958  totinvcount = 0
1959  totmatmultime = 0
1960  totmatmulcount = 0
1961  totmatsoltime = 0
1962  totmatsolcount = 0
1963  CALL bclockinit()
1964 
1965  !---------------------------------------------------------------------------
1966  !The +2 in nlocrows+2 is for storing incoming values from neighbors, if any
1967  !The zero'th element stores the incoming element from top neighbor, the
1968  !last element stores the incoming element from the bottom neighbor.
1969  !---------------------------------------------------------------------------
1970  ! SKS
1971  IF (.NOT.ALLOCATED(lelement)) ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
1972  !ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
1973  CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
1974 
1975  ! SKS
1976  IF (.NOT.ALLOCATED(selement)) ALLOCATE( selement(n) ) !Have a place for all x; only some will be allocated
1977  !ALLOCATE( selement(N) ) !Have a place for all x; only some will be allocated
1978  CALL chargememory( n*ptrsz )
1979 
1980 
1981  !---------------------------------------------------------------------------
1982  !Allocate (L,D,U,b,pivot) at level 1
1983  !---------------------------------------------------------------------------
1984  DO globrow = startglobrow, endglobrow, 1
1985  globrowoff = globrow-startglobrow+1
1986  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%L)) ALLOCATE( lelement(1, globrowoff)%L(m,1) )
1987  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%D)) ALLOCATE( lelement(1, globrowoff)%D(m,1) )
1988  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%U)) ALLOCATE( lelement(1, globrowoff)%U(m,1) )
1989  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%b)) ALLOCATE( lelement(1, globrowoff)%b(m,1) )
1990  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%pivot)) ALLOCATE( lelement(1, globrowoff)%pivot(m) )
1991  lelement(1, globrowoff)%L = 0
1992  lelement(1, globrowoff)%D = 0
1993  lelement(1, globrowoff)%U = 0
1994  lelement(1, globrowoff)%b = 0
1995  lelement(1, globrowoff)%pivot = 0
1996  END DO
1997 
1998 
1999  !---------------------------------------------------------------------------
2000  !Allocate (L,D,U,b) for saving the original problem
2001  !---------------------------------------------------------------------------
2002  IF(.NOT.ALLOCATED(orig)) ALLOCATE( orig(endglobrow-startglobrow+1) )
2003  DO globrow = startglobrow, endglobrow, 1
2004  globrowoff = globrow-startglobrow+1
2005  IF(.NOT.ALLOCATED(orig(globrowoff)%L)) ALLOCATE( orig(globrowoff)%L(m,1) )
2006  IF(.NOT.ALLOCATED(orig(globrowoff)%D)) ALLOCATE( orig(globrowoff)%D(m,1) )
2007  IF(.NOT.ALLOCATED(orig(globrowoff)%U)) ALLOCATE( orig(globrowoff)%U(m,1) )
2008  IF(.NOT.ALLOCATED(orig(globrowoff)%b)) ALLOCATE( orig(globrowoff)%b(m,1) )
2009  !-- DONT CHARGE MEMORY COST FOR THIS!!! --
2010  orig(globrowoff)%L = 0
2011  orig(globrowoff)%D = 0
2012  orig(globrowoff)%U = 0
2013  orig(globrowoff)%b = 0
2014  END DO
2015 
2016  !-----------------------------------------------------------------------------
2017  CALL plbinitialize()
2018 
2019  !-----------------------------------------------------------------------------
2020  IF(kpdbg) WRITE(ofu,*) ' P=',p,' N=',n,' M=',m; CALL fl(ofu)
2021  IF(kpdbg) WRITE(ofu,*) 'SROW=',startglobrow,' EROW=', endglobrow; CALL fl(ofu)
2022  IF(kpdbg) WRITE(ofu,*) '------ Initialization end ------'; CALL fl(ofu)
2023  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2024 END SUBROUTINE initialize_bst
2025 !-------------------------------------------------------------------------------
2026 
2027 !-------------------------------------------------------------------------------
2031 !-------------------------------------------------------------------------------
2032 SUBROUTINE setmatrixrowcoll_bst( globrow, Lj )
2033  !-----------------------------------------------------------------------------
2034  ! Formal arguments
2035  !-----------------------------------------------------------------------------
2036  INTEGER, INTENT(IN) :: globrow
2037  REAL(dp), INTENT(IN) :: Lj(:)
2038  !-----------------------------------------------------------------------------
2039  ! Local variables
2040  !-----------------------------------------------------------------------------
2041  INTEGER :: i, globrowoff
2042 
2043  !-------------------------------------------
2044  ! Sanity checks on globrow
2045  !-------------------------------------------
2046  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2047  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2048  stop
2049  END IF
2050  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2051  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2052  stop
2053  END IF
2054 
2055  globrowoff = globrow-startglobrow+1
2056 
2057  !-------------------------------------------
2058  ! Copy given L column into allocated matrix
2059  !-------------------------------------------
2060  IF ( globrow .EQ. 1 ) THEN
2061  lelement(1, globrowoff)%L(:,1) = zero
2062  ELSE
2063  lelement(1, globrowoff)%L(:,1) = lj(:)
2064  END IF
2065 
2066 
2067  !-------------------------------------------
2068  !Save a copy of this original problem
2069  !-------------------------------------------
2070  orig(globrowoff)%L(:,1) = lelement(1,globrowoff)%L(:,1)
2071 
2072  !-------------------------------------------
2073  matdirtied = .true.
2074 
2075 END SUBROUTINE setmatrixrowcoll_bst
2076 !-------------------------------------------------------------------------------
2077 
2078 !-------------------------------------------------------------------------------
2082 !-------------------------------------------------------------------------------
2083 SUBROUTINE setmatrixrowcold_bst( globrow, Dj )
2084  !-----------------------------------------------------------------------------
2085  ! Formal arguments
2086  !-----------------------------------------------------------------------------
2087  INTEGER, INTENT(IN) :: globrow
2088  REAL(dp), INTENT(IN) :: Dj(:)
2089  !-----------------------------------------------------------------------------
2090  ! Local variables
2091  !-----------------------------------------------------------------------------
2092  REAL(dp) :: val
2093  INTEGER :: i, globrowoff
2094 
2095  !-------------------------------------------
2096  ! Sanity checks on globrow
2097  !-------------------------------------------
2098  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2099  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2100  stop
2101  END IF
2102  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2103  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2104  stop
2105  END IF
2106 
2107  globrowoff = globrow-startglobrow+1
2108 
2109  !-------------------------------------------
2110  ! Copy given D column into allocated matrix
2111  !-------------------------------------------
2112  lelement(1, globrowoff)%D(:,1) = dj(:)
2113 
2114  !-------------------------------------------
2115  !Save a copy of this original problem
2116  !-------------------------------------------
2117  orig(globrowoff)%D(:,1) = lelement(1,globrowoff)%D(:,1)
2118 
2119  !-------------------------------------------
2120  matdirtied = .true.
2121 
2122 END SUBROUTINE setmatrixrowcold_bst
2123 !-------------------------------------------------------------------------------
2124 
2125 
2126 !-------------------------------------------------------------------------------
2130 !-------------------------------------------------------------------------------
2131 SUBROUTINE setmatrixrowcolu_bst( globrow, Uj )
2132  !-----------------------------------------------------------------------------
2133  ! Formal arguments
2134  !-----------------------------------------------------------------------------
2135  INTEGER, INTENT(IN) :: globrow
2136  REAL(dp), INTENT(IN) :: Uj(:)
2137  !-----------------------------------------------------------------------------
2138  ! Local variables
2139  !-----------------------------------------------------------------------------
2140  INTEGER :: i, globrowoff
2141 
2142  !-------------------------------------------
2143  ! Sanity checks on globrow
2144  !-------------------------------------------
2145  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2146  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2147  stop
2148  END IF
2149  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2150  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2151  stop
2152  END IF
2153 
2154  globrowoff = globrow-startglobrow+1
2155 
2156  !-------------------------------------------
2157  ! Copy given U column into allocated matrix
2158  !-------------------------------------------
2159  IF ( globrow .EQ. n ) THEN
2160  lelement(1, globrowoff)%U(:,1) = zero
2161  ELSE
2162  lelement(1, globrowoff)%U(:,1) = uj
2163  END IF
2164 
2165  !-------------------------------------------
2166  !Save a copy of this original problem
2167  !-------------------------------------------
2168  orig(globrowoff)%U(:,1) = lelement(1,globrowoff)%U(:,1)
2169 
2170  !-------------------------------------------
2171  matdirtied = .true.
2172 
2173 END SUBROUTINE setmatrixrowcolu_bst
2174 !-------------------------------------------------------------------------------
2175 
2176 !-------------------------------------------------------------------------------
2180 !-------------------------------------------------------------------------------
2181 SUBROUTINE setmatrixrhs_bst( globrow, b )
2182  !-----------------------------------------------------------------------------
2183  ! Formal arguments
2184  !-----------------------------------------------------------------------------
2185  INTEGER, INTENT(IN) :: globrow
2186  REAL(dp), INTENT(IN) :: b(:)
2187  !-----------------------------------------------------------------------------
2188  ! Local variables
2189  !-----------------------------------------------------------------------------
2190  REAL(dp) :: val
2191  INTEGER :: i, globrowoff
2192 
2193  !-------------------------------------------
2194  ! Sanity checks on globrow
2195  !-------------------------------------------
2196  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2197  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2198  stop
2199  END IF
2200  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2201  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2202  stop
2203  END IF
2204 
2205  globrowoff = globrow-startglobrow+1
2206 
2207  !-------------------------------------------
2208  ! Copy given values into allocated RHS
2209  !-------------------------------------------
2210  DO i = 1, m
2211  val = b(i)
2212  lelement(1, globrowoff)%b(i,1) = val
2213  END DO
2214 
2215  !-------------------------------------------
2216  !Save a copy of this original problem
2217  !-------------------------------------------
2218  orig(globrowoff)%b = lelement(1,globrowoff)%b
2219 
2220  !-------------------------------------------
2221  rhsdirtied = .true.
2222 
2223 END SUBROUTINE setmatrixrhs_bst
2224 !-------------------------------------------------------------------------------
2225 
2226 !-------------------------------------------------------------------------------
2230 !-------------------------------------------------------------------------------
2231 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2232  !-----------------------------------------------------------------------------
2233  ! Formal arguments
2234  !-----------------------------------------------------------------------------
2235  INTEGER, INTENT(IN) :: globrow
2236  REAL(dp), INTENT(OUT) :: Lj(:)
2237  INTEGER, INTENT(IN) :: j
2238  !-----------------------------------------------------------------------------
2239  ! Local variables
2240  !-----------------------------------------------------------------------------
2241  REAL(dp) :: val
2242  INTEGER :: i, globrowoff
2243 
2244  !-------------------------------------------
2245  ! Sanity checks on globrow
2246  !-------------------------------------------
2247  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2248  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2249  stop
2250  END IF
2251  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2252  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2253  stop
2254  END IF
2255  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2256  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2257  stop
2258  END IF
2259 
2260  globrowoff = globrow-startglobrow+1
2261 
2262  !-------------------------------------------
2263  ! Copy L from matrix
2264  !-------------------------------------------
2265  DO i = 1, m
2266  IF ( globrow .EQ. 1 ) THEN
2267  val = zero
2268  ELSE
2269  val = lelement(1, globrowoff)%L(i,j)
2270  END IF
2271  lj(i) = val
2272  END DO
2273 
2274 END SUBROUTINE getmatrixrowcoll
2275 
2276 !-------------------------------------------------------------------------------
2280 !-------------------------------------------------------------------------------
2281 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2282  !-----------------------------------------------------------------------------
2283  ! Formal arguments
2284  !-----------------------------------------------------------------------------
2285  INTEGER, INTENT(IN) :: globrow
2286  REAL(dp), INTENT(OUT) :: Dj(:)
2287  INTEGER, INTENT(IN) :: j
2288  !-----------------------------------------------------------------------------
2289  ! Local variables
2290  !-----------------------------------------------------------------------------
2291  REAL(dp) :: val
2292  INTEGER :: i, globrowoff
2293 
2294  !-------------------------------------------
2295  ! Sanity checks on globrow
2296  !-------------------------------------------
2297  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2298  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2299  stop
2300  END IF
2301  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2302  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2303  stop
2304  END IF
2305  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2306  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad j column ',j; CALL fl(ofu)
2307  stop
2308  END IF
2309 
2310  globrowoff = globrow-startglobrow+1
2311 
2312  !-------------------------------------------
2313  ! Copy D from matrix
2314  !-------------------------------------------
2315  DO i = 1, m
2316  val = lelement(1, globrowoff)%D(i,j)
2317  dj(i) = val
2318  END DO
2319 
2320 END SUBROUTINE getmatrixrowcold
2321 
2322 !-------------------------------------------------------------------------------
2326 !-------------------------------------------------------------------------------
2327 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2328  !-----------------------------------------------------------------------------
2329  ! Formal arguments
2330  !-----------------------------------------------------------------------------
2331  INTEGER, INTENT(IN) :: globrow
2332  REAL(dp), INTENT(OUT) :: Uj(:)
2333  INTEGER, INTENT(IN) :: j
2334  !-----------------------------------------------------------------------------
2335  ! Local variables
2336  !-----------------------------------------------------------------------------
2337  REAL(dp) :: val
2338  INTEGER :: i, globrowoff
2339 
2340  !-------------------------------------------
2341  ! Sanity checks on globrow
2342  !-------------------------------------------
2343  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2344  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2345  stop
2346  END IF
2347  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2348  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2349  stop
2350  END IF
2351  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2352  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2353  stop
2354  END IF
2355 
2356  globrowoff = globrow-startglobrow+1
2357 
2358  !-------------------------------------------
2359  ! Copy U from matrix
2360  !-------------------------------------------
2361  DO i = 1, m
2362  IF ( globrow .EQ. n ) THEN
2363  val = zero
2364  ELSE
2365  val = lelement(1, globrowoff)%U(i,j)
2366  END IF
2367  uj(i) = val
2368  END DO
2369 
2370 END SUBROUTINE getmatrixrowcolu
2371 
2372 !-------------------------------------------------------------------------------
2376 !-------------------------------------------------------------------------------
2377 SUBROUTINE getmatrixrhs( globrow, b )
2378  !-----------------------------------------------------------------------------
2379  ! Formal arguments
2380  !-----------------------------------------------------------------------------
2381  INTEGER, INTENT(IN) :: globrow
2382  REAL(dp), INTENT(OUT) :: b(:)
2383  !-----------------------------------------------------------------------------
2384  ! Local variables
2385  !-----------------------------------------------------------------------------
2386  REAL(dp) :: val
2387  INTEGER :: i, globrowoff
2388 
2389  !-------------------------------------------
2390  ! Sanity checks on globrow
2391  !-------------------------------------------
2392  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2393  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2394  stop
2395  END IF
2396  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2397  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2398  stop
2399  END IF
2400 
2401  globrowoff = globrow-startglobrow+1
2402 
2403  !-------------------------------------------
2404  ! Copy RHS
2405  !-------------------------------------------
2406  DO i = 1, m
2407  val = lelement(1, globrowoff)%b(i,1)
2408  b(i) = val
2409  END DO
2410 
2411 END SUBROUTINE getmatrixrhs
2412 
2413 !-------------------------------------------------------------------------------
2417 !-------------------------------------------------------------------------------
2418 SUBROUTINE getsolutionvector_bst( globrow, x )
2419  !-----------------------------------------------------------------------------
2420  ! Formal arguments
2421  !-----------------------------------------------------------------------------
2422  INTEGER, INTENT(IN) :: globrow
2423  REAL(dp), INTENT(OUT) :: x(:)
2424  !-----------------------------------------------------------------------------
2425  ! Local variables
2426  !-----------------------------------------------------------------------------
2427  REAL(dp) :: val
2428  INTEGER :: i
2429 
2430  !-------------------------------------------
2431  ! Sanity checks on globrow
2432  !-------------------------------------------
2433  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2434  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Bad input globrow ',globrow; CALL fl(ofu)
2435  stop
2436  END IF
2437  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2438  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Non-local globrow ',globrow; CALL fl(ofu)
2439  stop
2440  END IF
2441 
2442  !-------------------------------------------
2443  ! Copy solution into given vector
2444  !-------------------------------------------
2445  DO i = 1, m
2446  val = selement(globrow)%x(i)
2447  x(i) = val
2448  END DO
2449 
2450 END SUBROUTINE getsolutionvector_bst
2451 
2452 !-------------------------------------------------------------------------------
2456 !-------------------------------------------------------------------------------
2457 SUBROUTINE setidentitytestcase
2458  !-----------------------------------------------------------------------------
2459  ! Local variables
2460  !-----------------------------------------------------------------------------
2461  INTEGER :: i, j, globrow, globrowoff
2462 
2463  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case ------'; CALL fl(ofu)
2464 
2465  !-----------------------------------------
2466  !Initialize the arrays with identity
2467  !-----------------------------------------
2468  DO globrow = startglobrow, endglobrow, 1
2469  globrowoff = globrow-startglobrow+1
2470  DO i = 1, m
2471  DO j = 1, m
2472  lelement(1, globrowoff)%L(i,j) = zero
2473  IF ( i .EQ. j ) THEN
2474  lelement(1, globrowoff)%D(i,j) = one
2475  ELSE
2476  lelement(1, globrowoff)%D(i,j) = zero
2477  END IF
2478  lelement(1, globrowoff)%U(i,j) = zero
2479  END DO
2480  lelement(1, globrowoff)%b(i,1) = one
2481  END DO
2482  !-------------------------------------------
2483  !Save a copy of this original problem
2484  !-------------------------------------------
2485  orig(globrowoff)%L = lelement(1,globrowoff)%L
2486  orig(globrowoff)%D = lelement(1,globrowoff)%D
2487  orig(globrowoff)%U = lelement(1,globrowoff)%U
2488  orig(globrowoff)%b = lelement(1,globrowoff)%b
2489  END DO
2490 
2491  !-------------------------------------------
2492  matdirtied = .true.
2493  rhsdirtied = .true.
2494 
2495 END SUBROUTINE setidentitytestcase
2496 
2497 !-------------------------------------------------------------------------------
2502 !-------------------------------------------------------------------------------
2503 SUBROUTINE setidentityrhs
2504  !-----------------------------------------------------------------------------
2505  ! Local variables
2506  !-----------------------------------------------------------------------------
2507  INTEGER :: i, j, globrow, globrowoff
2508 
2509  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case RHS ------'; CALL fl(ofu)
2510 
2511  !-----------------------------------------
2512  !Initialize the arrays with identity
2513  !-----------------------------------------
2514  DO globrow = startglobrow, endglobrow, 1
2515  globrowoff = globrow-startglobrow+1
2516  DO i = 1, m
2517  lelement(1, globrowoff)%b(i,1) = one
2518  END DO
2519  !-------------------------------------------
2520  !Save a copy of this original RHS
2521  !-------------------------------------------
2522  orig(globrowoff)%b = lelement(1,globrowoff)%b
2523  END DO
2524 
2525  !-------------------------------------------
2526  rhsdirtied = .true.
2527 
2528 END SUBROUTINE setidentityrhs
2529 
2530 !-------------------------------------------------------------------------------
2534 !-------------------------------------------------------------------------------
2535 SUBROUTINE setrandomtestcase
2536  !-----------------------------------------------------------------------------
2537  ! Local variables
2538  !-----------------------------------------------------------------------------
2539  REAL(dp) :: randval, rmin, rmax
2540  INTEGER :: rngwidth
2541  INTEGER, ALLOCATABLE :: seeds(:)
2542  INTEGER :: i, j, globrow, globrowoff
2543 
2544  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case ------'; CALL fl(ofu)
2545 
2546  !-----------------------------------------
2547  !Initialize random number seeds
2548  !-----------------------------------------
2549  CALL random_seed( size=rngwidth)
2550  ALLOCATE( seeds(rngwidth) )
2551  DO i = 1, rngwidth
2552  seeds(i) = i*(rank+100)*p
2553  END DO
2554  CALL random_seed( put=seeds(1:rngwidth) )
2555  DEALLOCATE( seeds )
2556 
2557  !-----------------------------------------
2558  !Write the header information for problem
2559  !-----------------------------------------
2560  IF( writeproblemfile ) THEN !Write the dimensions N, NZ
2561  IF(kpdbg) WRITE(pfu,*) n*m; CALL fl(pfu)
2562  IF(kpdbg) WRITE(pfu,*) (3*n-2)*m*m; CALL fl(pfu)
2563  END IF
2564 
2565  !-----------------------------------------
2566  !Initialize the arrays with random values
2567  !-----------------------------------------
2568  rmin = 0.01_rprec
2569  rmax = one
2570  DO globrow = startglobrow, endglobrow, 1
2571  globrowoff = globrow-startglobrow+1
2572  DO i = 1, m
2573  DO j = 1, m
2574  IF ( globrow .EQ. 1 ) THEN
2575  randval = zero
2576  ELSE
2577  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2578  IF( writeproblemfile ) THEN
2579  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
2580  END IF
2581  END IF
2582  lelement(1, globrowoff)%L(i,j) = randval
2583 
2584  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2585  lelement(1, globrowoff)%D(i,j) = randval
2586  IF( writeproblemfile ) THEN
2587  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
2588  END IF
2589 
2590  IF ( globrow .EQ. n ) THEN
2591  randval = zero
2592  ELSE
2593  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2594  IF( writeproblemfile ) THEN
2595  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
2596  END IF
2597  END IF
2598  lelement(1, globrowoff)%U(i,j) = randval
2599  END DO
2600  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2601  lelement(1, globrowoff)%b(i,1) = randval
2602  END DO
2603 
2604  !-------------------------------------------
2605  !Save a copy of this original problem
2606  !-------------------------------------------
2607  orig(globrowoff)%L = lelement(1,globrowoff)%L
2608  orig(globrowoff)%D = lelement(1,globrowoff)%D
2609  orig(globrowoff)%U = lelement(1,globrowoff)%U
2610  orig(globrowoff)%b = lelement(1,globrowoff)%b
2611  END DO
2612 
2613  IF (writeproblemfile) THEN
2614  DO globrow = startglobrow, endglobrow, 1
2615  globrowoff = globrow-startglobrow+1
2616  DO i = 1, m
2617  IF(kpdbg) WRITE(pfu,*) lelement(1,globrowoff)%b(i,1); CALL fl(pfu)
2618  END DO
2619  END DO
2620  END IF
2621 
2622  !-------------------------------------------
2623  matdirtied = .true.
2624  rhsdirtied = .true.
2625 
2626 END SUBROUTINE setrandomtestcase
2627 
2628 !-------------------------------------------------------------------------------
2633 !-------------------------------------------------------------------------------
2634 SUBROUTINE setrandomrhs( randseedoff )
2635  !-----------------------------------------------------------------------------
2636  ! Formal arguments
2637  !-----------------------------------------------------------------------------
2638  INTEGER, INTENT(IN) :: randseedoff
2639  !-----------------------------------------------------------------------------
2640  ! Local variables
2641  !-----------------------------------------------------------------------------
2642  REAL(dp) :: randval, rmin, rmax
2643  INTEGER :: rngwidth
2644  INTEGER, ALLOCATABLE :: seeds(:)
2645  INTEGER :: i, j, globrow, globrowoff
2646 
2647  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case RHS ------'; CALL fl(ofu)
2648 
2649  !-----------------------------------------
2650  !Initialize random number seeds
2651  !-----------------------------------------
2652  CALL random_seed( size=rngwidth)
2653  ALLOCATE( seeds(rngwidth) )
2654  DO i = 1, rngwidth
2655  seeds(i) = i*(rank+100+34+randseedoff)*p
2656  END DO
2657  CALL random_seed( put=seeds(1:rngwidth) )
2658  DEALLOCATE( seeds )
2659 
2660  !-----------------------------------------
2661  !Initialize the arrays with random values
2662  !-----------------------------------------
2663  rmin = 0.01_dp
2664  rmax = one
2665  DO globrow = startglobrow, endglobrow, 1
2666  globrowoff = globrow-startglobrow+1
2667  DO i = 1, m
2668  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
2669  lelement(1, globrowoff)%b(i,1) = randval
2670  END DO
2671 
2672  !-------------------------------------------
2673  !Save a copy of this original RHS
2674  !-------------------------------------------
2675  orig(globrowoff)%b = lelement(1,globrowoff)%b
2676  END DO
2677 
2678  !-------------------------------------------
2679  rhsdirtied = .true.
2680 
2681 END SUBROUTINE setrandomrhs
2682 
2683 !-------------------------------------------------------------------------------
2687 !-------------------------------------------------------------------------------
2688 SUBROUTINE finalize_bst( do_mpifinalize )
2689  !-----------------------------------------------------------------------------
2690  ! Formal arguments
2691  !-----------------------------------------------------------------------------
2692  LOGICAL, INTENT(IN) :: do_mpifinalize
2693  !-----------------------------------------------------------------------------
2694  ! Local variables
2695  !-----------------------------------------------------------------------------
2696  INTEGER :: level
2697  INTEGER :: globrow
2698  INTEGER :: mpierr
2699  INTEGER :: tempinvcount, tempmulcount, tempsolcount
2700 
2701  !-----------------------------------------------------------------------------
2702  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2703  IF(kpdbg) WRITE(ofu,*) '------ Finalizing start ------'; CALL fl(ofu)
2704 
2705  !-----------------------------------------------------------------------------
2706  CALL plbfinalize()
2707 
2708  !-----------------------------------------------------------------------------
2709  ! Deallocate mats and hats, if any
2710  !-----------------------------------------------------------------------------
2711  IF ( ALLOCATED(lelement) ) THEN
2712  DO level = lbound(lelement,1), ubound(lelement,1)
2713  DO globrow = lbound(lelement,2), ubound(lelement,2)
2714  IF ( ALLOCATED(lelement(level,globrow)%L) ) THEN
2715  DEALLOCATE( lelement(level, globrow)%L )
2716  END IF
2717  IF ( ALLOCATED(lelement(level,globrow)%D) ) THEN
2718  DEALLOCATE( lelement(level, globrow)%D )
2719  END IF
2720  IF ( ALLOCATED(lelement(level,globrow)%U) ) THEN
2721  DEALLOCATE( lelement(level, globrow)%U )
2722  END IF
2723  IF ( ALLOCATED(lelement(level,globrow)%b) ) THEN
2724  DEALLOCATE( lelement(level, globrow)%b )
2725  END IF
2726  IF ( ALLOCATED(lelement(level,globrow)%pivot) ) THEN
2727  DEALLOCATE( lelement(level, globrow)%pivot )
2728  END IF
2729  END DO
2730  END DO
2731  DEALLOCATE(lelement)
2732  END IF
2733 
2734  IF ( ALLOCATED(orig) ) THEN
2735  DO globrow = lbound(orig,1), ubound(orig,1)
2736  IF ( ALLOCATED(orig(globrow)%L) ) THEN
2737  DEALLOCATE( orig(globrow)%L )
2738  END IF
2739  IF ( ALLOCATED(orig(globrow)%D) ) THEN
2740  DEALLOCATE( orig(globrow)%D )
2741  END IF
2742  IF ( ALLOCATED(orig(globrow)%U) ) THEN
2743  DEALLOCATE( orig(globrow)%U )
2744  END IF
2745  IF ( ALLOCATED(orig(globrow)%b) ) THEN
2746  DEALLOCATE( orig(globrow)%b )
2747  END IF
2748  IF ( ALLOCATED(orig(globrow)%pivot) ) THEN
2749  DEALLOCATE( orig(globrow)%pivot )
2750  END IF
2751  END DO
2752  DEALLOCATE(orig)
2753  END IF
2754 
2755  IF ( ALLOCATED(selement) ) THEN
2756  DO globrow = 1, n, 1
2757  IF ( ALLOCATED(selement(globrow)%x) ) THEN
2758  DEALLOCATE( selement(globrow)%x )
2759  END IF
2760  END DO
2761  DEALLOCATE(selement)
2762  END IF
2763 
2764  !-----------------------------------------------------------------------------
2765 #if defined(MPI_OPT)
2766  IF ( usebarriers ) THEN
2767  IF(kpdbg) WRITE(ofu,*) 'Barrier in finalize'; CALL fl(ofu)
2768  CALL mpi_barrier( ns_comm, mpierr )
2769  IF(kpdbg) WRITE(ofu,*) 'Done barrier in finalize'; CALL fl(ofu)
2770  END IF
2771  !-----------------------------------------------------------------------------
2772  IF ( do_mpifinalize ) THEN
2773  CALL mpi_finalize( mpierr )
2774  END IF
2775 #endif
2776 
2777  bcyclic_comp_time = bcyclic_comp_time + (tottime-totcommtime)
2778  bcyclic_comm_time = bcyclic_comm_time + totcommtime
2779  !-----------------------------------------------------------------------------
2780  tempmulcount=totmatmulcount; IF ( tempmulcount .LE. 0 ) tempmulcount = 1
2781  tempinvcount=totinvcount; IF ( tempinvcount .LE. 0 ) tempinvcount = 1
2782  tempsolcount=totmatsolcount; IF ( tempsolcount .LE. 0 ) tempsolcount = 1
2783  IF(kpdbg) WRITE(ofu,*) 'N=', n, ' M=', m, ' P=', p, ' rank=', rank
2784  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory ', membytes/1e6, ' MB'
2785  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Computation ', tottime-totcommtime,' sec'
2786  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Communication ', totcommtime,' sec'
2787  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix inv ',totinvcount, ' * ', &
2788  totinvtime/tempinvcount,' sec = ', totinvtime, ' sec'
2789  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix mul ',totmatmulcount, ' * ', &
2790  totmatmultime/tempmulcount, ' sec = ', totmatmultime, ' sec'
2791  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix sol ',totmatsolcount, ' * ', &
2792  totmatsoltime/tempsolcount, ' sec = ', totmatsoltime, ' sec'
2793  IF(kpdbg) WRITE(ofu,*) 'Finalized rank ', rank
2794  IF(kpdbg) WRITE(ofu,*) '------ Finalization end ------'; CALL fl(ofu)
2795 END SUBROUTINE finalize_bst
2796 
2797 !-------------------------------------------------------------------------------
2801 !-------------------------------------------------------------------------------
2802 LOGICAL FUNCTION iseven( num )
2803  INTEGER, INTENT(IN) :: num
2804  iseven = mod(num,2) .EQ. 0
2805 END FUNCTION iseven
2806 
2807 !-------------------------------------------------------------------------------
2811 !-------------------------------------------------------------------------------
2812 LOGICAL FUNCTION isodd( num )
2813  INTEGER, INTENT(IN) :: num
2814  isodd = (.NOT. iseven(num))
2815 END FUNCTION isodd
2816 
2817 !-------------------------------------------------------------------------------
2821 !-------------------------------------------------------------------------------
2822 FUNCTION lr2gr( locrow, level ) result ( globrow )
2823  !-----------------------------------------------------------------------------
2824  ! Formal arguments
2825  !-----------------------------------------------------------------------------
2826  INTEGER, INTENT(IN) :: locrow
2827  INTEGER, INTENT(IN) :: level
2828  INTEGER :: globrow
2829 
2830  !-----------------------------------------------------------------------------
2831  ! Local variables
2832  !-----------------------------------------------------------------------------
2833  INTEGER :: i
2834 
2835  !-----------------------------------------------------------------------------
2836  ! Invert the rule r=(r+1)/2 backward for all levels
2837  !-----------------------------------------------------------------------------
2838  globrow = locrow
2839  levelloop: DO i = level-1, 1, -1
2840  globrow = 2*globrow - 1
2841  END DO levelloop
2842 
2843  !-----------------------------------------------------------------------------
2844  ! DEBUGGING: Just double-check
2845  !-----------------------------------------------------------------------------
2846  IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow ) THEN
2847  stop !Consistency check failed
2848  END IF
2849 
2850  RETURN
2851 END FUNCTION lr2gr
2852 
2853 !-------------------------------------------------------------------------------
2859 !-------------------------------------------------------------------------------
2860 FUNCTION gr2lr( globrow, level ) result ( locrow )
2861  !-----------------------------------------------------------------------------
2862  ! Formal arguments
2863  !-----------------------------------------------------------------------------
2864  INTEGER, INTENT(IN) :: globrow
2865  INTEGER, INTENT(IN) :: level
2866  INTEGER :: locrow
2867 
2868  !-----------------------------------------------------------------------------
2869  ! Local variables
2870  !-----------------------------------------------------------------------------
2871  INTEGER :: i
2872 
2873  !-----------------------------------------------------------------------------
2874  ! Apply the rule r=(r+1)/2 for all odd-numbered rows
2875  ! Any time r becomes even, give up
2876  !-----------------------------------------------------------------------------
2877  locrow = globrow
2878  levelloop: DO i = 1, level-1, 1
2879  IF ( iseven(locrow) ) THEN !Even-numbered rows don't go any further in level
2880  locrow = 0
2881  EXIT levelloop !Stop looping
2882  END IF
2883  locrow = (locrow+1) / 2
2884  END DO levelloop
2885 
2886  RETURN
2887 END FUNCTION gr2lr
2888 
2889 !-------------------------------------------------------------------------------
2893 !-------------------------------------------------------------------------------
2894 FUNCTION gr2rank( globrow ) result ( outrank )
2895  !-----------------------------------------------------------------------------
2896  ! Formal arguments
2897  !-----------------------------------------------------------------------------
2898  INTEGER, INTENT(IN) :: globrow
2899  INTEGER :: outrank
2900 
2901  !-----------------------------------------------------------------------------
2902  ! Local variables
2903  !-----------------------------------------------------------------------------
2904  INTEGER :: m !Average integral number of rows per processor
2905  INTEGER :: spill !Spill over beyond prefect loading of rows to processors
2906 
2907  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2908  outrank = -1
2909  ELSE
2910  m = n / p
2911  spill = mod( n, p )
2912  IF ( globrow .LE. (m+1)*spill ) THEN
2913  outrank = (globrow-1)/(m+1)
2914  ELSE
2915  outrank = (globrow-1 - (m+1)*spill)/m + spill
2916  END IF
2917  END IF
2918 
2919  RETURN
2920 END FUNCTION gr2rank
2921 
2922 !-------------------------------------------------------------------------------
2926 !-------------------------------------------------------------------------------
2927 FUNCTION lr2rank( locrow, level ) result ( outrank )
2928  !-----------------------------------------------------------------------------
2929  ! Formal arguments
2930  !-----------------------------------------------------------------------------
2931  INTEGER, INTENT(IN) :: locrow
2932  INTEGER, INTENT(IN) :: level
2933  INTEGER :: outrank
2934 
2935  !-----------------------------------------------------------------------------
2936  ! Local variables
2937  !-----------------------------------------------------------------------------
2938  INTEGER :: globrow !Global row number of given locrow
2939 
2940  globrow = lr2gr( locrow, level )
2941  outrank = gr2rank( globrow )
2942 
2943  RETURN
2944 END FUNCTION lr2rank
2945 
2946 !-------------------------------------------------------------------------------
2952 !-------------------------------------------------------------------------------
2953 #if defined(MPI_OPT)
2954 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
2955  !-----------------------------------------------------------------------------
2956  ! Formal arguments
2957  !-----------------------------------------------------------------------------
2958  INTEGER, INTENT(IN) :: locrow
2959  INTEGER, INTENT(IN) :: level
2960  INTEGER, INTENT(IN) :: startlocrow
2961  INTEGER, INTENT(IN) :: endlocrow
2962  LOGICAL, INTENT(IN) :: bonly
2963  !-----------------------------------------------------------------------------
2964  ! Local variables
2965  !-----------------------------------------------------------------------------
2966  INTEGER :: globrow
2967  INTEGER :: globrowoff
2968  INTEGER :: prevglobrowoff
2969  INTEGER :: nextglobrowoff
2970  INTEGER :: row
2971 
2972  !-------------------------------------------------------------
2973  ! Nothing to do if last level
2974  !-------------------------------------------------------------
2975  IF ( level .GE. nlevels ) THEN
2976  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: no op '; CALL fl(ofu)
2977  IF ( rank .NE. 0 ) THEN
2978  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: impossible ',rank; CALL fl(ofu)
2979  stop
2980  END IF
2981  RETURN
2982  END IF
2983 
2984  !-------------------------------------------------------------
2985  ! Sanity check
2986  !-------------------------------------------------------------
2987  IF ( iseven( locrow ) ) THEN
2988  IF(kpdbg) WRITE(ofu,*) 'mat-mul: odd only ',globrow,' ',locrow; CALL fl(ofu); stop
2989  END IF
2990  IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) ) THEN
2991  IF(kpdbg) WRITE(ofu,*) 'mat-mul: locrow prob ',globrow,' ',locrow;CALL fl(ofu);stop
2992  END IF
2993 
2994  !-------------------------------------------------------------
2995  globrow = lr2gr( locrow, level )
2996  globrowoff = globrow - startglobrow + 1
2997  IF(kpdbg) WRITE(ofu,*) ' Compute mat-mul ',globrow,' ',locrow; CALL fl(ofu)
2998 
2999  !-------------------------------------------------------------
3000  ! Ensure there are mats at (thislevel, thisrow)
3001  !-------------------------------------------------------------
3002  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D ) ) THEN
3003  IF(kpdbg) WRITE(ofu,*) ' Copying bad D'; CALL fl(ofu); stop
3004  END IF
3005  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L ) ) THEN
3006  IF(kpdbg) WRITE(ofu,*) ' Copying bad L'; CALL fl(ofu); stop
3007  END IF
3008  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U ) ) THEN
3009  IF(kpdbg) WRITE(ofu,*) ' Copying bad U'; CALL fl(ofu); stop
3010  END IF
3011  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b ) ) THEN
3012  IF(kpdbg) WRITE(ofu,*) ' Copying bad b'; CALL fl(ofu); stop
3013  END IF
3014 
3015  !-------------------------------------------------------------
3016  ! Ensure there is space at (nextlevel, thisrow)
3017  !-------------------------------------------------------------
3018  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%D ) ) THEN
3019  IF(kpdbg) WRITE(ofu,*) ' Copying bad D +1'; CALL fl(ofu); stop
3020  END IF
3021  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%L ) ) THEN
3022  IF(kpdbg) WRITE(ofu,*) ' Copying bad L +1'; CALL fl(ofu); stop
3023  END IF
3024  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%U ) ) THEN
3025  IF(kpdbg) WRITE(ofu,*) ' Copying bad U +1'; CALL fl(ofu); stop
3026  END IF
3027  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%b ) ) THEN
3028  IF(kpdbg) WRITE(ofu,*) ' Copying bad b +1'; CALL fl(ofu); stop
3029  END IF
3030 
3031  !-------------------------------------------------------------
3032  ! Copy over the D and b as is first
3033  !-------------------------------------------------------------
3034  IF ( .NOT. bonly ) THEN
3035  lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3036  END IF
3037  lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3038 
3039  !-------------------------------------------------------------
3040  ! Check if this row has a top neighbor at this level
3041  !-------------------------------------------------------------
3042  IF ( lr2rank(locrow-1,level) .LT. 0 ) THEN
3043  ! No rank before ours
3044  IF ( .NOT. bonly ) THEN
3045  lelement(level+1,globrowoff)%U = 0
3046  IF(kpdbg) WRITE(ofu,*) ' ZERO L'; CALL fl(ofu)
3047  END IF
3048  ELSE
3049  IF ( locrow .EQ. startlocrow ) THEN
3050  prevglobrowoff = 0
3051  ELSE IF ( locrow .GT. startlocrow ) THEN
3052  prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3053  ELSE
3054  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible prev'; CALL fl(ofu); stop
3055  END IF
3056 
3057  IF ( .NOT. bonly ) THEN
3058  !------------------------------------------------
3059  ! D = D - L*Uhat(prev)
3060  !------------------------------------------------
3061  CALL bsystemclock(mattimer1)
3062  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3063  lelement(level,prevglobrowoff)%U, &
3064  one, lelement(level+1,globrowoff)%D )
3065  CALL bsystemclock(mattimer2)
3066  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3067  IF(kpdbg) WRITE(ofu,*) ' Dtop'; CALL fl(ofu)
3068 
3069  !------------------------------------------------
3070  ! L = -L*Lhat(prev)
3071  !------------------------------------------------
3072  CALL bsystemclock(mattimer1)
3073  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3074  lelement(level,prevglobrowoff)%L, &
3075  zero, lelement(level+1,globrowoff)%L )
3076  CALL bsystemclock(mattimer2)
3077  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3078  IF(kpdbg) WRITE(ofu,*) ' L'; CALL fl(ofu)
3079  END IF
3080 
3081  !------------------------------------------------
3082  ! b = b - L*bhat(prev)
3083  !------------------------------------------------
3084  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3085  lelement(level,prevglobrowoff)%b(:,1), &
3086  one, lelement(level+1,globrowoff)%b(:,1) )
3087  IF(kpdbg) WRITE(ofu,*) ' btop'; CALL fl(ofu)
3088  END IF
3089 
3090  !-------------------------------------------------------------
3091  ! Check if this row has a bottom neighbor at this level
3092  !-------------------------------------------------------------
3093  IF ( lr2rank(locrow+1,level) .LT. 0 ) THEN
3094  ! No rank after ours
3095  IF ( .NOT. bonly ) THEN
3096  lelement(level+1,globrowoff)%U = 0
3097  IF(kpdbg) WRITE(ofu,*) ' ZERO U'; CALL fl(ofu)
3098  END IF
3099  ELSE
3100  IF ( locrow .EQ. endlocrow ) THEN
3101  nextglobrowoff = endglobrow - startglobrow + 2
3102  ELSE IF ( locrow .LT. endlocrow ) THEN
3103  nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3104  ELSE
3105  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible next'; CALL fl(ofu); stop
3106  END IF
3107 
3108  IF ( .NOT. bonly ) THEN
3109  !------------------------------------------------
3110  ! D = D - U*Lhat(next)
3111  !------------------------------------------------
3112  CALL bsystemclock(mattimer1)
3113  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3114  lelement(level,nextglobrowoff)%L, &
3115  one, lelement(level+1,globrowoff)%D )
3116  CALL bsystemclock(mattimer2)
3117  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3118  IF(kpdbg) WRITE(ofu,*) ' Dbot'; CALL fl(ofu)
3119 
3120  !------------------------------------------------
3121  ! U = -U*Uhat(next)
3122  !------------------------------------------------
3123  CALL bsystemclock(mattimer1)
3124  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3125  lelement(level,nextglobrowoff)%U, &
3126  zero, lelement(level+1,globrowoff)%U )
3127  CALL bsystemclock(mattimer2)
3128  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3129  IF(kpdbg) WRITE(ofu,*) ' U'; CALL fl(ofu)
3130  END IF
3131 
3132  !------------------------------------------------
3133  ! b = b - U*bhat(next)
3134  !------------------------------------------------
3135  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3136  lelement(level,nextglobrowoff)%b(:,1), &
3137  one, lelement(level+1,globrowoff)%b(:,1) )
3138  IF(kpdbg) WRITE(ofu,*) ' bbot'; CALL fl(ofu)
3139  END IF
3140 END SUBROUTINE computeforwardoddrowhats
3141 
3142 !-------------------------------------------------------------------------------
3146 !-------------------------------------------------------------------------------
3147 SUBROUTINE forwardsolve_bst
3148  !-----------------------------------------------------------------------------
3149  ! Local variables
3150  !-----------------------------------------------------------------------------
3151  INTEGER :: level
3152  INTEGER :: locrow
3153  INTEGER :: globrow
3154  INTEGER :: globrowoff
3155  INTEGER :: startlocrow
3156  INTEGER :: endlocrow
3157  INTEGER :: rowoffset
3158  INTEGER :: nbrrank
3159  INTEGER :: nbrmpireqindx
3160  INTEGER :: nbrmpireqcnt
3161  INTEGER :: nbrmpinirecvs
3162  INTEGER :: nbrmpinisends
3163  INTEGER :: nbrmpireq(6)
3164  INTEGER :: nbrmpierr(6)
3165  INTEGER :: mpiwaiterr
3166  INTEGER :: mpierr
3167  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3168  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3169  INTEGER :: waitmatchindex
3170  INTEGER :: stag
3171  INTEGER :: reqi
3172  INTEGER :: blaserr
3173  DOUBLE PRECISION :: fwton, fwtoff
3174  INTEGER :: i
3175 
3176 ! DO globrow = startglobrow, endglobrow
3177 ! globrowoff = globrow-startglobrow+1
3178 ! DO i = 1, M
3179 ! WRITE(100+rank,*) "L:", globrow, i,lelement(1,globrowoff)%L(i,1)
3180 ! CALL FLUSH(100+rank)
3181 ! WRITE(100+rank,*) "D:", globrow, i,lelement(1,globrowoff)%D(i,1)
3182 ! CALL FLUSH(100+rank)
3183 ! WRITE(100+rank,*) "U:", globrow, i,lelement(1,globrowoff)%U(i,1)
3184 ! CALL FLUSH(100+rank)
3185 ! END DO
3186 ! END DO
3187 ! CALL STOPMPI(923)
3188 
3189  CALL bsystemclock(globtimer1)
3190  CALL bsystemclock(fwton)
3191 
3192  !-----------------------------------------------------------------------------
3193  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3194  IF(kpdbg) WRITE(ofu,*) '------ Forward solve start ------'; CALL fl(ofu)
3195  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3196 
3197  !-----------------------------------------------------------------------------
3198  ! Forward solving loop in which even rows compute inverses and odd rows
3199  ! compute matrix-matrix products for new terms
3200  !-----------------------------------------------------------------------------
3201  forwardsolveloop: DO level = 1, nlevels, 1
3202 
3203  !SKS: t0
3204  CALL bsystemclock(skston)
3205  !---------------------------------------------------------------------------
3206  IF ( usebarriers ) THEN
3207  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward level ', level; CALL fl(ofu)
3208  CALL mpi_barrier( ns_comm, mpierr )
3209  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward level ', level; CALL fl(ofu)
3210  END IF
3211 
3212  !---------------------------------------------------------------------------
3213  !Determine start and end local rows at current level
3214  !---------------------------------------------------------------------------
3215  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
3216  endlocrow = -1 !Start with an invalid value (less than startlocrow)
3217  DO globrow = startglobrow, endglobrow, 1
3218  locrow = gr2lr( globrow, level ) !globrow's place at current level
3219  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
3220  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
3221  startlocrow = locrow
3222  END IF
3223  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
3224  endlocrow = locrow
3225  END IF
3226  END IF
3227  END DO
3228 
3229  !---------------------------------------------------------------------------
3230  !We may have run out of work; see if we have a valid range of rows left
3231  !---------------------------------------------------------------------------
3232  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
3233  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward level ', level; CALL fl(ofu)
3234  CALL plbforwardinitializelevel( level, .false. )
3235  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
3236  ELSE
3237  CALL plbforwardinitializelevel( level, .true. )
3238  END IF
3239  CALL bsystemclock(skstoff)
3240  t(1) = t(1) + skstoff-skston
3241  skston=skstoff
3242 
3243  !SKS: t1
3244 
3245  IF(kpdbg) WRITE(ofu,*) '**** Forward level ', level, ' ****'; CALL fl(ofu)
3246 
3247  !---------------------------------------------------------------------------
3248  !Allocate memory for incoming nbr values, if any, at this level
3249  !---------------------------------------------------------------------------
3250  IF ( isodd(startlocrow) .AND. &
3251  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
3252  globrowoff = 0
3253  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3254  ALLOCATE( lelement(level, globrowoff)%L(m,1) )
3255  ALLOCATE( lelement(level, globrowoff)%U(m,1) )
3256  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3257  END IF
3258  lelement(level, globrowoff)%L = 0
3259  lelement(level, globrowoff)%U = 0
3260  lelement(level, globrowoff)%b = 0
3261  END IF
3262  IF ( isodd(endlocrow) .AND. &
3263  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
3264  globrowoff = endglobrow-startglobrow+2
3265  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3266  ALLOCATE( lelement(level, globrowoff)%L(m,1) )
3267  ALLOCATE( lelement(level, globrowoff)%U(m,1) )
3268  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3269  END IF
3270  lelement(level, globrowoff)%L = 0
3271  lelement(level, globrowoff)%U = 0
3272  lelement(level, globrowoff)%b = 0
3273  END IF
3274  !SKS: t2
3275  CALL bsystemclock(skstoff)
3276  t(2) = t(2) + skstoff-skston
3277  skston=skstoff
3278 
3279  !---------------------------------------------------------------------------
3280  !Allocate memory at next level for those rows that are odd at current level
3281  !---------------------------------------------------------------------------
3282  IF ( level .LT. nlevels ) THEN
3283  DO locrow = startlocrow, endlocrow, 1
3284  IF ( isodd(locrow) ) THEN
3285  globrow = lr2gr( locrow, level )
3286  globrowoff = globrow-startglobrow+1
3287  IF ( .NOT. ALLOCATED( lelement(level+1, globrowoff)%L ) ) THEN
3288  ALLOCATE( lelement(level+1, globrowoff)%L(m,1) )
3289  ALLOCATE( lelement(level+1, globrowoff)%D(m,1) )
3290  ALLOCATE( lelement(level+1, globrowoff)%U(m,1) )
3291  ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3292  END IF
3293  lelement(level+1, globrowoff)%L = 0
3294  lelement(level+1, globrowoff)%D = 0
3295  lelement(level+1, globrowoff)%U = 0
3296  lelement(level+1, globrowoff)%b = 0
3297  END IF
3298  END DO
3299  END IF
3300  !SKS: t3
3301  CALL bsystemclock(skstoff)
3302  t(3) = t(3) + skstoff-skston
3303  skston=skstoff
3304 
3305  !---------------------------------------------------------------------------
3306  !Reset requests
3307  !---------------------------------------------------------------------------
3308  DO nbrmpireqindx = 1, 6
3309  nbrmpireq(nbrmpireqindx) = mpi_request_null
3310  END DO
3311 
3312  !---------------------------------------------------------------------------
3313  !Pre-post the expected MPI receives via a non-blocking recv primitive
3314  !Pre-posting can help overlap communication with computing inverses
3315  !---------------------------------------------------------------------------
3316  nbrmpireqindx = 1
3317  nbrmpinirecvs = 0
3318  nbrrank = lr2rank(startlocrow-1,level)
3319  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
3320  !-------------------------------------------------------------------------
3321  !Our top row at this level is odd and top row's previous is valid
3322  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
3323  !-------------------------------------------------------------------------
3324  globrowoff = 0
3325  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
3326 
3327  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3328  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); stop
3329  END IF
3330  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3331  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); stop
3332  END IF
3333  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3334  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); stop
3335  END IF
3336 
3337  CALL bsystemclock(loctimer1)
3338  CALL mpi_irecv( lelement(level, globrowoff)%L, m, mpi_real8, &
3339  nbrrank, 1, ns_comm, nbrmpireq(nbrmpireqindx), &
3340  nbrmpierr(nbrmpireqindx) )
3341  nbrmpireqindx = nbrmpireqindx + 1
3342  nbrmpinirecvs = nbrmpinirecvs + 1
3343  IF(kpdbg) WRITE(ofu,*) ' Irecv 1 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3344 
3345  CALL mpi_irecv( lelement(level, globrowoff)%U, m, mpi_real8, &
3346  nbrrank, 2, ns_comm, nbrmpireq(nbrmpireqindx), &
3347  nbrmpierr(nbrmpireqindx))
3348  nbrmpireqindx = nbrmpireqindx + 1
3349  nbrmpinirecvs = nbrmpinirecvs + 1
3350  IF(kpdbg) WRITE(ofu,*) ' Irecv 2 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3351 
3352  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3353  nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3354  nbrmpierr(nbrmpireqindx))
3355  nbrmpireqindx = nbrmpireqindx + 1
3356  nbrmpinirecvs = nbrmpinirecvs + 1
3357  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3358  CALL bsystemclock(loctimer2)
3359  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3360  END IF
3361  !SKS: t4
3362  CALL bsystemclock(skstoff)
3363  t(4) = t(4) + skstoff-skston
3364  skston=skstoff
3365 
3366  nbrrank = lr2rank(endlocrow+1,level)
3367  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
3368  !-------------------------------------------------------------------------
3369  !Our bottom row at this level is odd and bottom row's next is valid
3370  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
3371  !-------------------------------------------------------------------------
3372  globrowoff = endglobrow-startglobrow+2
3373  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
3374 
3375  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3376  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); stop
3377  END IF
3378  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3379  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); stop
3380  END IF
3381  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3382  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); stop
3383  END IF
3384 
3385  CALL bsystemclock(loctimer1)
3386  CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3387  nbrrank, 4, ns_comm, nbrmpireq(nbrmpireqindx), &
3388  nbrmpierr(nbrmpireqindx))
3389  nbrmpireqindx = nbrmpireqindx + 1
3390  nbrmpinirecvs = nbrmpinirecvs + 1
3391  IF(kpdbg) WRITE(ofu,*) ' Irecv 4 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3392 
3393  CALL mpi_irecv( lelement(level, globrowoff)%U, m, mpi_real8, &
3394  nbrrank, 5, ns_comm, nbrmpireq(nbrmpireqindx), &
3395  nbrmpierr(nbrmpireqindx))
3396  nbrmpireqindx = nbrmpireqindx + 1
3397  nbrmpinirecvs = nbrmpinirecvs + 1
3398  IF(kpdbg) WRITE(ofu,*) ' Irecv 5 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3399 
3400  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3401  nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3402  nbrmpierr(nbrmpireqindx))
3403  nbrmpireqindx = nbrmpireqindx + 1
3404  nbrmpinirecvs = nbrmpinirecvs + 1
3405  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3406  CALL bsystemclock(loctimer2)
3407  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3408  END IF
3409  !SKS: t5
3410  CALL bsystemclock(skstoff)
3411  t(5) = t(5) + skstoff-skston
3412  skston=skstoff
3413 
3414  !---------------------------------------------------------------------------
3415  !Compute inverses for even-numbered rows at this level
3416  !---------------------------------------------------------------------------
3417  nbrmpinisends = 0
3418  DO locrow = startlocrow, endlocrow, 1
3419  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
3420  globrow = lr2gr( locrow, level )
3421  globrowoff = globrow-startglobrow+1
3422 
3423  IF(kpdbg) WRITE(ofu,*) ' Compute even hats ',globrow,' ', locrow; CALL fl(ofu)
3424 
3425  !-----------------------------------------------------------------------
3426  !Sanity checks
3427  !-----------------------------------------------------------------------
3428  IF ( level .EQ. nlevels ) THEN
3429  IF ( (rank .NE. 0) .OR. &
3430  (startlocrow .NE. endlocrow) .OR. &
3431  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
3432  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
3433  stop
3434  END IF
3435  END IF
3436  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
3437  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); stop
3438  END IF
3439  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
3440  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); stop
3441  END IF
3442  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3443  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); stop
3444  END IF
3445  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3446  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); stop
3447  END IF
3448  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3449  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); stop
3450  END IF
3451 
3452  !-----------------------------------------------------------------------
3453  !Do LU factorization of D (in place into D itself)
3454  !-----------------------------------------------------------------------
3455  CALL bsystemclock(mattimer1)
3456  CALL plbdgetrf( lelement(level,globrowoff)%D, &
3457  lelement(1,globrowoff)%pivot, blaserr )
3458  CALL bsystemclock(mattimer2)
3459  dgetrf_time = dgetrf_time + (mattimer2-mattimer1)
3460  CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3461  IF (blaserr .NE. 0) THEN
3462  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRF failed ', blaserr; CALL fl(ofu); stop
3463  END IF
3464 
3465  !-----------------------------------------------------------------------
3466  !Compute hats D^(-1)L and D^(-1)U if not last level
3467  !-----------------------------------------------------------------------
3468  IF ( level .LT. nlevels ) THEN
3469  CALL bsystemclock(mattimer1)
3470  DO i=1, m
3471  lelement(level,globrowoff)%L(i,1)=lelement(level,globrowoff)%L(i,1)/&
3472  lelement(level,globrowoff)%D(i,1)
3473  END DO
3474  blaserr=0
3475  CALL bsystemclock(mattimer2)
3476  dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3477  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3478  IF (blaserr .NE. 0) THEN
3479  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS L failed'; CALL fl(ofu); stop
3480  END IF
3481  CALL bsystemclock(mattimer1)
3482  DO i=1, m
3483  lelement(level,globrowoff)%U(i,1)=lelement(level,globrowoff)%U(i,1)/&
3484  lelement(level,globrowoff)%D(i,1)
3485  END DO
3486  blaserr=0
3487  CALL bsystemclock(mattimer2)
3488  dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3489  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3490  IF (blaserr .NE. 0) THEN
3491  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS U failed'; CALL fl(ofu); stop
3492  END IF
3493  END IF
3494 
3495  !-----------------------------------------------------------------------
3496  !Compute b-hats D^(-1)b
3497  !-----------------------------------------------------------------------
3498  CALL bsystemclock(mattimer1)
3499  DO i=1, m
3500  lelement(level,globrowoff)%b(i,1)=lelement(level,globrowoff)%b(i,1)/&
3501  lelement(level,globrowoff)%D(i,1)
3502  END DO
3503  blaserr=0
3504  IF (blaserr .NE. 0) THEN
3505  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); stop
3506  END IF
3507  CALL bsystemclock(mattimer2)
3508  dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3509 
3510  !-----------------------------------------------------------------------
3511  !Send my Lhats, Uhats and bhats to neighbor row if that neighbor exists
3512  !and that neighbor row is hosted outside this rank
3513  !-----------------------------------------------------------------------
3514  DO rowoffset = -1, 1, 2
3515  nbrrank = lr2rank(locrow+rowoffset, level)
3516  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
3517  CALL bsystemclock(loctimer1)
3518  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3519  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); stop
3520  END IF
3521  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3522  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); stop
3523  END IF
3524  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3525  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); stop
3526  END IF
3527 
3528  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
3529  globrowoff = globrow-startglobrow+1
3530  stag = (((1-rowoffset))/2)*3
3531  CALL mpi_isend( lelement(level,globrowoff)%L, m, mpi_real8, &
3532  nbrrank, stag+1, ns_comm, &
3533  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3534  nbrmpireqindx = nbrmpireqindx + 1
3535  nbrmpinisends = nbrmpinisends + 1
3536  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+1,' ',globrowoff; CALL fl(ofu)
3537 
3538  CALL mpi_isend( lelement(level,globrowoff)%U, m, mpi_real8, &
3539  nbrrank, stag+2, ns_comm, &
3540  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3541  nbrmpireqindx = nbrmpireqindx + 1
3542  nbrmpinisends = nbrmpinisends + 1
3543  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+2,' ',globrowoff; CALL fl(ofu)
3544 
3545  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3546  nbrrank, stag+3, ns_comm, &
3547  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3548  nbrmpireqindx = nbrmpireqindx + 1
3549  nbrmpinisends = nbrmpinisends + 1
3550  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
3551  CALL bsystemclock(loctimer2)
3552  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3553  END IF
3554  END DO
3555  END IF
3556  END DO
3557  !SKS: t6
3558  CALL bsystemclock(skstoff)
3559  t(6) = t(6) + skstoff-skston
3560  skston=skstoff
3561 
3562  !---------------------------------------------------------------------------
3563  !Compute the matrix-matrix multiplications for non-boundary odd rows
3564  !---------------------------------------------------------------------------
3565  DO locrow = startlocrow, endlocrow, 1
3566  globrow = lr2gr( locrow, level )
3567  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
3568  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
3569  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3570  CALL computeforwardoddrowhats( locrow, level, &
3571  startlocrow, endlocrow, .false. )
3572  END IF
3573  END IF
3574  END DO
3575  !SKS: t7
3576  CALL bsystemclock(skstoff)
3577  t(7) = t(7) + skstoff-skston
3578  skston=skstoff
3579 
3580  !---------------------------------------------------------------------------
3581  !We have to wait for incoming even-numbered neighboring rows, before
3582  !computing inverses for the odd boundaries, if any.
3583  !Complete the send-receive of inverses, by doing a "wait all"
3584  !---------------------------------------------------------------------------
3585  nbrmpireqcnt = nbrmpireqindx - 1
3586  IF ( nbrmpireqcnt .GT. 0 ) THEN
3587  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
3588  CALL bsystemclock(loctimer1)
3589  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3590  CALL bsystemclock(loctimer2)
3591  waitall_time=waitall_time+(loctimer2-loctimer1)
3592  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3593  END IF
3594  !SKS: t8
3595  CALL bsystemclock(skstoff)
3596  t(8) = t(8) + skstoff-skston
3597  skston=skstoff
3598 
3599  !---------------------------------------------------------------------------
3600  !Now we can compute inverses for odd boundaries, if any
3601  !---------------------------------------------------------------------------
3602  IF ( isodd(startlocrow) ) THEN
3603  globrow = lr2gr( startlocrow, level )
3604  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
3605  CALL computeforwardoddrowhats( startlocrow, level, &
3606  startlocrow, endlocrow, .false. )
3607  END IF
3608  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
3609  globrow = lr2gr( endlocrow, level )
3610  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
3611  CALL computeforwardoddrowhats( endlocrow, level, &
3612  startlocrow, endlocrow, .false. )
3613  END IF
3614  !SKS: t9
3615  CALL bsystemclock(skstoff)
3616  t(9) = t(9) + skstoff-skston
3617  skston=skstoff
3618 
3619  !SKS: t10
3620  CALL bsystemclock(skstoff)
3621  t(10) = t(10) + skstoff-skston
3622  skston=skstoff
3623  END DO forwardsolveloop
3624 
3625  !-----------------------------------------------------------------------------
3626  matdirtied = .false.
3627  rhsdirtied = .false.
3628 
3629  !-----------------------------------------------------------------------------
3630  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
3631  IF(kpdbg) WRITE(ofu,*) '------ Forward solve end ------'; CALL fl(ofu)
3632 
3633  CALL bsystemclock(globtimer2)
3634  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
3635 
3636  !SKS: t11
3637  CALL bsystemclock(fwtoff)
3638  t(11) = t(11) + fwtoff-skston
3639  forwardsolveloop_time = forwardsolveloop_time + (fwtoff-fwton)
3640 
3641 END SUBROUTINE forwardsolve_bst
3642 
3643 !-------------------------------------------------------------------------------
3648 !-------------------------------------------------------------------------------
3649 SUBROUTINE forwardupdateb
3650  !-----------------------------------------------------------------------------
3651  ! Local variables
3652  !-----------------------------------------------------------------------------
3653  INTEGER :: level
3654  INTEGER :: locrow
3655  INTEGER :: globrow
3656  INTEGER :: globrowoff
3657  INTEGER :: startlocrow
3658  INTEGER :: endlocrow
3659  INTEGER :: rowoffset
3660  INTEGER :: nbrrank
3661  INTEGER :: nbrmpireqindx
3662  INTEGER :: nbrmpireqcnt
3663  INTEGER :: nbrmpinirecvs
3664  INTEGER :: nbrmpinisends
3665  INTEGER :: nbrmpireq(6)
3666  INTEGER :: nbrmpierr(6)
3667  INTEGER :: mpiwaiterr
3668  INTEGER :: mpierr
3669  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
3670  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
3671  INTEGER :: waitmatchindex
3672  INTEGER :: stag
3673  INTEGER :: reqi
3674  INTEGER :: blaserr
3675  INTEGER :: i
3676 
3677  !-----------------------------------------------------------------------------
3678  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3679  IF(kpdbg) WRITE(ofu,*) '------ Forward updateb start ------'; CALL fl(ofu)
3680  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3681 
3682  !-----------------------------------------------------------------------------
3683  IF ( matdirtied ) THEN
3684  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3685  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix ------'; CALL fl(ofu)
3686  IF(kpdbg) WRITE(ofu,*) ' ------ Forward solve, not updateb... ------'; CALL fl(ofu)
3687  CALL forwardsolve_bst()
3688  RETURN
3689  END IF
3690 
3691  CALL bsystemclock(globtimer1)
3692 
3693  !-----------------------------------------------------------------------------
3694  ! Forward solving loop in which even rows have already computed inverses and
3695  ! odd rows recompute matrix-vector products with new rhs
3696  !-----------------------------------------------------------------------------
3697  forwardsolveloop: DO level = 1, nlevels, 1
3698 
3699  !---------------------------------------------------------------------------
3700  IF ( usebarriers ) THEN
3701  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward updateb level ', level; CALL fl(ofu)
3702  CALL mpi_barrier( ns_comm, mpierr )
3703  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward updateb level ', level; CALL fl(ofu)
3704  END IF
3705 
3706  !---------------------------------------------------------------------------
3707  !Determine start and end local rows at current level
3708  !---------------------------------------------------------------------------
3709  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
3710  endlocrow = -1 !Start with an invalid value (less than startlocrow)
3711  DO globrow = startglobrow, endglobrow, 1
3712  locrow = gr2lr( globrow, level ) !globrow's place at current level
3713  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
3714  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
3715  startlocrow = locrow
3716  END IF
3717  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
3718  endlocrow = locrow
3719  END IF
3720  END IF
3721  END DO
3722 
3723  !---------------------------------------------------------------------------
3724  !We may have run out of work; see if we have a valid range of rows left
3725  !---------------------------------------------------------------------------
3726  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
3727  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward updateb level ',level; CALL fl(ofu)
3728  CALL plbforwardinitializelevel( level, .false. )
3729  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
3730  ELSE
3731  CALL plbforwardinitializelevel( level, .true. )
3732  END IF
3733 
3734  IF(kpdbg) WRITE(ofu,*) '**** Forward updateb level ', level, ' ****'; CALL fl(ofu)
3735 
3736  !---------------------------------------------------------------------------
3737  !Memory has already been allocated in ForwardSolve for incoming nbr values,
3738  !if any, at this level
3739  !---------------------------------------------------------------------------
3740  IF ( isodd(startlocrow) .AND. &
3741  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
3742  globrowoff = 0
3743  IF( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
3744  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
3745  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
3746  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +0 memory error'; CALL fl(ofu); stop
3747  END IF
3748  lelement(level, globrowoff)%b = 0
3749  END IF
3750  IF ( isodd(endlocrow) .AND. &
3751  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
3752  globrowoff = endglobrow-startglobrow+2
3753  IF ( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
3754  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
3755  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
3756  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +2 memory error'; CALL fl(ofu); stop
3757  END IF
3758  lelement(level, globrowoff)%b = 0
3759  END IF
3760 
3761  !---------------------------------------------------------------------------
3762  !Memory has already been allocated in ForwardSolve at next level for those
3763  !rows that are odd at current level
3764  !---------------------------------------------------------------------------
3765  IF ( level .LT. nlevels ) THEN
3766  DO locrow = startlocrow, endlocrow, 1
3767  IF ( isodd(locrow) ) THEN
3768  globrow = lr2gr( locrow, level )
3769  globrowoff = globrow-startglobrow+1
3770  IF ( .NOT. ( ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
3771  ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
3772  ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
3773  ALLOCATED( lelement(level+1, globrowoff)%b ) ) ) THEN
3774  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +1 memory error'; CALL fl(ofu); stop
3775  END IF
3776  lelement(level+1, globrowoff)%b = 0
3777  END IF
3778  END DO
3779  END IF
3780 
3781  !---------------------------------------------------------------------------
3782  !Reset requests
3783  !---------------------------------------------------------------------------
3784  DO nbrmpireqindx = 1, 6
3785  nbrmpireq(nbrmpireqindx) = mpi_request_null
3786  END DO
3787 
3788  !---------------------------------------------------------------------------
3789  !Pre-post the expected MPI receives via a non-blocking recv primitive
3790  !Pre-posting can help overlap communication with computing inverses
3791  !---------------------------------------------------------------------------
3792  nbrmpireqindx = 1
3793  nbrmpinirecvs = 0
3794  nbrrank = lr2rank(startlocrow-1,level)
3795  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
3796  !-------------------------------------------------------------------------
3797  !Our top row at this level is odd and top row's previous is valid
3798  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
3799  !-------------------------------------------------------------------------
3800  globrowoff = 0
3801  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
3802 
3803  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3804  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); stop
3805  END IF
3806  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3807  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); stop
3808  END IF
3809  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3810  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); stop
3811  END IF
3812 
3813  CALL bsystemclock(loctimer1)
3814  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3815  nbrrank, 3, ns_comm, nbrmpireq(nbrmpireqindx), &
3816  nbrmpierr(nbrmpireqindx))
3817  nbrmpireqindx = nbrmpireqindx + 1
3818  nbrmpinirecvs = nbrmpinirecvs + 1
3819  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3820  CALL bsystemclock(loctimer2)
3821  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3822  END IF
3823 
3824  nbrrank = lr2rank(endlocrow+1,level)
3825  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
3826  !-------------------------------------------------------------------------
3827  !Our bottom row at this level is odd and bottom row's next is valid
3828  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
3829  !-------------------------------------------------------------------------
3830  globrowoff = endglobrow-startglobrow+2
3831  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
3832 
3833  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3834  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); stop
3835  END IF
3836  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3837  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); stop
3838  END IF
3839  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3840  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); stop
3841  END IF
3842 
3843  CALL bsystemclock(loctimer1)
3844  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3845  nbrrank, 6, ns_comm, nbrmpireq(nbrmpireqindx), &
3846  nbrmpierr(nbrmpireqindx))
3847  nbrmpireqindx = nbrmpireqindx + 1
3848  nbrmpinirecvs = nbrmpinirecvs + 1
3849  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3850  CALL bsystemclock(loctimer2)
3851  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3852  END IF
3853 
3854  !---------------------------------------------------------------------------
3855  !Inverses have already been computed for even-numbered rows at this level
3856  !---------------------------------------------------------------------------
3857  nbrmpinisends = 0
3858  DO locrow = startlocrow, endlocrow, 1
3859  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
3860  globrow = lr2gr( locrow, level )
3861  globrowoff = globrow-startglobrow+1
3862 
3863  IF(kpdbg) WRITE(ofu,*) ' Computed even hats ',globrow,' ', locrow; CALL fl(ofu)
3864 
3865  !-----------------------------------------------------------------------
3866  !Sanity checks
3867  !-----------------------------------------------------------------------
3868  IF ( level .EQ. nlevels ) THEN
3869  IF ( (rank .NE. 0) .OR. &
3870  (startlocrow .NE. endlocrow) .OR. &
3871  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
3872  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
3873  stop
3874  END IF
3875  END IF
3876  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
3877  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); stop
3878  END IF
3879  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
3880  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); stop
3881  END IF
3882  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3883  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); stop
3884  END IF
3885  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3886  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); stop
3887  END IF
3888  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3889  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); stop
3890  END IF
3891 
3892  !-----------------------------------------------------------------------
3893  !LU factorization of D has already been performed in place into D itself
3894  !-----------------------------------------------------------------------
3895 
3896  !-----------------------------------------------------------------------
3897  !Compute b-hats D^(-1)b
3898  !-----------------------------------------------------------------------
3899  CALL bsystemclock(mattimer1)
3900  DO i=1, m
3901  lelement(level,globrowoff)%b(i,1)=lelement(level,globrowoff)%b(i,1)/&
3902  lelement(level,globrowoff)%D(i,1)
3903  END DO
3904  blaserr=0
3905  IF (blaserr .NE. 0) THEN
3906  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); stop
3907  END IF
3908  CALL bsystemclock(mattimer2)
3909  dgetrs_time = dgetrs_time + (mattimer2-mattimer1)
3910 
3911  !-----------------------------------------------------------------------
3912  !Send my bhats to neighbor row if that neighbor exists
3913  !and that neighbor row is hosted outside this rank
3914  !-----------------------------------------------------------------------
3915  DO rowoffset = -1, 1, 2
3916  nbrrank = lr2rank(locrow+rowoffset, level)
3917  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
3918  CALL bsystemclock(loctimer1)
3919  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3920  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); stop
3921  END IF
3922  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3923  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); stop
3924  END IF
3925  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3926  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); stop
3927  END IF
3928 
3929  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
3930  globrowoff = globrow-startglobrow+1
3931  stag = (((1-rowoffset))/2)*3
3932  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3933  nbrrank, stag+3, ns_comm, &
3934  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3935  nbrmpireqindx = nbrmpireqindx + 1
3936  nbrmpinisends = nbrmpinisends + 1
3937  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
3938  CALL bsystemclock(loctimer2)
3939  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3940  END IF
3941  END DO
3942  END IF
3943  END DO
3944 
3945  !---------------------------------------------------------------------------
3946  !Compute the matrix-vector multiplications for non-boundary odd rows
3947  !---------------------------------------------------------------------------
3948  DO locrow = startlocrow, endlocrow, 1
3949  globrow = lr2gr( locrow, level )
3950  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
3951  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
3952  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3953  CALL computeforwardoddrowhats( locrow, level, &
3954  startlocrow, endlocrow, .true. )
3955  END IF
3956  END IF
3957  END DO
3958 
3959  !---------------------------------------------------------------------------
3960  !We have to wait for incoming even-numbered neighboring rows, before
3961  !computing inverses for the odd boundaries, if any.
3962  !Complete the send-receive of inverses, by doing a "wait all"
3963  !---------------------------------------------------------------------------
3964  nbrmpireqcnt = nbrmpireqindx - 1
3965  IF ( nbrmpireqcnt .GT. 0 ) THEN
3966  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
3967  CALL bsystemclock(loctimer1)
3968  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3969  CALL bsystemclock(loctimer2)
3970  waitall_time=waitall_time+(loctimer2-loctimer1)
3971  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3972  END IF
3973 
3974  !---------------------------------------------------------------------------
3975  !Now we can compute inverses for odd boundaries, if any
3976  !---------------------------------------------------------------------------
3977  IF ( isodd(startlocrow) ) THEN
3978  globrow = lr2gr( startlocrow, level )
3979  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
3980  CALL computeforwardoddrowhats( startlocrow, level, &
3981  startlocrow, endlocrow, .true. )
3982  END IF
3983  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
3984  globrow = lr2gr( endlocrow, level )
3985  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
3986  CALL computeforwardoddrowhats( endlocrow, level, &
3987  startlocrow, endlocrow, .true. )
3988  END IF
3989  END DO forwardsolveloop
3990 
3991  !-----------------------------------------------------------------------------
3992  rhsdirtied = .false.
3993 
3994  !-----------------------------------------------------------------------------
3995  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
3996  IF(kpdbg) WRITE(ofu,*) '------ updateb solve end ------'; CALL fl(ofu)
3997 
3998  CALL bsystemclock(globtimer2)
3999  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4000 
4001 END SUBROUTINE forwardupdateb
4002 
4003 !-------------------------------------------------------------------------------
4007 !-------------------------------------------------------------------------------
4008 SUBROUTINE backwardsolve_bst
4009  !-----------------------------------------------------------------------------
4010  ! Local variables
4011  !-----------------------------------------------------------------------------
4012  INTEGER :: level
4013  INTEGER :: locrow
4014  INTEGER :: globrow
4015  INTEGER :: globrowoff
4016  INTEGER :: prevglobrow
4017  INTEGER :: nextglobrow
4018  INTEGER :: startlocrow
4019  INTEGER :: endlocrow
4020  INTEGER :: rowoffset
4021  INTEGER :: nbrrank
4022  INTEGER :: nbrmpireqindx
4023  INTEGER :: nbrmpireqcnt
4024  INTEGER :: nbrmpinirecvs
4025  INTEGER :: nbrmpinisends
4026  INTEGER :: nbrmpireq(2)
4027  INTEGER :: nbrmpierr(2)
4028  INTEGER :: mpiwaiterr
4029  INTEGER :: mpierr
4030  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4031  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4032  INTEGER :: waitmatchindex
4033  INTEGER :: stag
4034  INTEGER :: reqi
4035  INTEGER :: blaserr
4036 
4037  CALL bsystemclock(globtimer1)
4038 
4039  !-----------------------------------------------------------------------------
4040  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4041  IF(kpdbg) WRITE(ofu,*) '------ Backward solve start ------'; CALL fl(ofu)
4042 
4043  !-----------------------------------------------------------------------------
4044  IF ( matdirtied ) THEN
4045  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4046  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix; updating... ------'; CALL fl(ofu)
4047  CALL forwardsolve_bst()
4048  END IF
4049  IF ( rhsdirtied ) THEN
4050  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4051  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied RHS; updating... ------'; CALL fl(ofu)
4052  CALL forwardupdateb()
4053  END IF
4054 
4055  !-----------------------------------------------------------------------------
4056  ! Make sure we have the forward solve data
4057  !-----------------------------------------------------------------------------
4058  IF ( (.NOT. ALLOCATED(lelement)) .OR. (.NOT. ALLOCATED(selement)) ) THEN
4059  IF(kpdbg) WRITE(ofu,*) 'Forward not called before backward?'; CALL fl(ofu)
4060  RETURN
4061  END IF
4062 
4063  !-----------------------------------------------------------------------------
4064  ! Backward solving loop in which odd rows communicate their solution to even
4065  ! rows and even rows compute their solution
4066  !-----------------------------------------------------------------------------
4067  backwardsolveloop: DO level = nlevels, 1, -1
4068  !---------------------------------------------------------------------------
4069  IF ( usebarriers ) THEN
4070  IF(kpdbg) WRITE(ofu,*) 'Barrier at backward level ', level; CALL fl(ofu)
4071  CALL mpi_barrier( ns_comm, mpierr )
4072  IF(kpdbg) WRITE(ofu,*) 'Done barrier at backward level ', level; CALL fl(ofu)
4073  END IF
4074 
4075  !---------------------------------------------------------------------------
4076  !Determine start and end local rows at current level
4077  !---------------------------------------------------------------------------
4078  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
4079  endlocrow = -1 !Start with an invalid value (less than startlocrow)
4080  DO globrow = startglobrow, endglobrow, 1
4081  locrow = gr2lr( globrow, level ) !globrow's place at current level
4082  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
4083  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
4084  startlocrow = locrow
4085  END IF
4086  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
4087  endlocrow = locrow
4088  END IF
4089  END IF
4090  END DO
4091 
4092  !---------------------------------------------------------------------------
4093  !We may have nothing to do at this level; if so, just go back another level
4094  !---------------------------------------------------------------------------
4095  IF ( startlocrow .GT. endlocrow ) THEN !No more rows left at this level
4096  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at backward level', level; CALL fl(ofu)
4097  CALL plbbackwardinitializelevel( level, .false. )
4098  cycle backwardsolveloop !Move on to previous level
4099  ELSE
4100  CALL plbbackwardinitializelevel( level, .true. )
4101  END IF
4102 
4103  IF(kpdbg) WRITE(ofu,*) '**** Backward level ', level, ' ****'; CALL fl(ofu)
4104 
4105  !---------------------------------------------------------------------------
4106  !Reset requests
4107  !---------------------------------------------------------------------------
4108  DO nbrmpireqindx = 1, 2
4109  nbrmpireq(nbrmpireqindx) = mpi_request_null
4110  END DO
4111 
4112  !---------------------------------------------------------------------------
4113  !Pre-post the expected MPI receives via a non-blocking recv primitive
4114  !Pre-posting can help overlap communication with computing inverses
4115  !---------------------------------------------------------------------------
4116  nbrmpireqindx = 1
4117  nbrmpinirecvs = 0
4118  nbrrank = lr2rank(startlocrow-1,level)
4119  IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
4120  !-------------------------------------------------------------------------
4121  !Our top row at this level is even and top row's previous is valid
4122  !We will get the previous (odd-numbered) row's solution
4123  !-------------------------------------------------------------------------
4124  stag = 1
4125  globrow = lr2gr(startlocrow-1,level)
4126  IF(kpdbg) WRITE(ofu,*)' Irecv ',startlocrow-1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4127  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4128  ALLOCATE( selement(globrow)%x(m) )
4129  CALL chargememory( m*dpsz )
4130  END IF
4131  CALL bsystemclock(loctimer1)
4132  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4133  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4134  nbrmpireqindx = nbrmpireqindx + 1
4135  nbrmpinirecvs = nbrmpinirecvs + 1
4136  CALL bsystemclock(loctimer2)
4137  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4138  END IF
4139  nbrrank = lr2rank(endlocrow+1,level)
4140  IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
4141  !-------------------------------------------------------------------------
4142  !Our bottom row at this level is even and bottom row's next is valid
4143  !We will get the next (odd-numbered) row's solution
4144  !-------------------------------------------------------------------------
4145  stag = 2
4146  globrow = lr2gr(endlocrow+1,level)
4147  IF(kpdbg) WRITE(ofu,*)' Irecv ',endlocrow+1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4148  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4149  ALLOCATE( selement(globrow)%x(m) )
4150  CALL chargememory( m*dpsz )
4151  END IF
4152  CALL bsystemclock(loctimer1)
4153  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4154  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4155  nbrmpireqindx = nbrmpireqindx + 1
4156  nbrmpinirecvs = nbrmpinirecvs + 1
4157  CALL bsystemclock(loctimer2)
4158  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4159  END IF
4160 
4161  !---------------------------------------------------------------------------
4162  !Send our odd-numbered rows' solutions
4163  !---------------------------------------------------------------------------
4164  nbrmpinisends = 0
4165  DO locrow = startlocrow, endlocrow, 1
4166  IF ( isodd( locrow ) ) THEN
4167  !-----------------------------------------------------------------------
4168  !Send my solution to neighbor row if that neighbor exists
4169  !and is hosted outside this rank; use non-blocking sends
4170  !-----------------------------------------------------------------------
4171  DO rowoffset = -1, 1, 2
4172  nbrrank = lr2rank(locrow+rowoffset, level)
4173  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
4174  IF(kpdbg) WRITE(ofu,*) ' Isend ', locrow, ' ', nbrrank; CALL fl(ofu)
4175  globrow = lr2gr(locrow,level)
4176  IF ( .NOT. ALLOCATED(selement(globrow)%x) ) THEN
4177  IF(kpdbg) WRITE(ofu,*) 'ERROR BISEND AT LEVEL', level; CALL fl(ofu)
4178  IF(kpdbg) WRITE(ofu,*) locrow, ' ', globrow, ' ', nbrrank; CALL fl(ofu)
4179  stop
4180  END IF
4181  stag = (1-rowoffset)/2+1
4182  CALL bsystemclock(loctimer1)
4183  CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4184  nbrrank, stag, ns_comm, nbrmpireq(nbrmpireqindx), &
4185  nbrmpierr(nbrmpireqindx) )
4186  nbrmpireqindx = nbrmpireqindx + 1
4187  nbrmpinisends = nbrmpinisends + 1
4188  CALL bsystemclock(loctimer2)
4189  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4190  END IF
4191  END DO
4192  END IF
4193  END DO
4194 
4195  !---------------------------------------------------------------------------
4196  !Complete the send-receive of solutions, by doing an MPI "wait all"
4197  !---------------------------------------------------------------------------
4198  nbrmpireqcnt = nbrmpireqindx - 1
4199  IF ( nbrmpireqcnt .GT. 0 ) THEN
4200  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4201  CALL bsystemclock(loctimer1)
4202  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4203  CALL bsystemclock(loctimer2)
4204  waitall_time=waitall_time+(loctimer2-loctimer1)
4205  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4206  END IF
4207 
4208  !---------------------------------------------------------------------------
4209  !Compute the solution for even rows at this level
4210  !---------------------------------------------------------------------------
4211  DO locrow = startlocrow, endlocrow, 1
4212  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
4213  globrow = lr2gr( locrow, level )
4214  globrowoff = globrow - startglobrow + 1
4215  IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) ) THEN
4216  IF(kpdbg) WRITE(ofu,*) 'ERROR ',level, ' ',globrow, ' ', locrow; CALL fl(ofu)
4217  stop
4218  END IF
4219 
4220  IF(kpdbg) WRITE(ofu,*) ' Compute solution ', globrow, ' ', locrow; CALL fl(ofu)
4221 
4222  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4223  ALLOCATE( selement(globrow)%x(m) )
4224  CALL chargememory( m*dpsz )
4225  END IF
4226 
4227  !----------------------------------------------------
4228  ! x = b-hat
4229  !----------------------------------------------------
4230  selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4231 
4232  !----------------------------------------------------
4233  ! x = x - L-hat*x(prev)
4234  !----------------------------------------------------
4235  nbrrank = lr2rank(locrow-1,level)
4236  IF ( nbrrank .GE. 0 ) THEN
4237  prevglobrow = lr2gr(locrow-1,level)
4238  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4239  selement(prevglobrow)%x, &
4240  one, selement(globrow)%x )
4241  END IF
4242 
4243  !----------------------------------------------------
4244  ! x = x - U-hat*x(next)
4245  !----------------------------------------------------
4246  nbrrank = lr2rank(locrow+1,level)
4247  IF ( nbrrank .GE. 0 ) THEN
4248  nextglobrow = lr2gr(locrow+1,level)
4249  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4250  selement(nextglobrow)%x, &
4251  one, selement(globrow)%x )
4252  END IF
4253 
4254  !----------------------------------------------------
4255  !Print the solution vector, if desired
4256  !----------------------------------------------------
4257  IF ( .false. ) THEN
4258  IF(kpdbg) WRITE(ofu,*) ' x[',globrow,']=',selement(globrow)%x; CALL fl(ofu)
4259  END IF
4260  END IF
4261  END DO
4262  END DO backwardsolveloop
4263 
4264  !-----------------------------------------------------------------------------
4265  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4266  IF(kpdbg) WRITE(ofu,*) '------ Backward solve end ------'; CALL fl(ofu)
4267 
4268  CALL bsystemclock(globtimer2)
4269  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4270 
4271 END SUBROUTINE backwardsolve_bst
4272 
4273 !-------------------------------------------------------------------------------
4277 !-------------------------------------------------------------------------------
4278 SUBROUTINE verifysolution
4279  !-----------------------------------------------------------------------------
4280  ! Local variables
4281  !-----------------------------------------------------------------------------
4282  INTEGER :: i, k, globrow, globrowoff
4283  REAL(dp) :: term, totrmserr
4284  INTEGER :: nbrrank
4285  INTEGER :: nbrmpireqindx
4286  INTEGER :: nbrmpireqcnt
4287  INTEGER :: nbrmpinirecvs
4288  INTEGER :: nbrmpinisends
4289  INTEGER :: nbrmpireq(2)
4290  INTEGER :: nbrmpierr(2)
4291  INTEGER :: mpiwaiterr
4292  INTEGER :: mpierr
4293  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4294  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4295  INTEGER :: waitmatchindex
4296 
4297  !-----------------------------------------------------------------------------
4298  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4299  IF(kpdbg) WRITE(ofu,*) '------ Verifying solution ------'; CALL fl(ofu)
4300 
4301  !-----------------------------------------------------------------------------
4302  !Alocate memory, do Irecvs/Isends, for boundary odd/even rows resp.
4303  !-----------------------------------------------------------------------------
4304  nbrmpireqindx = 1
4305  nbrmpinirecvs = 0
4306  nbrmpinisends = 0
4307  nbrrank = gr2rank(startglobrow-1)
4308  IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4309  IF ( .NOT. ALLOCATED( selement(startglobrow-1)%x ) ) THEN
4310  ALLOCATE( selement(startglobrow-1)%x(m) )
4311  END IF
4312  CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4313  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4314  nbrmpireqindx = nbrmpireqindx + 1
4315  nbrmpinirecvs = nbrmpinirecvs + 1
4316  END IF
4317  nbrrank = gr2rank(endglobrow+1)
4318  IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4319  IF ( .NOT. ALLOCATED( selement(endglobrow+1)%x ) ) THEN
4320  ALLOCATE( selement(endglobrow+1)%x(m) )
4321  END IF
4322  CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4323  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4324  nbrmpireqindx = nbrmpireqindx + 1
4325  nbrmpinirecvs = nbrmpinirecvs + 1
4326  END IF
4327  nbrrank = gr2rank(startglobrow-1)
4328  IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4329  CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4330  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4331  nbrmpireqindx = nbrmpireqindx + 1
4332  nbrmpinisends = nbrmpinisends + 1
4333  END IF
4334  nbrrank = gr2rank(endglobrow+1)
4335  IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4336  CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4337  ns_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4338  nbrmpireqindx = nbrmpireqindx + 1
4339  nbrmpinisends = nbrmpinisends + 1
4340  END IF
4341  nbrmpireqcnt = nbrmpireqindx - 1
4342  IF ( nbrmpireqcnt .GT. 0 ) THEN
4343  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4344  CALL bsystemclock(loctimer1)!SKS
4345  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4346  CALL bsystemclock(loctimer2)!SKS
4347  waitall_time=waitall_time+(loctimer2-loctimer1)!SKS
4348  END IF
4349 
4350  !-----------------------------------------------------------------------------
4351  totrmserr = 0
4352  DO globrow = startglobrow, endglobrow, 1
4353  globrowoff = globrow - startglobrow + 1
4354  DO i = 1, m, 1
4355  term = zero
4356  IF ( globrow .GT. 1 ) THEN
4357  term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4358  END IF
4359 
4360  term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4361 
4362  IF (writesolution) THEN
4363  IF( i .EQ. 1 ) THEN
4364  DO k = 1, m
4365  IF(kpdbg) WRITE(ofu,*) 'X[',(globrow-1)*m+k,']=', selement(globrow)%x(k);CALL fl(ofu)
4366  END DO
4367  END IF
4368  END IF
4369 
4370  IF ( globrow .LT. n ) THEN
4371  term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4372  END IF
4373 
4374  totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4375  END DO
4376  END DO
4377 
4378  IF ( endglobrow .LT. startglobrow ) THEN
4379  totrmserr = 0
4380  ELSE
4381  totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4382  END IF
4383  IF(kpdbg) WRITE(ofu,'(A,E15.8E3)') 'TOTAL RMS ERROR = ', totrmserr; CALL fl(ofu)
4384  IF(kpdbg) WRITE(ofu,*) '------ Solution verified ------'; CALL fl(ofu)
4385 END SUBROUTINE verifysolution
4386 !-------------------------------------------------------------------------------
4387 
4388 #endif
4389 
4390 END MODULE blocktridiagonalsolver_bst
4391 !-------------------------------------------------------------------------------
blocktridiagonalsolver_bst::blacsprocessgrid
BLACS/PBLAS process grid information.
Definition: blocktridiagonalsolver_bst.f90:136
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11
blocktridiagonalsolver_bst::blacsparameters
BLACS/PBLAS information.
Definition: blocktridiagonalsolver_bst.f90:148
blocktridiagonalsolver_bst::mastertoslavemapping
Master-to-slave mapping.
Definition: blocktridiagonalsolver_bst.f90:164
blocktridiagonalsolver_bst::timecount
Statistics (timing, etc.)
Definition: blocktridiagonalsolver_bst.f90:205
blocktridiagonalsolver_bst::pblastemparray
Definition: blocktridiagonalsolver_bst.f90:220
blocktridiagonalsolver_bst::pblaslevelparameters
Level-specific PBLAS information.
Definition: blocktridiagonalsolver_bst.f90:175
blocktridiagonalsolver_bst::solutionelement
Solution of selected rows of interest to this rank.
Definition: blocktridiagonalsolver_bst.f90:44
blocktridiagonalsolver_bst::pblasstats
Definition: blocktridiagonalsolver_bst.f90:210
blocktridiagonalsolver_bst::levelelement
Data associated with each row at each level.
Definition: blocktridiagonalsolver_bst.f90:34