V3FIT
blocktridiagonalsolver_s.f90
1 !-------------------------------------------------------------------------------
5 
6 !-------------------------------------------------------------------------------
8 USE v3_utilities, ONLY: assert
9 #if defined(MPI_OPT)
10 USE mpi_inc
11 #endif
12 USE descriptor_mod, ONLY: siesta_comm
13 USE shared_data, ONLY: lequi1
14 IMPLICIT NONE
15 PRIVATE
16 !-------------------------------------------------------------------------------
17 ! Precision settings
18 !-------------------------------------------------------------------------------
19 INTEGER, PARAMETER :: rprec = selected_real_kind(12,100)
20 INTEGER, PARAMETER :: iprec = selected_int_kind(8)
21 INTEGER, PARAMETER :: cprec = kind((1.0_rprec,1.0_rprec))
22 INTEGER, PARAMETER :: dp = rprec
23 INTEGER :: MPI_ERR
24 
25 !-------------------------------------------------------------------------------
29 !-------------------------------------------------------------------------------
31  REAL(dp), ALLOCATABLE :: L(:,:), D(:,:), U(:,:), b(:,:)
32  INTEGER, ALLOCATABLE :: pivot(:)
33 END TYPE levelelement
34 !-------------------------------------------------------------------------------
38 !-------------------------------------------------------------------------------
40  REAL(dp), ALLOCATABLE :: x(:) !Vector of size M
41 END TYPE solutionelement
42 
43 !-------------------------------------------------------------------------------
50 !-------------------------------------------------------------------------------
51 TYPE (levelelement), ALLOCATABLE :: lelement(:,:)
52 
53 !-------------------------------------------------------------------------------
58 !-------------------------------------------------------------------------------
59 TYPE (levelelement), ALLOCATABLE :: orig(:)
60 
61 !-------------------------------------------------------------------------------
66 !-------------------------------------------------------------------------------
67 TYPE (solutionelement), ALLOCATABLE :: selement(:)
68 
69 !-------------------------------------------------------------------------------
70 INTEGER :: rank
71 INTEGER :: nranks
72 INTEGER :: p
73 INTEGER :: n
74 INTEGER :: m
75 INTEGER :: startglobrow
76 INTEGER :: endglobrow
77 INTEGER :: startrow1
78 INTEGER :: endrow1
79 INTEGER :: nlevels
80 LOGICAL :: matdirtied
81 LOGICAL :: rhsdirtied
82 
83 !-------------------------------------------------------------------------------
84 CHARACTER*100 :: kenvvar
85 CHARACTER*100 :: kenvval
86 LOGICAL :: lbcylic_init=.false.
87 LOGICAL :: lplb_init=.false.
88 LOGICAL :: kpdbg
89 INTEGER :: ofu
90 INTEGER :: pfu
91 LOGICAL :: writeproblemfile
92 LOGICAL :: writesolution
93 LOGICAL :: usebarriers
94 REAL :: membytes
95 REAL :: dpsz
96 REAL :: intsz
97 REAL :: ptrsz
98 REAL(dp) :: one
99 REAL(dp) :: zero
100 LOGICAL(dp) :: l_colscale=.false.
101 
102 !-------------------------------------------------------------------------------
103 LOGICAL :: use_mpiwtime=.false.
104 DOUBLE PRECISION :: loctimer1, loctimer2
105 DOUBLE PRECISION :: mattimer1, mattimer2
106 DOUBLE PRECISION :: globtimer1, globtimer2
107 DOUBLE PRECISION :: timerfreq
108 DOUBLE PRECISION :: tottime
109 INTEGER :: totcount
110 DOUBLE PRECISION :: totcommtime
111 INTEGER :: totcommcount
112 DOUBLE PRECISION :: totinvtime
113 INTEGER :: totinvcount
114 DOUBLE PRECISION :: totmatmultime
115 INTEGER :: totmatmulcount
116 DOUBLE PRECISION :: totmatsoltime
117 INTEGER :: totmatsolcount
118 
119 !-------------------------------------------------------------------------------
123 !-------------------------------------------------------------------------------
124 REAL(dp), ALLOCATABLE :: origdiagelement(:)
125 REAL(dp), ALLOCATABLE :: topscalefac(:), botscalefac(:)
126 !-------------------------------------------------------------------------------
127 
128 !-------------------------------------------------------------------------------
129 !-------------------------------------------------------------------------------
133 !-------------------------------------------------------------------------------
134 LOGICAL :: doblasonly
135 LOGICAL :: doblacscomm
136 
137 !-------------------------------------------------------------------------------
141 !-------------------------------------------------------------------------------
143  INTEGER :: myrow, mycol
144  INTEGER :: nrows, ncols
145  INTEGER :: blockszrows, blockszcols
146  INTEGER, ALLOCATABLE :: map(:,:)
147 END TYPE blacsprocessgrid
148 
149 !-------------------------------------------------------------------------------
153 !-------------------------------------------------------------------------------
155  INTEGER :: iam
156  INTEGER :: nprocs
157  INTEGER :: maincontext
158  INTEGER :: levelcontext
159  TYPE(BlacsProcessGrid) :: pgrid
160  INTEGER :: nbpp
161 END TYPE blacsparameters
162 
163 TYPE(blacsparameters) :: blacs
164 
165 !-------------------------------------------------------------------------------
169 !-------------------------------------------------------------------------------
171  INTEGER :: masterrank
172  INTEGER :: nslaves
173  INTEGER, ALLOCATABLE :: slaveranks(:)
174 END TYPE mastertoslavemapping
175 
176 !-------------------------------------------------------------------------------
180 !-------------------------------------------------------------------------------
182  LOGICAL :: ammaster
183  TYPE(MasterToSlaveMapping) :: msmap
184 
185  INTEGER :: mpicomm, mpitag, mpierr
186 #if defined(MPI_OPT)
187  INTEGER :: mpistatus(MPI_STATUS_SIZE)
188 #endif
189 
190 END TYPE pblaslevelparameters
191 
192 TYPE(PBLASLevelParameters) :: pblas
193 
194 !-------------------------------------------------------------------------------
198 !-------------------------------------------------------------------------------
199 INTEGER, PARAMETER :: op_none = 0
200 INTEGER, PARAMETER :: op_done = 1
201 INTEGER, PARAMETER :: op_dgemm = 2
202 INTEGER, PARAMETER :: op_dgemv = 3
203 INTEGER, PARAMETER :: op_dgetrf = 4
204 INTEGER, PARAMETER :: op_dgetrs = 5
205 
206 !-------------------------------------------------------------------------------
210 !-------------------------------------------------------------------------------
212  DOUBLE PRECISION :: tm
213  INTEGER :: cnt
214  DOUBLE PRECISION :: t1, t2
215 END TYPE timecount
217  TYPE(TimeCount) :: wait, comm, comp
218  TYPE(TimeCount) :: mm, trf, pmm, ptrf
219  TYPE(TimeCount) :: mma, mmb, mmc, mmalpha, mmbeta, mmrc
220  TYPE(TimeCount) :: extract, waitall
221 END TYPE pblasstats
222 
223 TYPE(PBLASStats) :: pstats
224 
225 !-------------------------------------------------------------------------------
227  DOUBLE PRECISION, ALLOCATABLE :: temparray(:)
228 END TYPE pblastemparray
229 
230 !-------------------------------------------------------------------------------
231 !PUBLIC METHODS
232 PUBLIC :: setmatrixrowcoll, setmatrixrowcold, setmatrixrowcolu, &
233  getmatrixrowcoll, getmatrixrowcold, getmatrixrowcolu, &
234  getmatrixrhs, storediagonal, initialize, forwardsolve, &
235  setmatrixrhs, backwardsolve, getsolutionvector, padsides, &
236  checksymmetry, applyparallelscaling, findminmax_tri, getranks,&
237  refactorhessian, parmatvec, finalize, checkconditionnumber
238 REAL(dp), PUBLIC :: maxeigen_tri
239 REAL(dp), PUBLIC :: dmin_tri
240 
241 CONTAINS
242 
243 !-------------------------------------------------------------------------------
247 !-------------------------------------------------------------------------------
248 SUBROUTINE bclockinit()
249  !-----------------------------------------------------------------------------
250  ! Local variables
251  !-----------------------------------------------------------------------------
252  INTEGER :: tempint
253 
254  IF ( use_mpiwtime ) THEN
255  timerfreq = 1.0
256  ELSE
257  CALL system_clock(count_rate=tempint)
258  timerfreq = tempint
259  END IF
260 END SUBROUTINE bclockinit
261 
262 !-------------------------------------------------------------------------------
266 !-------------------------------------------------------------------------------
267 SUBROUTINE bsystemclock( ts )
268  !-----------------------------------------------------------------------------
269  ! Formal arguments
270  !-----------------------------------------------------------------------------
271  DOUBLE PRECISION, INTENT(INOUT) :: ts
272 
273  !-----------------------------------------------------------------------------
274  ! Local variables
275  !-----------------------------------------------------------------------------
276  INTEGER :: tempint
277  DOUBLE PRECISION :: tempdbl
278 
279  IF ( use_mpiwtime ) THEN
280 #if defined(MPI_OPT)
281  tempdbl = mpi_wtime()
282  ts = tempdbl
283 #endif
284  ELSE
285  CALL system_clock( tempint )
286  ts = tempint
287  END IF
288 END SUBROUTINE bsystemclock
289 
290 !-------------------------------------------------------------------------------
294 !-------------------------------------------------------------------------------
295 SUBROUTINE fl( u )
296  !-----------------------------------------------------------------------------
297  ! Formal arguments
298  !-----------------------------------------------------------------------------
299  INTEGER, INTENT(IN) :: u
300 
301  CALL flush( u )
302 END SUBROUTINE fl
303 
304 !-------------------------------------------------------------------------------
308 !-------------------------------------------------------------------------------
309 SUBROUTINE chargememory( bytes )
310  !-----------------------------------------------------------------------------
311  ! Formal arguments
312  !-----------------------------------------------------------------------------
313  REAL, INTENT(IN) :: bytes
314 
315  !-----------------------------------------------------------------------------
316  membytes = membytes + bytes
317 END SUBROUTINE chargememory
318 
319 !-------------------------------------------------------------------------------
323 !-------------------------------------------------------------------------------
324 SUBROUTINE chargetime( tot, t2, t1, cnt )
325  !-----------------------------------------------------------------------------
326  ! Formal arguments
327  !-----------------------------------------------------------------------------
328  DOUBLE PRECISION, INTENT(INOUT) :: tot
329  DOUBLE PRECISION, INTENT(IN) :: t2
330  DOUBLE PRECISION, INTENT(IN) :: t1
331  INTEGER, INTENT(INOUT) :: cnt
332 
333  !-----------------------------------------------------------------------------
334  tot = tot + (real(t2-t1))/timerfreq
335  cnt = cnt + 1
336 END SUBROUTINE chargetime
337 
338 !-------------------------------------------------------------------------------
339 !-------------------------------------------------------------------------------
343 !-------------------------------------------------------------------------------
344 SUBROUTINE timecountinit( tc )
345  !----------------------------------------------
346  ! Formal arguments
347  !----------------------------------------------
348  TYPE(timecount), INTENT(INOUT) :: tc
349 
350  !----------------------------------------------
351  tc%tm = 0
352  tc%cnt = 0
353 END SUBROUTINE timecountinit
354 
355 !-------------------------------------------------------------------------------
359 !-------------------------------------------------------------------------------
360 SUBROUTINE timecountprint( tc, msg )
361  !----------------------------------------------
362  ! Formal arguments
363  !----------------------------------------------
364  TYPE(timecount), INTENT(INOUT) :: tc
365  CHARACTER(*), INTENT(IN) :: msg
366  !----------------------------------------------
367  ! Local Variables
368  !----------------------------------------------
369  DOUBLE PRECISION :: avg
370 
371  !----------------------------------------------
372  avg = 0
373  IF (tc%cnt .GT. 0) avg = tc%tm / tc%cnt
374  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') msg,tc%cnt,' * ',avg,' sec = ',tc%tm,' sec'
375 END SUBROUTINE timecountprint
376 
377 !-------------------------------------------------------------------------------
381 !-------------------------------------------------------------------------------
382 SUBROUTINE plbinitstats()
383  CALL timecountinit( pstats%wait )
384  CALL timecountinit( pstats%comm )
385  CALL timecountinit( pstats%comp )
386  CALL timecountinit( pstats%mm )
387  CALL timecountinit( pstats%trf )
388  CALL timecountinit( pstats%pmm )
389  CALL timecountinit( pstats%ptrf )
390 
391  CALL timecountinit( pstats%mma )
392  CALL timecountinit( pstats%mmb )
393  CALL timecountinit( pstats%mmc )
394  CALL timecountinit( pstats%mmalpha )
395  CALL timecountinit( pstats%mmbeta )
396  CALL timecountinit( pstats%mmrc )
397  CALL timecountinit( pstats%extract )
398  CALL timecountinit( pstats%waitall )
399 END SUBROUTINE plbinitstats
400 
401 !-------------------------------------------------------------------------------
405 !-------------------------------------------------------------------------------
406 SUBROUTINE plbprintstats()
407  CALL timecountprint( pstats%wait, 'PBLAS Wait ' )
408  CALL timecountprint( pstats%comm, 'PBLAS Comm ' )
409  CALL timecountprint( pstats%comp, 'PBLAS Comp ' )
410  CALL timecountprint( pstats%mm, 'PBLAS MM ' )
411  CALL timecountprint( pstats%trf, 'PBLAS TRF ' )
412  CALL timecountprint( pstats%pmm, 'PBLAS PMM ' )
413  CALL timecountprint( pstats%ptrf, 'PBLAS PTRF ' )
414 
415  CALL timecountprint( pstats%mma, 'PBLAS MMA ' )
416  CALL timecountprint( pstats%mmb, 'PBLAS MMB ' )
417  CALL timecountprint( pstats%mmc, 'PBLAS MMC ' )
418  CALL timecountprint( pstats%mmalpha, 'PBLAS MMalpha ' )
419  CALL timecountprint( pstats%mmbeta, 'PBLAS MMbeta ' )
420  CALL timecountprint( pstats%mmrc, 'PBLAS MMRC ' )
421  CALL timecountprint( pstats%extract, 'PBLAS Extract ' )
422  CALL timecountprint( pstats%waitall, 'PBLAS Waitall ' )
423 END SUBROUTINE plbprintstats
424 
425 !-------------------------------------------------------------------------------
429 !-------------------------------------------------------------------------------
430 SUBROUTINE plbinitialize
431  !----------------------------------------------
432  ! Local Variables
433  !----------------------------------------------
434  CHARACTER*100 :: envvar
435  CHARACTER*100 :: envval
436 
437  lplb_init=.true.
438  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Started'; CALL fl(ofu)
439 
440  !----------------------------------------------
441  doblasonly = .true.
442  IF ( m .GE. 2048 ) THEN
443  doblasonly = .true.
444  envvar = 'BLOCKTRI_BLASONLY'
445  CALL getenv( envvar, envval )
446  IF ( envval .EQ. 'TRUE' ) THEN
447  doblasonly = .true.
448  IF(kpdbg) WRITE(ofu,*) 'BLAS ONLY -- obeying env var ', envvar; CALL fl(ofu)
449  END IF
450  END IF
451  IF(kpdbg) WRITE(ofu,*) 'doblasonly = ', doblasonly; CALL fl(ofu)
452 
453  !----------------------------------------------
454  doblacscomm = .false.
455  envvar = 'BLOCKTRI_BLACSCOMM'
456  CALL getenv( envvar, envval )
457  IF ( envval .EQ. 'TRUE' ) THEN
458  doblacscomm = .true.
459  IF(kpdbg) WRITE(ofu,*) 'BLACS COMM -- obeying env var ', envvar; CALL fl(ofu)
460  END IF
461  IF(kpdbg) WRITE(ofu,*) 'doblacscomm = ', doblacscomm; CALL fl(ofu)
462 
463  !----------------------------------------------
464  blacs%nbpp = 1
465  envvar = 'BLOCKTRI_NBPP'
466  CALL getenv( envvar, envval )
467  IF ( envval .NE. '' ) THEN
468  READ( envval, *) blacs%nbpp
469  IF(kpdbg) WRITE(ofu,*) 'NBPP -- obeying env var ', envvar; CALL fl(ofu)
470  END IF
471  IF(kpdbg) WRITE(ofu,*) 'NBPP = ', blacs%nbpp; CALL fl(ofu)
472 
473  !----------------------------------------------
474  CALL plbinitstats()
475 
476  !----------------------------------------------
477  IF (doblasonly) THEN
478  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
479  ELSE
480 #if defined(MPI_OPT)
481  CALL blacs_pinfo( blacs%iam, blacs%nprocs )
482  IF(kpdbg) WRITE(ofu,*) 'BLACS_PINFO ', blacs%iam, ' ', blacs%nprocs; CALL fl(ofu)
483  CALL blacs_get( 0, 0, blacs%maincontext )
484  IF(kpdbg) WRITE(ofu,*) 'BLACS_GET ', blacs%maincontext; CALL fl(ofu)
485  CALL blacs_gridinit( blacs%maincontext, 'R', 1, blacs%nprocs )
486  IF(kpdbg) WRITE(ofu,*) 'BLACS_GRIDINIT'; CALL fl(ofu)
487  CALL blacs_barrier( blacs%maincontext, 'All' )
488 #endif
489  END IF
490 
491  IF(kpdbg) WRITE(ofu,*) 'PLBInitialize Done'; CALL fl(ofu)
492 END SUBROUTINE plbinitialize
493 
494 !-------------------------------------------------------------------------------
498 !-------------------------------------------------------------------------------
499 SUBROUTINE plbfinalize
500 
501  IF(.NOT.lplb_init) RETURN
502 
503  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Started'; CALL fl(ofu)
504  IF (doblasonly) THEN
505  IF(kpdbg) WRITE(ofu,*) 'BLAS only (not using PBLAS)'; CALL fl(ofu)
506  ELSE
507 #if defined(MPI_OPT)
508  CALL blacs_barrier( blacs%maincontext, 'All' )
509  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
510  CALL blacs_exit(1)
511  IF(kpdbg) WRITE(ofu,*) 'BLACS_EXIT nprocs= ', blacs%nprocs; CALL fl(ofu)
512  CALL plbprintstats()
513 #endif
514  END IF
515  IF(kpdbg) WRITE(ofu,*) 'PLBFinalize Done'; CALL fl(ofu)
516 END SUBROUTINE plbfinalize
517 
518 !-------------------------------------------------------------------------------
522 !-------------------------------------------------------------------------------
523 SUBROUTINE plbforwardinitialize
524  !-----------------------------------------------------------------------------
525  ! Nothing to do
526 END SUBROUTINE plbforwardinitialize
527 
528 !-------------------------------------------------------------------------------
532 !-------------------------------------------------------------------------------
533 SUBROUTINE plbforwardfinalize
534  !-----------------------------------------------------------------------------
535  ! Nothing to do
536 END SUBROUTINE plbforwardfinalize
537 
538 !-------------------------------------------------------------------------------
542 !-------------------------------------------------------------------------------
543 SUBROUTINE plbbackwardinitialize
544  !-----------------------------------------------------------------------------
545  ! Nothing to do
546 END SUBROUTINE plbbackwardinitialize
547 
548 !-------------------------------------------------------------------------------
552 !-------------------------------------------------------------------------------
553 SUBROUTINE plbbackwardfinalize
554  !-----------------------------------------------------------------------------
555  ! Nothing to do
556 END SUBROUTINE plbbackwardfinalize
557 
558 !-------------------------------------------------------------------------------
562 !-------------------------------------------------------------------------------
563 SUBROUTINE plbforwardinitializelevel( lvl, ammaster )
564  !----------------------------------------------
565  ! Formal arguments
566  !----------------------------------------------
567  INTEGER :: lvl
568  LOGICAL :: ammaster
569  !----------------------------------------------
570  ! Local Variables
571  !----------------------------------------------
572  INTEGER :: I, J, K
573  INTEGER :: maxslaves, actualslaves
574 
575  !----------------------------------------------
576  !Short-circuit PBLAS to BLAS
577  IF ( doblasonly ) THEN
578  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel BLAS only'; CALL fl(ofu)
579  RETURN
580  END IF
581 
582  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Started', ammaster; CALL fl(ofu)
583 
584 #if defined(MPI_OPT)
585  !----------------------------------------------
586  pblas%ammaster = ammaster
587  pblas%msmap%masterrank = -1
588  pblas%msmap%nslaves = 0
589  CALL determinemasterslaveranks()
590 
591  !----------------------------------------------
592  !Set up BLACS process grid and a new context
593  blacs%levelcontext = -1
594  CALL blacs_get( blacs%maincontext, 10, blacs%levelcontext )
595  IF(kpdbg) WRITE(ofu,*) 'Created new context'; CALL fl(ofu)
596  IF(kpdbg) WRITE(ofu,*) 'Nslaves ',pblas%msmap%nslaves; CALL fl(ofu)
597 
598  blacs%pgrid%blockszrows = 64
599  IF ( blacs%pgrid%blockszrows .GT. m ) blacs%pgrid%blockszrows = m
600  blacs%pgrid%blockszcols = blacs%pgrid%blockszrows
601  IF(kpdbg) WRITE(ofu,*) 'Block NR=',blacs%pgrid%blockszrows; CALL fl(ofu)
602  IF(kpdbg) WRITE(ofu,*) 'Block NC=',blacs%pgrid%blockszcols; CALL fl(ofu)
603 
604  maxslaves = (m*m) / (blacs%nbpp*blacs%nbpp* &
605  blacs%pgrid%blockszrows*blacs%pgrid%blockszcols)
606  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
607  IF ( maxslaves .LT. 1 ) maxslaves = 1
608  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
609  IF ( maxslaves .GT. pblas%msmap%nslaves ) maxslaves = pblas%msmap%nslaves
610  IF(kpdbg) WRITE(ofu,*) 'Max slaves ', maxslaves; CALL fl(ofu)
611  actualslaves = maxslaves
612  IF(kpdbg) WRITE(ofu,*) ' Actual slaves ', actualslaves; CALL fl(ofu)
613 
614  blacs%pgrid%nrows = int( sqrt( real( actualslaves ) ) )
615  IF ( blacs%pgrid%nrows .LT. 1 ) blacs%pgrid%nrows = 1
616  blacs%pgrid%ncols = int( actualslaves / blacs%pgrid%nrows )
617  IF(kpdbg) WRITE(ofu,*) 'NR=',blacs%pgrid%nrows,' NC=',blacs%pgrid%ncols; CALL fl(ofu)
618  ALLOCATE( blacs%pgrid%map( 1 : blacs%pgrid%nrows, 1 : blacs%pgrid%ncols ) )
619  k = 0
620  DO i = 1, blacs%pgrid%nrows
621  DO j = 1, blacs%pgrid%ncols
622  blacs%pgrid%map(i,j) = pblas%msmap%slaveranks(k+1)
623  k = k + 1
624  END DO
625  END DO
626  IF(kpdbg) WRITE(ofu,*) 'NR*NC=',k; CALL fl(ofu)
627  CALL blacs_gridmap( blacs%levelcontext, blacs%pgrid%map, &
628  blacs%pgrid%nrows, blacs%pgrid%nrows, blacs%pgrid%ncols )
629  IF(kpdbg) WRITE(ofu,*) 'GridMap done'; CALL fl(ofu)
630  CALL blacs_gridinfo( blacs%levelcontext, &
631  blacs%pgrid%nrows, blacs%pgrid%ncols, &
632  blacs%pgrid%myrow, blacs%pgrid%mycol )
633  IF(kpdbg) WRITE(ofu,*) 'GridInfo done'; CALL fl(ofu)
634  IF(kpdbg) WRITE(ofu,*) 'Myrowcol ',blacs%pgrid%myrow,' ',blacs%pgrid%mycol; CALL fl(ofu)
635 
636  !----------------------------------------------
637  pblas%mpicomm = siesta_comm
638  pblas%mpitag = 1234
639 
640  !----------------------------------------------
641  CALL blacs_barrier( blacs%maincontext, 'All' )
642  IF(kpdbg) WRITE(ofu,*) 'BLACS_BARRIER'; CALL fl(ofu)
643 
644  !----------------------------------------------
645  IF ( ammaster ) THEN
646  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Master'; CALL fl(ofu)
647  ELSE
648  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Slave'; CALL fl(ofu)
649  CALL slaveservice()
650  END IF
651 
652  IF(kpdbg) WRITE(ofu,*) 'PLBForwardInitializeLevel Done', ammaster; CALL fl(ofu)
653 #endif
654 
655 END SUBROUTINE plbforwardinitializelevel
656 
657 !-------------------------------------------------------------------------------
661 !-------------------------------------------------------------------------------
662 #if defined(MPI_OPT)
663 SUBROUTINE plbforwardfinalizelevel( lvl, ammaster )
664  !----------------------------------------------
665  ! Formal arguments
666  !----------------------------------------------
667  INTEGER :: lvl
668  LOGICAL :: ammaster
669 
670  !----------------------------------------------
671  !Short-circuit PBLAS to BLAS
672  IF ( doblasonly ) THEN
673  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel BLAS only'; CALL fl(ofu)
674  RETURN
675  END IF
676  !----------------------------------------------
677  !If I'm master, tell slaves that they're done
678  IF ( ammaster .AND. (pblas%msmap%nslaves .GT. 1) ) THEN
679  CALL masterbcastnextop( op_done )
680  END IF
681 
682  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
683  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
684  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel pariah !level-barrier';CALL fl(ofu)
685  ELSE
686  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level-barrier'; CALL fl(ofu)
687  CALL blacs_barrier( blacs%levelcontext, 'All' )
688  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel level grid exit'; CALL fl(ofu)
689  CALL blacs_gridexit( blacs%levelcontext )
690  END IF
691 
692  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel main-barrier'; CALL fl(ofu)
693  CALL blacs_barrier( blacs%maincontext, 'All' )
694 
695  !----------------------------------------------
696  pblas%msmap%masterrank = -1
697  pblas%msmap%nslaves = 0
698  DEALLOCATE( pblas%msmap%slaveranks )
699  DEALLOCATE( blacs%pgrid%map )
700 
701  !----------------------------------------------
702  CALL plbprintstats()
703 
704  IF(kpdbg) WRITE(ofu,*) 'PLBForwardFinalizeLevel ', ammaster; CALL fl(ofu)
705 
706 END SUBROUTINE plbforwardfinalizelevel
707 #endif
708 
709 !-------------------------------------------------------------------------------
713 !-------------------------------------------------------------------------------
714 SUBROUTINE plbbackwardinitializelevel( lvl, ammaster )
715  !----------------------------------------------
716  ! Formal arguments
717  !----------------------------------------------
718  INTEGER :: lvl
719  LOGICAL :: ammaster
720 
721  !----------------------------------------------
722  ! Nothing to do
723 END SUBROUTINE plbbackwardinitializelevel
724 
725 !-------------------------------------------------------------------------------
729 !-------------------------------------------------------------------------------
730 SUBROUTINE plbbackwardfinalizelevel( lvl, ammaster )
731  !----------------------------------------------
732  ! Formal arguments
733  !----------------------------------------------
734  INTEGER :: lvl
735  LOGICAL :: ammaster
736 
737  !----------------------------------------------
738  !Nothing to do
739 END SUBROUTINE plbbackwardfinalizelevel
740 
741 !-------------------------------------------------------------------------------
745 !-------------------------------------------------------------------------------
746 #if defined(MPI_OPT)
747 SUBROUTINE determinemasterslaveranks()
748  !----------------------------------------------
749  ! Local Variables
750  !----------------------------------------------
751  LOGICAL, ALLOCATABLE :: recv_ammaster(:), assigned(:)
752  TYPE(mastertoslavemapping), ALLOCATABLE :: allmsmaps(:)
753  INTEGER :: totmasters, nslavespermaster, adjustednslavespermaster, tempmaster
754  INTEGER :: mpi_err, I, J, masterindex
755 
756  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks started'; CALL fl(ofu)
757 
758  !----------------------------------------------
759  !Initialize temporary data structures used for mapping
760  ALLOCATE( recv_ammaster(1:nranks) )
761  ALLOCATE( assigned(1:nranks) )
762  DO i = 1, nranks
763  assigned( i ) = .false.
764  END DO
765  ALLOCATE( allmsmaps(1:nranks) )
766  DO i = 1, nranks
767  allmsmaps(i)%masterrank = -1
768  allmsmaps(i)%nslaves = 0
769  END DO
770 
771  !----------------------------------------------
772  CALL mpi_allgather(pblas%ammaster, 1, mpi_logical, &
773  recv_ammaster, 1, mpi_logical, &
774  siesta_comm, mpi_err)
775 
776  !----------------------------------------------
777  !Figure out total number of masters
778  totmasters = 0
779  DO i = 1, nranks
780  IF(kpdbg) WRITE(ofu,*) ' recv_ammaster ',i,' ',recv_ammaster(i); CALL fl(ofu)
781  IF ( recv_ammaster(i) ) THEN
782  totmasters = totmasters + 1
783  END IF
784  END DO
785 
786  IF ( totmasters .LE. 0 ) THEN
787  IF(kpdbg) WRITE(ofu,*) 'Total masters must be greater than 0'; CALL fl(ofu)
788  CALL assert(.false.,'blocktri #1')
789  END IF
790 
791  nslavespermaster = (nranks / totmasters)
792  IF(kpdbg) WRITE(ofu,*) 'Avg nslavespermaster',nslavespermaster; CALL fl(ofu)
793 
794  !----------------------------------------------
795  !Assign slaves to each master
796  tempmaster = 0
797  masterloop: DO i = 1, nranks
798  IF ( recv_ammaster(i) ) THEN
799 
800  tempmaster = tempmaster + 1
801 
802  !-------------------------------------------------------------
803  !Give one more slave to earlier ranks, if not evenly divisible
804  adjustednslavespermaster = nslavespermaster
805  IF ( tempmaster .LE. mod( nranks, totmasters ) ) THEN
806  adjustednslavespermaster = adjustednslavespermaster + 1
807  END IF
808 
809  IF(kpdbg) WRITE(ofu,*) 'Adjusted nslavespm',adjustednslavespermaster; CALL fl(ofu)
810  allmsmaps(i)%masterrank = i-1
811  ALLOCATE( allmsmaps(i)%slaveranks(1:adjustednslavespermaster) )
812 
813  !-------------------------------------------------------------
814  !Add the master as first in its own slave rank set
815  assigned(i) = .true.
816  allmsmaps(i)%nslaves = 1
817  allmsmaps(i)%slaveranks(1) = i-1
818 
819  !-------------------------------------------------------------
820  !Assign the next block of unassigned slaves
821  slaveloop: DO j = 1, nranks
822  IF ( allmsmaps(i)%nslaves .GE. adjustednslavespermaster ) THEN
823  EXIT slaveloop
824  END IF
825  IF ( (.NOT. assigned(j)) .AND. (.NOT. recv_ammaster(j)) ) THEN
826  assigned(j) = .true.
827  allmsmaps(j)%masterrank = i-1
828  allmsmaps(i)%nslaves = allmsmaps(i)%nslaves + 1
829  allmsmaps(i)%slaveranks(allmsmaps(i)%nslaves) = j-1
830  END IF
831  END DO slaveloop
832  END IF
833  END DO masterloop
834  IF(kpdbg) WRITE(ofu,*) 'Computed all mappings'; CALL fl(ofu)
835 
836  !----------------------------------------------
837  !Copy the mapping relevant to us (if I'm master, my own, else, my master's)
838  masterindex = rank+1
839  IF ( allmsmaps(masterindex)%masterrank .NE. rank ) THEN
840  masterindex = allmsmaps(masterindex)%masterrank+1
841  END IF
842  pblas%msmap%masterrank = allmsmaps(masterindex)%masterrank
843  pblas%msmap%nslaves = allmsmaps(masterindex)%nslaves
844  ALLOCATE(pblas%msmap%slaveranks(1:allmsmaps(masterindex)%nslaves))
845  pblas%msmap%slaveranks(:) = allmsmaps(masterindex)%slaveranks(:)
846 
847  IF(kpdbg) WRITE(ofu,*) 'Extracted my mappings'; CALL fl(ofu)
848  IF(kpdbg) WRITE(ofu,*) 'My master rank',masterindex-1; CALL fl(ofu)
849  IF(kpdbg) WRITE(ofu,*) 'Nslaves in my set',allmsmaps(masterindex)%nslaves; CALL fl(ofu)
850  DO j = 1, allmsmaps(masterindex)%nslaves
851  IF(kpdbg) WRITE(ofu,*) 'Slave',j,' ',allmsmaps(masterindex)%slaveranks(j);CALL fl(ofu)
852  END DO
853 
854  !----------------------------------------------
855  !Deallocations of temporary space
856  DO i = 1, nranks
857  IF ( recv_ammaster(i) ) THEN
858  DEALLOCATE( allmsmaps(i)%slaveranks )
859  END IF
860  END DO
861  DEALLOCATE( assigned )
862  DEALLOCATE( allmsmaps )
863  DEALLOCATE( recv_ammaster )
864 
865  IF(kpdbg) WRITE(ofu,*) 'DetermineMasterSlaveRanks done'; CALL fl(ofu)
866 END SUBROUTINE determinemasterslaveranks
867 #endif
868 
869 !-------------------------------------------------------------------------------
873 !-------------------------------------------------------------------------------
874 SUBROUTINE masterbcastnextop( nextop )
875  !----------------------------------------------
876  ! Formal arguments
877  !----------------------------------------------
878  INTEGER, INTENT(IN) :: nextop
879  !----------------------------------------------
880  ! Local variables
881  !----------------------------------------------
882  INTEGER :: K, destrank
883  REAL(dp) :: realnextop
884 
885  realnextop = real(nextop)
886  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp started ', nextop; CALL fl(ofu)
887  CALL masterbcastvalue( realnextop )
888  IF(kpdbg) WRITE(ofu,*) 'MasterBcastNextOp done ', nextop; CALL fl(ofu)
889 END SUBROUTINE masterbcastnextop
890 
891 !-------------------------------------------------------------------------------
895 !-------------------------------------------------------------------------------
896 SUBROUTINE slavegetnextop( nextop )
897  !----------------------------------------------
898  ! Formal arguments
899  !----------------------------------------------
900  REAL(dp) :: realnextop
901  INTEGER, INTENT(INOUT) :: nextop
902 
903  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp started'; CALL fl(ofu)
904  CALL slavereceivevalue( realnextop )
905  nextop = int( realnextop )
906  IF(kpdbg) WRITE(ofu,*) 'SlaveGetNextOp done ', nextop; CALL fl(ofu)
907 END SUBROUTINE slavegetnextop
908 
909 !-------------------------------------------------------------------------------
913 !-------------------------------------------------------------------------------
914 SUBROUTINE slaveservice()
915  !----------------------------------------------
916  ! Local variables
917  !----------------------------------------------
918  INTEGER :: nextop
919 
920  !----------------------------------------------
921  IF ( (blacs%pgrid%myrow .LT. 0) .OR. &
922  (blacs%pgrid%myrow .GE. blacs%pgrid%nrows) ) THEN
923  IF(kpdbg) WRITE(ofu,*) 'SlaveService pariah falling out '; CALL fl(ofu)
924  ELSE
925  IF(kpdbg) WRITE(ofu,*) 'SlaveService started '; CALL fl(ofu)
926  oploop: DO WHILE (.true.)
927  nextop = op_none
928  CALL slavegetnextop( nextop )
929  IF ( nextop .EQ. op_done ) THEN
930  EXIT oploop
931  ELSE IF ( nextop .EQ. op_dgemm ) THEN
932  CALL slavedgemm()
933  ELSE IF ( nextop .EQ. op_dgetrf ) THEN
934  CALL slavedgetrf()
935  ELSE IF ( nextop .EQ. op_dgetrs ) THEN
936  CALL slavedgetrs()
937  ELSE
938  IF(kpdbg) WRITE(ofu,*) 'Bad Next Op', nextop; CALL fl(ofu)
939  CALL assert(.false.,'blocktri #2')
940  END IF
941  END DO oploop
942  IF(kpdbg) WRITE(ofu,*) 'SlaveService done '; CALL fl(ofu)
943  END IF
944 END SUBROUTINE slaveservice
945 
946 !-------------------------------------------------------------------------------
950 !-------------------------------------------------------------------------------
951 SUBROUTINE extractsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
952  A, nrows, ncols, subA, subnrows, subncols )
953  !----------------------------------------------
954  ! Formal arguments
955  !----------------------------------------------
956  INTEGER, INTENT(IN) :: bszr, bszc
957  INTEGER, INTENT(IN) :: pnr, pnc
958  INTEGER, INTENT(IN) :: pi, pj
959  REAL(dp), INTENT(IN) :: A(:,:)
960  INTEGER, INTENT(IN) :: nrows, ncols
961  REAL(dp), INTENT(OUT) :: subA(:)
962  INTEGER, INTENT(IN) :: subnrows, subncols
963  !----------------------------------------------
964  ! Local variables
965  !----------------------------------------------
966  INTEGER :: I, J, K, Q, R
967  INTEGER :: thisblkr, thisblkc
968 
969  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
970 
971  CALL bsystemclock(pstats%extract%t1)
972 
973  k = 0
974  DO j = 1, ncols, bszc
975  thisblkc = (j-1) / bszc
976  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
977  DO r = 1, bszc
978  IF ( j+r-1 .LE. ncols ) THEN
979  DO i = 1, nrows, bszr
980  thisblkr = (i-1) / bszr
981  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
982  DO q = 1, bszr
983  IF ( i+q-1 .LE. nrows ) THEN
984  k = k + 1
985  suba(k) = a(i+q-1,j+r-1)
986  END IF
987  END DO
988  END IF
989  END DO
990  END IF
991  END DO
992  END IF
993  END DO
994 
995  !----------------------------------------------
996  IF ( k .NE. subnrows*subncols ) THEN
997  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
998  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
999  CALL assert(.false.,'blocktri #3')
1000  END IF
1001 
1002  CALL bsystemclock(pstats%extract%t2)
1003  CALL chargetime( pstats%extract%tm, pstats%extract%t2, pstats%extract%t1, pstats%extract%cnt )
1004 
1005  IF(kpdbg) WRITE(ofu,*) 'ExtractSubMatrix done K', k; CALL fl(ofu)
1006 
1007 END SUBROUTINE extractsubmatrix
1008 
1009 !-------------------------------------------------------------------------------
1015 !-------------------------------------------------------------------------------
1016 SUBROUTINE mastersendmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1017  !----------------------------------------------
1018  ! Formal arguments
1019  !----------------------------------------------
1020  REAL(dp), INTENT(IN) :: A(:,:)
1021  INTEGER, INTENT(IN) :: nrows, ncols
1022  REAL(dp), INTENT(OUT) :: ssubA(:)
1023  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
1024  !----------------------------------------------
1025  ! Local variables
1026  !----------------------------------------------
1027  INTEGER :: I, J, K
1028  INTEGER :: aslaverank
1029  TYPE(pblastemparray), ALLOCATABLE :: subA(:,:)
1030  INTEGER :: subnrows, subncols
1031  INTEGER :: maxsubnrows, maxsubncols
1032  INTEGER :: bszr, bszc
1033  INTEGER :: pnr, pnc
1034 #if defined(MPI_OPT)
1035  INTEGER, EXTERNAL :: NUMROC
1036  INTEGER :: mpistatus(MPI_STATUS_SIZE)
1037 #endif
1038 
1039  !----------------------------------------------
1040  INTEGER :: mpinisends
1041  INTEGER, ALLOCATABLE :: mpireqs(:)
1042  INTEGER :: mpierr
1043  INTEGER, ALLOCATABLE :: mpistatuses(:,:)
1044  INTEGER :: waitmatchindex
1045 
1046  !----------------------------------------------
1047  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix started'; CALL fl(ofu)
1048  CALL bsystemclock(pstats%comm%t1)
1049 
1050 #if defined(MPI_OPT)
1051  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1052  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1053 
1054  !----------------------------------------------
1055  mpinisends = 0
1056  ALLOCATE( mpireqs( pnr * pnc ) )
1057  ALLOCATE( mpistatuses( pnr * pnc, mpi_status_size ) )
1058 
1059  !----------------------------------------------
1060  maxsubnrows = numroc( nrows, bszr, 0, 0, pnr )
1061  maxsubncols = numroc( ncols, bszc, 0, 0, pnc )
1062  ALLOCATE( suba( 1 : pnr, 1 : pnc ) )
1063 
1064  DO i = 1, pnr
1065  DO j = 1, pnc
1066  aslaverank = blacs%pgrid%map(i,j)
1067 
1068  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1069  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1070 
1071  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix to ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1072 
1073  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1074  !Self (local)
1075  IF ( aslaverank .NE. rank ) THEN
1076  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1077  CALL assert(.false.,'blocktri #4')
1078  END IF
1079  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1080  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1081  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1082  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1083  CALL assert(.false.,'blocktri #5')
1084  END IF
1085 
1086  !Return a copy of this submatrix (will be like sent+received to/by self)
1087  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting self submatrix'; CALL fl(ofu)
1088  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1089  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1090  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix kept self submatrix'; CALL fl(ofu)
1091  ELSE
1092  !A remote slave; send its corresponding matrix fragment
1093  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracting submatrix',i,j; CALL fl(ofu)
1094  ALLOCATE( suba(i,j)%temparray( subnrows*subncols ) )
1095  CALL extractsubmatrix(bszr, bszc, pnr, pnc, &
1096  i, j, a, nrows, ncols, suba(i,j)%temparray, &
1097  subnrows,subncols)
1098  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix extracted submatrix'; CALL fl(ofu)
1099  IF ( doblacscomm ) THEN
1100  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, &
1101  suba(i,j)%temparray, subnrows, i-1, j-1 )
1102  ELSE
1103  mpinisends = mpinisends + 1
1104  CALL mpi_isend( suba(i,j)%temparray, subnrows*subncols, mpi_real8, &
1105  aslaverank, pblas%mpitag, pblas%mpicomm, mpireqs(mpinisends), mpierr )
1106  END IF
1107  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix sent slave submatrix'; CALL fl(ofu)
1108  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix deallocated temp submatrix'; CALL fl(ofu)
1109  END IF
1110  END DO
1111  END DO
1112 
1113  CALL bsystemclock(pstats%waitall%t1)
1114  CALL mpi_waitall( mpinisends, mpireqs, mpistatuses, mpierr )
1115  CALL bsystemclock(pstats%waitall%t2)
1116  CALL chargetime( pstats%waitall%tm, pstats%waitall%t2, pstats%waitall%t1, pstats%waitall%cnt )
1117 
1118  DO i = 1, pnr
1119  DO j = 1, pnc
1120  IF ( .NOT. (i .EQ. 1 .AND. j .EQ. 1) ) THEN
1121  DEALLOCATE( suba(i,j)%temparray )
1122  END IF
1123  END DO
1124  END DO
1125  DEALLOCATE( suba )
1126  DEALLOCATE( mpireqs )
1127  DEALLOCATE( mpistatuses )
1128 
1129  CALL bsystemclock(pstats%comm%t2)
1130  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1131 #endif
1132  IF(kpdbg) WRITE(ofu,*) 'MasterSendMatrix done'; CALL fl(ofu)
1133 
1134 END SUBROUTINE mastersendmatrix
1135 
1136 !-------------------------------------------------------------------------------
1140 !-------------------------------------------------------------------------------
1141 SUBROUTINE slavereceivematrix( nrows, ncols, subA, subnrows, subncols )
1142  !----------------------------------------------
1143  ! Formal arguments
1144  !----------------------------------------------
1145 #if defined(MPI_OPT)
1146  INTEGER :: mpistatus(MPI_STATUS_SIZE)
1147 #endif
1148  INTEGER, INTENT(IN) :: nrows, ncols
1149  REAL(dp), INTENT(OUT) :: subA(:)
1150  INTEGER, INTENT(IN) :: subnrows, subncols
1151  !----------------------------------------------
1152  ! Local variables
1153  !----------------------------------------------
1154  INTEGER :: masterrank
1155  INTEGER :: mpierr
1156 
1157  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1158 
1159 #if defined(MPI_OPT)
1160  masterrank = blacs%pgrid%map(1,1)
1161 
1162  CALL bsystemclock(pstats%comm%t1)
1163 
1164  IF ( doblacscomm ) THEN
1165  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1166  ELSE
1167  CALL mpi_recv( suba, subnrows*subncols, mpi_real8, &
1168  masterrank, pblas%mpitag, pblas%mpicomm, mpistatus, mpierr )
1169  END IF
1170 
1171  CALL bsystemclock(pstats%comm%t2)
1172  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1173 #endif
1174 
1175  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveMatrix done'; CALL fl(ofu)
1176 END SUBROUTINE slavereceivematrix
1177 
1178 !-------------------------------------------------------------------------------
1182 !-------------------------------------------------------------------------------
1183 SUBROUTINE masterbcastvalue( val )
1184  !----------------------------------------------
1185  ! Formal arguments
1186  !----------------------------------------------
1187  REAL(dp), INTENT(IN) :: val
1188 
1189 #if defined(MPI_OPT)
1190  CALL dgebs2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1 );
1191 #endif
1192  IF(kpdbg) WRITE(ofu,*) 'MasterBcastValue bcast to slaves'; CALL fl(ofu)
1193 
1194 END SUBROUTINE masterbcastvalue
1195 
1196 !-------------------------------------------------------------------------------
1200 !-------------------------------------------------------------------------------
1201 SUBROUTINE slavereceivevalue( val )
1202  !----------------------------------------------
1203  ! Formal arguments
1204  !----------------------------------------------
1205  REAL(dp), INTENT(OUT) :: val
1206 
1207 #if defined(MPI_OPT)
1208  CALL dgebr2d( blacs%levelcontext, 'All', ' ', 1, 1, val, 1, 0, 0 )
1209 #endif
1210  IF(kpdbg) WRITE(ofu,*) 'SlaveReceiveValue bcast from master'
1211  CALL fl(ofu)
1212 
1213 END SUBROUTINE slavereceivevalue
1214 
1215 !-------------------------------------------------------------------------------
1219 !-------------------------------------------------------------------------------
1220 SUBROUTINE injectsubmatrix( bszr, bszc, pnr, pnc, pi, pj, &
1221  A, nrows, ncols, subA, subnrows, subncols )
1222  !----------------------------------------------
1223  ! Formal arguments
1224  !----------------------------------------------
1225  INTEGER, INTENT(IN) :: bszr, bszc
1226  INTEGER, INTENT(IN) :: pnr, pnc
1227  INTEGER, INTENT(IN) :: pi, pj
1228  REAL(dp), INTENT(OUT) :: A(:,:)
1229  INTEGER, INTENT(IN) :: nrows, ncols
1230  REAL(dp), INTENT(IN) :: subA(:)
1231  INTEGER, INTENT(IN) :: subnrows, subncols
1232  !----------------------------------------------
1233  ! Local variables
1234  !----------------------------------------------
1235  INTEGER :: I, J, K, Q, R
1236  INTEGER :: thisblkr, thisblkc
1237 
1238  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix NR=', subnrows, ' NC=', subncols; CALL fl(ofu)
1239 
1240  k = 0
1241  DO j = 1, ncols, bszc
1242  thisblkc = (j-1) / bszc
1243  IF ( mod(thisblkc, pnc) .EQ. pj-1 ) THEN
1244  DO r = 1, bszc
1245  IF ( j+r-1 .LE. ncols ) THEN
1246  DO i = 1, nrows, bszr
1247  thisblkr = (i-1) / bszr
1248  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1249  DO q = 1, bszr
1250  IF ( i+q-1 .LE. nrows ) THEN
1251  k = k + 1
1252  a(i+q-1,j+r-1) = suba(k)
1253  END IF
1254  END DO
1255  END IF
1256  END DO
1257  END IF
1258  END DO
1259  END IF
1260  END DO
1261 
1262  !----------------------------------------------
1263  IF ( k .NE. subnrows*subncols ) THEN
1264  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1265  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows, ' subnc=', subncols; CALL fl(ofu)
1266  CALL assert(.false.,'blocktri #6')
1267  END IF
1268 
1269  IF(kpdbg) WRITE(ofu,*) 'InjectSubMatrix done K', k; CALL fl(ofu)
1270 
1271 END SUBROUTINE injectsubmatrix
1272 
1273 !-------------------------------------------------------------------------------
1277 !-------------------------------------------------------------------------------
1278 SUBROUTINE injectsubvector( bszr, pnr, pi, V, nrows, subV, subnrows )
1279  !----------------------------------------------
1280  ! Formal arguments
1281  !----------------------------------------------
1282  INTEGER, INTENT(IN) :: bszr
1283  INTEGER, INTENT(IN) :: pnr
1284  INTEGER, INTENT(IN) :: pi
1285  INTEGER, INTENT(OUT) :: V(:)
1286  INTEGER, INTENT(IN) :: nrows
1287  INTEGER, INTENT(IN) :: subV(:)
1288  INTEGER, INTENT(IN) :: subnrows
1289  !----------------------------------------------
1290  ! Local variables
1291  !----------------------------------------------
1292  INTEGER :: I, K, Q
1293  INTEGER :: thisblkr
1294 
1295  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector NR=', subnrows; CALL fl(ofu)
1296 
1297  k = 0
1298  DO i = 1, nrows, bszr
1299  thisblkr = (i-1) / bszr
1300  IF ( mod(thisblkr, pnr) .EQ. pi-1 ) THEN
1301  DO q = 1, bszr
1302  IF ( i+q-1 .LE. nrows ) THEN
1303  k = k + 1
1304  v(i+q-1) = subv(k)
1305  END IF
1306  END DO
1307  END IF
1308  END DO
1309 
1310  !----------------------------------------------
1311  IF ( k .NE. subnrows ) THEN
1312  IF(kpdbg) WRITE(ofu,*) 'Sanity check failed '; CALL fl(ofu)
1313  IF(kpdbg) WRITE(ofu,*) 'K=', k, ' subnr=', subnrows; CALL fl(ofu)
1314  CALL assert(.false.,'blocktri #7')
1315  END IF
1316 
1317  IF(kpdbg) WRITE(ofu,*) 'InjectSubVector done K', k; CALL fl(ofu)
1318 
1319 END SUBROUTINE injectsubvector
1320 
1321 !-------------------------------------------------------------------------------
1327 !-------------------------------------------------------------------------------
1328 SUBROUTINE masterrecvmatrix( A, nrows, ncols, ssubA, ssubnrows, ssubncols )
1329  !----------------------------------------------
1330  ! Formal arguments
1331  !----------------------------------------------
1332  REAL(dp), INTENT(OUT) :: A(:,:)
1333  INTEGER, INTENT(IN) :: nrows, ncols
1334  REAL(dp), INTENT(IN) :: ssubA(:)
1335  INTEGER, INTENT(IN) :: ssubnrows, ssubncols
1336  !----------------------------------------------
1337  ! Local variables
1338  !----------------------------------------------
1339  INTEGER :: I, J, K
1340  INTEGER :: aslaverank
1341  REAL(dp), ALLOCATABLE :: subA(:)
1342  INTEGER :: subnrows, subncols
1343  INTEGER :: bszr, bszc
1344  INTEGER :: pnr, pnc
1345 #if defined(MPI_OPT)
1346  INTEGER, EXTERNAL :: NUMROC
1347 #endif
1348 
1349  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix started'; CALL fl(ofu)
1350  CALL bsystemclock(pstats%comm%t1)
1351 
1352 #if defined(MPI_OPT)
1353  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1354  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1355 
1356  DO i = 1, pnr
1357  DO j = 1, pnc
1358  aslaverank = blacs%pgrid%map(i,j)
1359 
1360  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1361  subncols = numroc( ncols, bszc, j-1, 0, pnc )
1362 
1363  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1364 
1365  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1366  !Self (local)
1367  IF ( aslaverank .NE. rank ) THEN
1368  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1369  CALL assert(.false.,'blocktri #8')
1370  END IF
1371  IF ( (ssubnrows .NE. subnrows) .OR. (ssubncols .NE. subncols) ) THEN
1372  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1373  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows, ' SSNC ', ssubncols; CALL fl(ofu)
1374  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows, ' SNC ', subncols; CALL fl(ofu)
1375  CALL assert(.false.,'blocktri #9')
1376  END IF
1377 
1378  !Use the given copy of this submatrix (like sent+received to/by self)
1379  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting self submatrix'; CALL fl(ofu)
1380  CALL injectsubmatrix(bszr, bszc, pnr, pnc, &
1381  i, j, a, nrows, ncols, ssuba, subnrows, subncols)
1382  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix kept self submatrix'; CALL fl(ofu)
1383  ELSE
1384  !A remote slave; receive its corresponding matrix fragment
1385  ALLOCATE( suba( 1 : subnrows*subncols ) )
1386  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix receiving slave submatrix'; CALL fl(ofu)
1387  CALL bsystemclock(pstats%wait%t1)
1388  CALL dgerv2d( blacs%levelcontext, subnrows, subncols, &
1389  suba, subnrows, i-1, j-1 )
1390  CALL bsystemclock(pstats%wait%t2)
1391  CALL chargetime( pstats%wait%tm, pstats%wait%t2, pstats%wait%t1, pstats%wait%cnt )
1392  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injecting submatrix',i,j; CALL fl(ofu)
1393  CALL injectsubmatrix( bszr, bszc, pnr, pnc, &
1394  i, j, a, nrows, ncols, suba, subnrows, subncols )
1395  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix injected submatrix'; CALL fl(ofu)
1396  DEALLOCATE( suba )
1397  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix deallocated temp submatrix'; CALL fl(ofu)
1398  END IF
1399  END DO
1400  END DO
1401 
1402  CALL bsystemclock(pstats%comm%t2)
1403  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1404 #endif
1405 
1406  IF(kpdbg) WRITE(ofu,*) 'MasterRecvMatrix done'; CALL fl(ofu)
1407 
1408 END SUBROUTINE masterrecvmatrix
1409 
1410 !-------------------------------------------------------------------------------
1414 !-------------------------------------------------------------------------------
1415 SUBROUTINE slavesendmatrix( nrows, ncols, subA, subnrows, subncols )
1416  !----------------------------------------------
1417  ! Formal arguments
1418  !----------------------------------------------
1419  INTEGER, INTENT(IN) :: nrows, ncols
1420  REAL(dp), INTENT(IN) :: subA(:)
1421  INTEGER, INTENT(IN) :: subnrows, subncols
1422 
1423  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix started ',subnrows,' ',subncols; CALL fl(ofu)
1424 
1425  CALL bsystemclock(pstats%comm%t1)
1426 #if defined(MPI_OPT)
1427  CALL dgesd2d( blacs%levelcontext, subnrows, subncols, suba, subnrows, 0, 0 )
1428 #endif
1429  CALL bsystemclock(pstats%comm%t2)
1430  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1431 
1432  IF(kpdbg) WRITE(ofu,*) 'SlaveSendMatrix done'; CALL fl(ofu)
1433 END SUBROUTINE slavesendmatrix
1434 
1435 !-------------------------------------------------------------------------------
1441 !-------------------------------------------------------------------------------
1442 SUBROUTINE masterrecvvector( V, nrows, ssubV, ssubnrows )
1443  !----------------------------------------------
1444  ! Formal arguments
1445  !----------------------------------------------
1446  INTEGER, INTENT(OUT) :: V(:)
1447  INTEGER, INTENT(IN) :: nrows
1448  INTEGER, INTENT(IN) :: ssubV(:)
1449  INTEGER, INTENT(IN) :: ssubnrows
1450  !----------------------------------------------
1451  ! Local variables
1452  !----------------------------------------------
1453  INTEGER :: I, J, K
1454  INTEGER :: aslaverank
1455  INTEGER, ALLOCATABLE :: subV(:)
1456  INTEGER :: subnrows
1457  INTEGER :: bszr
1458  INTEGER :: pnr
1459 #if defined(MPI_OPT)
1460  INTEGER, EXTERNAL :: NUMROC
1461 #endif
1462 
1463  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector started'; CALL fl(ofu)
1464  CALL bsystemclock(pstats%comm%t1)
1465 
1466 #if defined(MPI_OPT)
1467  bszr = blacs%pgrid%blockszrows
1468  pnr = blacs%pgrid%nrows
1469 
1470  DO i = 1, pnr
1471  DO j = 1, 1
1472  aslaverank = blacs%pgrid%map(i,j)
1473 
1474  subnrows = numroc( nrows, bszr, i-1, 0, pnr )
1475 
1476  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector from ',i,' ',j,' ',aslaverank; CALL fl(ofu)
1477 
1478  IF ( i .EQ. 1 .AND. j .EQ. 1 ) THEN
1479  !Self (local)
1480  IF ( aslaverank .NE. rank ) THEN
1481  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in slave rank of master'; CALL fl(ofu)
1482  CALL assert(.false.,'blocktri #10')
1483  END IF
1484  IF ( ssubnrows .NE. subnrows ) THEN
1485  IF(kpdbg) WRITE(ofu,*) 'Inconsistency in ssub dimensions'; CALL fl(ofu)
1486  IF(kpdbg) WRITE(ofu,*) 'SSNR ', ssubnrows; CALL fl(ofu)
1487  IF(kpdbg) WRITE(ofu,*) 'SNR ', subnrows; CALL fl(ofu)
1488  CALL assert(.false.,'blocktri #11')
1489  END IF
1490 
1491  !Use the given copy of this subvector (like sent+received to/by self)
1492  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting self subvector'; CALL fl(ofu)
1493  CALL injectsubvector(bszr, pnr, i, v, nrows, ssubv, subnrows)
1494  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector kept self subvector'; CALL fl(ofu)
1495  ELSE
1496  !A remote slave; receive its corresponding vector fragment
1497  ALLOCATE( subv( 1 : subnrows ) )
1498  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector receiving slave subvector'; CALL fl(ofu)
1499  CALL igerv2d( blacs%levelcontext, subnrows, 1, subv, subnrows, i-1, 0 )
1500  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injecting subvector',i,j; CALL fl(ofu)
1501  CALL injectsubvector( bszr, pnr, i, v, nrows, subv, subnrows )
1502  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector injected subvector'; CALL fl(ofu)
1503  DEALLOCATE( subv )
1504  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector deallocated temp subvector'; CALL fl(ofu)
1505  END IF
1506  END DO
1507  END DO
1508 
1509  CALL bsystemclock(pstats%comm%t2)
1510  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1511 #endif
1512 
1513  IF(kpdbg) WRITE(ofu,*) 'MasterRecvVector done'; CALL fl(ofu)
1514 
1515 END SUBROUTINE masterrecvvector
1516 
1517 !-------------------------------------------------------------------------------
1523 !-------------------------------------------------------------------------------
1524 SUBROUTINE slavesendvector( nrows, subV, subnrows )
1525  !----------------------------------------------
1526  ! Formal arguments
1527  !----------------------------------------------
1528  INTEGER, INTENT(IN) :: nrows
1529  INTEGER, INTENT(IN) :: subV(:)
1530  INTEGER, INTENT(IN) :: subnrows
1531  INTEGER :: pj
1532 
1533  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector started ', subnrows; CALL fl(ofu)
1534 
1535  CALL bsystemclock(pstats%comm%t1)
1536 
1537  pj = blacs%pgrid%mycol
1538  IF ( pj .GT. 0 ) THEN
1539  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector skipping since mycol>0 ',pj; CALL fl(ofu)
1540  ELSE
1541 #if defined(MPI_OPT)
1542  CALL igesd2d( blacs%levelcontext, subnrows, 1, subv, subnrows, 0, 0 )
1543 #endif
1544  END IF
1545 
1546  CALL bsystemclock(pstats%comm%t2)
1547  CALL chargetime( pstats%comm%tm, pstats%comm%t2, pstats%comm%t1, pstats%comm%cnt )
1548 
1549  IF(kpdbg) WRITE(ofu,*) 'SlaveSendVector done'; CALL fl(ofu)
1550 END SUBROUTINE slavesendvector
1551 
1552 !-------------------------------------------------------------------------------
1556 !-------------------------------------------------------------------------------
1557 SUBROUTINE plbdgemm( alpha, A, B, beta, C )
1558  !----------------------------------------------
1559  ! Formal arguments
1560  !----------------------------------------------
1561  REAL(dp), INTENT(IN) :: alpha, beta
1562  REAL(dp), INTENT(IN) :: A(:,:), B(:,:)
1563  REAL(dp), INTENT(INOUT) :: C(:,:)
1564  !----------------------------------------------
1565  ! Local variables
1566  !----------------------------------------------
1567  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1568  INTEGER :: descA(9), descB(9), descC(9)
1569  INTEGER :: lldA, lldB, lldC
1570  INTEGER :: nrows, ncols, subnrows, subncols
1571  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1572  LOGICAL :: useblas
1573  INTEGER :: ctxt, info
1574 #if defined(MPI_OPT)
1575  INTEGER, EXTERNAL :: NUMROC
1576 #endif
1577 
1578  IF(kpdbg) WRITE(ofu,*) 'MasterGEMM started'; CALL fl(ofu)
1579 
1580  !----------------------------------------------
1581  useblas = ( doblasonly ) .OR. &
1582  ( pblas%msmap%nslaves .EQ. 1) .OR. &
1583  ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1584  ( m .LE. blacs%pgrid%blockszcols )
1585 
1586  IF ( useblas ) THEN
1587  IF(kpdbg) WRITE(ofu,*) 'BLAS DGEMM only (not using PBLAS)'; CALL fl(ofu)
1588  CALL dgemm( 'N', 'N', m, m, m, alpha, a, m, b, m, beta, c, m )
1589  ELSE
1590 #if defined(MPI_OPT)
1591  CALL bsystemclock(pstats%mm%t1)
1592  nrows = m; ncols = m
1593  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1594  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1595  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1596  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1597  subncols = numroc( ncols, bszc, pj, 0, pnc )
1598 
1599  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM allocating subABC'; CALL fl(ofu)
1600  ALLOCATE( suba(1:subnrows*subncols) )
1601  ALLOCATE( subb(1:subnrows*subncols) )
1602  ALLOCATE( subc(1:subnrows*subncols) )
1603  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM allocated subABC'; CALL fl(ofu)
1604 
1605  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM desciniting subABC'; CALL fl(ofu)
1606  ctxt = blacs%levelcontext
1607  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1608  lldb = llda; lldc = llda
1609  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1610  CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1611  CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1612  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM desciniting subABC'; CALL fl(ofu)
1613 
1614  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending OP_DGEMM'; CALL fl(ofu)
1615  CALL masterbcastnextop( op_dgemm )
1616 
1617  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending A'; CALL fl(ofu)
1618  CALL bsystemclock(pstats%mma%t1)
1619  CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1620  CALL bsystemclock(pstats%mma%t2)
1621  CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1622 
1623  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending B'; CALL fl(ofu)
1624  CALL bsystemclock(pstats%mmb%t1)
1625  CALL mastersendmatrix( b, nrows, ncols, subb, subnrows, subncols )
1626  CALL bsystemclock(pstats%mmb%t2)
1627  CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1628 
1629  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending C'; CALL fl(ofu)
1630  CALL bsystemclock(pstats%mmc%t1)
1631  CALL mastersendmatrix( c, nrows, ncols, subc, subnrows, subncols )
1632  CALL bsystemclock(pstats%mmc%t2)
1633  CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1634 
1635  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending alpha', alpha; CALL fl(ofu)
1636  CALL bsystemclock(pstats%mmalpha%t1)
1637  CALL masterbcastvalue( alpha )
1638  CALL bsystemclock(pstats%mmalpha%t2)
1639  CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1640 
1641  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM sending beta', beta; CALL fl(ofu)
1642  CALL bsystemclock(pstats%mmbeta%t1)
1643  CALL masterbcastvalue( beta )
1644  CALL bsystemclock(pstats%mmbeta%t2)
1645  CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1646 
1647  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM invoking PDGEMM'; CALL fl(ofu)
1648  CALL bsystemclock(pstats%comp%t1)
1649  CALL pdgemm( 'N', 'N', m, m, m, alpha, &
1650  suba, 1, 1, desca, &
1651  subb, 1, 1, descb, &
1652  beta, &
1653  subc, 1, 1, descc )
1654  CALL bsystemclock(pstats%comp%t2)
1655  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1656  CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1657  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM done PDGEMM'; CALL fl(ofu)
1658 
1659  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM receiving slave matrices'; CALL fl(ofu)
1660  CALL bsystemclock(pstats%mmrc%t1)
1661  CALL masterrecvmatrix( c, nrows, ncols, subc, subnrows, subncols )
1662  CALL bsystemclock(pstats%mmrc%t2)
1663  CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1664  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM received slave matrices'; CALL fl(ofu)
1665 
1666  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM deallocating subABC'; CALL fl(ofu)
1667  DEALLOCATE( suba )
1668  DEALLOCATE( subb )
1669  DEALLOCATE( subc )
1670  IF(kpdbg) WRITE(ofu,*) 'MasterDGEMM deallocated subABC'; CALL fl(ofu)
1671  CALL bsystemclock(pstats%mm%t2)
1672  CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1673 #endif
1674  END IF
1675 
1676  IF(kpdbg) WRITE(ofu,*) 'MasterGEMM done'; CALL fl(ofu)
1677 END SUBROUTINE plbdgemm
1678 
1679 !-------------------------------------------------------------------------------
1683 !-------------------------------------------------------------------------------
1684 SUBROUTINE slavedgemm()
1685  !----------------------------------------------
1686  ! Local variables
1687  !----------------------------------------------
1688  REAL(dp), ALLOCATABLE :: subA(:), subB(:), subC(:)
1689  REAL(dp) :: alpha, beta
1690  INTEGER :: descA(9), descB(9), descC(9)
1691  INTEGER :: lldA, lldB, lldC
1692  INTEGER :: nrows, ncols, subnrows, subncols
1693  INTEGER :: bszr, bszc, pnr, pnc
1694  INTEGER :: pi, pj
1695  INTEGER :: ctxt, info
1696 #if defined(MPI_OPT)
1697  INTEGER, EXTERNAL :: NUMROC
1698 #endif
1699 
1700  CALL bsystemclock(pstats%mm%t1)
1701 
1702 #if defined(MPI_OPT)
1703  nrows = m; ncols = m
1704  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1705  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1706  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1707 
1708  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1709  subncols = numroc( ncols, bszc, pj, 0, pnc )
1710 
1711  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocating subABC'; CALL fl(ofu)
1712  ALLOCATE( suba(1:subnrows*subncols) )
1713  ALLOCATE( subb(1:subnrows*subncols) )
1714  ALLOCATE( subc(1:subnrows*subncols) )
1715  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM allocated subABC'; CALL fl(ofu)
1716 
1717  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1718  ctxt = blacs%levelcontext
1719  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1720  lldb = llda; lldc = llda
1721  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1722  CALL descinit(descb, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldb, info )
1723  CALL descinit(descc, nrows, ncols, bszr, bszc, 0, 0, ctxt, lldc, info )
1724  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM desciniting subABC'; CALL fl(ofu)
1725 
1726  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving A'; CALL fl(ofu)
1727  CALL bsystemclock(pstats%mma%t1)
1728  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1729  CALL bsystemclock(pstats%mma%t2)
1730  CALL chargetime( pstats%mma%tm, pstats%mma%t2, pstats%mma%t1, pstats%mma%cnt )
1731 
1732  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving B'; CALL fl(ofu)
1733  CALL bsystemclock(pstats%mmb%t1)
1734  CALL slavereceivematrix( nrows, ncols, subb, subnrows, subncols )
1735  CALL bsystemclock(pstats%mmb%t2)
1736  CALL chargetime( pstats%mmb%tm, pstats%mmb%t2, pstats%mmb%t1, pstats%mmb%cnt )
1737 
1738  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving C'; CALL fl(ofu)
1739  CALL bsystemclock(pstats%mmc%t1)
1740  CALL slavereceivematrix( nrows, ncols, subc, subnrows, subncols )
1741  CALL bsystemclock(pstats%mmc%t2)
1742  CALL chargetime( pstats%mmc%tm, pstats%mmc%t2, pstats%mmc%t1, pstats%mmc%cnt )
1743 
1744  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving alpha'; CALL fl(ofu)
1745  CALL bsystemclock(pstats%mmalpha%t1)
1746  CALL slavereceivevalue( alpha )
1747  CALL bsystemclock(pstats%mmalpha%t2)
1748  CALL chargetime( pstats%mmalpha%tm, pstats%mmalpha%t2, pstats%mmalpha%t1, pstats%mmalpha%cnt )
1749 
1750  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM receiving beta'; CALL fl(ofu)
1751  CALL bsystemclock(pstats%mmbeta%t1)
1752  CALL slavereceivevalue( beta )
1753  CALL bsystemclock(pstats%mmbeta%t2)
1754  CALL chargetime( pstats%mmbeta%tm, pstats%mmbeta%t2, pstats%mmbeta%t1, pstats%mmbeta%cnt )
1755 
1756  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM invoking PDGEMM'; CALL fl(ofu)
1757  CALL bsystemclock(pstats%comp%t1)
1758  CALL pdgemm( 'N', 'N', m, m, m, alpha, &
1759  suba, 1, 1, desca, &
1760  subb, 1, 1, descb, &
1761  beta, &
1762  subc, 1, 1, descc )
1763  CALL bsystemclock(pstats%comp%t2)
1764  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1765  CALL chargetime( pstats%pmm%tm, pstats%comp%t2, pstats%comp%t1, pstats%pmm%cnt )
1766  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM done PDGEMM'; CALL fl(ofu)
1767 
1768  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sending result matrix to master'; CALL fl(ofu)
1769  CALL bsystemclock(pstats%mmrc%t1)
1770  CALL slavesendmatrix( nrows, ncols, subc, subnrows, subncols )
1771  CALL bsystemclock(pstats%mmrc%t2)
1772  CALL chargetime( pstats%mmrc%tm, pstats%mmrc%t2, pstats%mmrc%t1, pstats%mmrc%cnt )
1773  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM sent result matrix to master'; CALL fl(ofu)
1774 
1775  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocating subABC'; CALL fl(ofu)
1776  DEALLOCATE( suba )
1777  DEALLOCATE( subb )
1778  DEALLOCATE( subc )
1779  IF(kpdbg) WRITE(ofu,*) 'SlaveDGEMM deallocated subABC'; CALL fl(ofu)
1780 #endif
1781 
1782  CALL bsystemclock(pstats%mm%t2)
1783  CALL chargetime( pstats%mm%tm, pstats%mm%t2, pstats%mm%t1, pstats%mm%cnt )
1784 
1785 END SUBROUTINE slavedgemm
1786 
1787 !-------------------------------------------------------------------------------
1791 !-------------------------------------------------------------------------------
1792 SUBROUTINE plbdgemv( alpha, A, x, beta, y )
1793  !----------------------------------------------
1794  ! Formal arguments
1795  !----------------------------------------------
1796  REAL(dp), INTENT(IN) :: alpha, beta
1797  REAL(dp), INTENT(IN) :: A(:,:)
1798  REAL(dp), INTENT(IN) :: x(:)
1799  REAL(dp), INTENT(INOUT) :: y(:)
1800 
1801  !----------------------------------------------
1802  !Just do this locally (not worth asking slaves' help)
1803  CALL dgemv( 'N', m, m, alpha, a, m, x, 1, beta, y, 1 )
1804 END SUBROUTINE plbdgemv
1805 
1806 !-------------------------------------------------------------------------------
1810 !-------------------------------------------------------------------------------
1811 SUBROUTINE plbdgetrf( A, piv, info )
1812  !----------------------------------------------
1813  ! Formal arguments
1814  !----------------------------------------------
1815  REAL(dp), INTENT(INOUT) :: A(:,:)
1816  INTEGER, INTENT(OUT) :: piv(:)
1817  INTEGER, INTENT(INOUT) :: info
1818  !----------------------------------------------
1819  ! Local variables
1820  !----------------------------------------------
1821  REAL(dp), ALLOCATABLE :: subA(:)
1822  INTEGER, ALLOCATABLE :: subpiv(:)
1823  INTEGER :: descA(9)
1824  INTEGER :: lldA
1825  INTEGER :: nrows, ncols, subnrows, subncols
1826  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1827  INTEGER :: ctxt
1828  LOGICAL :: useblas
1829 #if defined(MPI_OPT)
1830  INTEGER, EXTERNAL :: NUMROC
1831 #endif
1832 
1833  IF(kpdbg) WRITE(ofu,*) 'MasterGETRF started'; CALL fl(ofu)
1834 
1835  !----------------------------------------------
1836  useblas = ( doblasonly ) .OR. &
1837  ( pblas%msmap%nslaves .EQ. 1) .OR. &
1838  ( m .LE. blacs%pgrid%blockszrows ) .OR. &
1839  ( m .LE. blacs%pgrid%blockszcols )
1840 
1841  !-----------------------------------------------------------------------------
1842  IF ( useblas ) THEN
1843  !IF(KPDBG) WRITE(OFU,*) 'BLAS DGETRF only (not using PBLAS)' ; CALL FL(OFU)
1844  IF(kpdbg) WRITE(ofu,*) 'BLAS DGETRF only (not using PBLAS) with M=',m ; CALL fl(ofu)
1845  CALL dgetrf( m, m, a, m, piv, info )
1846  ELSE
1847 #if defined(MPI_OPT)
1848  CALL bsystemclock(pstats%trf%t1)
1849  nrows = m; ncols = m
1850  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1851  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1852  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1853  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1854  subncols = numroc( ncols, bszc, pj, 0, pnc )
1855 
1856  ctxt = blacs%levelcontext
1857  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1858  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1859 
1860  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF allocating subAPiv'; CALL fl(ofu)
1861  ALLOCATE( suba(1:subnrows*subncols) )
1862  ALLOCATE( subpiv(1:subnrows+bszr) )
1863  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF allocated subAPiv'; CALL fl(ofu)
1864 
1865  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF sending OP_GETRF'; CALL fl(ofu)
1866  CALL masterbcastnextop( op_dgetrf )
1867  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF sending A'; CALL fl(ofu)
1868  CALL mastersendmatrix( a, nrows, ncols, suba, subnrows, subncols )
1869  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF invoking PDGETRF'; CALL fl(ofu)
1870  CALL bsystemclock(pstats%comp%t1)
1871  CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1872  CALL bsystemclock(pstats%comp%t2)
1873  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1874  CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1875  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done PDGETRF'; CALL fl(ofu)
1876 
1877  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF receiving slave submatrices'; CALL fl(ofu)
1878  CALL masterrecvmatrix( a, nrows, ncols, suba, subnrows, subncols )
1879  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF received slave submatrices'; CALL fl(ofu)
1880  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF receiving slave vectors'; CALL fl(ofu)
1881  CALL masterrecvvector( piv, nrows, subpiv, subnrows )
1882  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF received slave vectors'; CALL fl(ofu)
1883 
1884  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF deallocating subAPiv'; CALL fl(ofu)
1885  DEALLOCATE( subpiv )
1886  DEALLOCATE( suba )
1887  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF deallocated subAPiv'; CALL fl(ofu)
1888  CALL bsystemclock(pstats%trf%t2)
1889  CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1890 #endif
1891  END IF
1892 
1893  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done'; CALL fl(ofu)
1894 END SUBROUTINE plbdgetrf
1895 
1896 !-------------------------------------------------------------------------------
1900 !-------------------------------------------------------------------------------
1901 SUBROUTINE slavedgetrf()
1902  !----------------------------------------------
1903  ! Local variables
1904  !----------------------------------------------
1905  REAL(dp), ALLOCATABLE :: subA(:)
1906  INTEGER, ALLOCATABLE :: subpiv(:)
1907  INTEGER :: descA(9)
1908  INTEGER :: lldA
1909  INTEGER :: nrows, ncols, subnrows, subncols
1910  INTEGER :: bszr, bszc, pnr, pnc, pi, pj
1911  INTEGER :: ctxt, info
1912 #if defined(MPI_OPT)
1913  INTEGER, EXTERNAL :: NUMROC
1914 #endif
1915 
1916  CALL bsystemclock(pstats%trf%t1)
1917 
1918 #if defined(MPI_OPT)
1919  nrows = m; ncols = m
1920  bszr = blacs%pgrid%blockszrows; bszc = blacs%pgrid%blockszcols
1921  pnr = blacs%pgrid%nrows; pnc = blacs%pgrid%ncols
1922  pi = blacs%pgrid%myrow; pj = blacs%pgrid%mycol
1923  subnrows = numroc( nrows, bszr, pi, 0, pnr )
1924  subncols = numroc( ncols, bszc, pj, 0, pnc )
1925 
1926  ctxt = blacs%levelcontext
1927  llda = subnrows; IF( llda .LT. 1 ) llda = 1
1928  CALL descinit(desca, nrows, ncols, bszr, bszc, 0, 0, ctxt, llda, info )
1929 
1930  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocating subAPiv'; CALL fl(ofu)
1931  ALLOCATE( suba(1:subnrows*subncols) )
1932  ALLOCATE( subpiv(1:subnrows+bszr) )
1933  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF allocated subAPiv'; CALL fl(ofu)
1934 
1935  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF receiving A submatrix'; CALL fl(ofu)
1936  CALL slavereceivematrix( nrows, ncols, suba, subnrows, subncols )
1937  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF received A submatrix'; CALL fl(ofu)
1938 
1939  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF invoking PDGETRF'; CALL fl(ofu)
1940  CALL bsystemclock(pstats%comp%t1)
1941  CALL pdgetrf( nrows, ncols, suba, 1, 1, desca, subpiv, info )
1942  CALL bsystemclock(pstats%comp%t2)
1943  CALL chargetime( pstats%comp%tm, pstats%comp%t2, pstats%comp%t1, pstats%comp%cnt )
1944  CALL chargetime( pstats%ptrf%tm, pstats%comp%t2, pstats%comp%t1, pstats%ptrf%cnt )
1945  IF(kpdbg) WRITE(ofu,*) 'MasterDGETRF done PDGETRF'; CALL fl(ofu)
1946 
1947  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sending result matrix to master'; CALL fl(ofu)
1948  CALL slavesendmatrix( nrows, ncols, suba, subnrows, subncols )
1949  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result matrix to master'; CALL fl(ofu)
1950  CALL slavesendvector( nrows, subpiv, subnrows )
1951  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF sent result vector to master'; CALL fl(ofu)
1952 
1953  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocating subAPiv'; CALL fl(ofu)
1954  DEALLOCATE( subpiv )
1955  DEALLOCATE( suba )
1956  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRF deallocated subAPiv'; CALL fl(ofu)
1957 
1958 #endif
1959 
1960  CALL bsystemclock(pstats%trf%t2)
1961  CALL chargetime( pstats%trf%tm, pstats%trf%t2, pstats%trf%t1, pstats%trf%cnt )
1962 
1963 END SUBROUTINE slavedgetrf
1964 
1965 !-------------------------------------------------------------------------------
1969 !-------------------------------------------------------------------------------
1970 SUBROUTINE plbdgetrs( nrhs, A, piv, B, info )
1971  !----------------------------------------------
1972  ! Formal arguments
1973  !----------------------------------------------
1974  INTEGER, INTENT(IN) :: nrhs
1975  REAL(dp), INTENT(IN) :: A(:,:)
1976  INTEGER, INTENT(IN) :: piv(:)
1977  REAL(dp), INTENT(INOUT) :: B(:,:)
1978  INTEGER :: info
1979 
1980  !----------------------------------------------
1981  !Just do this locally (not worth asking slaves' help)
1982  IF(kpdbg) WRITE(ofu,*) 'BLAS DGETRS only (not using PBLAS)'; CALL fl(ofu)
1983  CALL dgetrs( 'N', m, nrhs, a, m, piv, b, m, info )
1984 END SUBROUTINE plbdgetrs
1985 
1986 !-------------------------------------------------------------------------------
1990 !-------------------------------------------------------------------------------
1991 SUBROUTINE slavedgetrs()
1993  IF(kpdbg) WRITE(ofu,*) 'SlaveDGETRS not implemented'; CALL fl(ofu)
1994  CALL assert(.false.,'blocktri #12') !To be completed
1995 
1996 END SUBROUTINE slavedgetrs
1997 !-------------------------------------------------------------------------------
1998 !-------------------------------------------------------------------------------
1999 
2000 !-------------------------------------------------------------------------------
2004 !-------------------------------------------------------------------------------
2005 SUBROUTINE initialize( inN, inM )
2006  !-----------------------------------------------------------------------------
2007  ! Formal arguments
2008  !-----------------------------------------------------------------------------
2009  INTEGER, INTENT(IN) :: inn
2010  INTEGER, INTENT(IN) :: inm
2011 
2012  !-----------------------------------------------------------------------------
2013  ! Local variables
2014  !-----------------------------------------------------------------------------
2015  INTEGER :: nrowsperrank
2016  INTEGER :: nspillrows
2017  INTEGER :: mpierr
2018  INTEGER :: globrow
2019  INTEGER :: globrowoff
2020  INTEGER :: flag
2021  REAL :: lognbase2
2022 
2023  !-----------------------------------------------------------------------------
2024  lbcylic_init=.true.
2025 #if defined(MPI_OPT)
2026  CALL mpi_initialized (flag, mpierr)
2027  IF ( flag .EQ. 0 ) THEN
2028  CALL mpi_init( mpierr )
2029  END IF
2030  use_mpiwtime = .true.
2031  CALL mpi_comm_rank( siesta_comm, rank, mpierr )
2032  CALL mpi_comm_size( siesta_comm, nranks, mpierr)
2033 #endif
2034 
2035  n = inn
2036  m = inm
2037  IF (n.EQ.0 .OR. m.EQ.0) return
2038 
2039  !-----------------------------------------------------------------------------
2040  ! Set output file unit, so we get output of each rank in its own file
2041  ! Extremely useful for debugging -- makes it so much easier to see progress!
2042  !-----------------------------------------------------------------------------
2043  ofu = rank+1000
2044  p = nranks
2045  IF ( p .GT. n ) THEN
2046  IF(kpdbg) WRITE(ofu,*) 'Detected extra ', p-n, ' ranks'; CALL fl(ofu)
2047  p = n
2048  END IF
2049 
2050  !-----------------------------------------------------------------------------
2051  !Set problem output file
2052  !-----------------------------------------------------------------------------
2053  pfu = ofu+nranks
2054 
2055  !-----------------------------------------------------------------------------
2056  ! Check to see if debugging output is to be written : SKS on Nov 9, 2011
2057  !-----------------------------------------------------------------------------
2058  kpdbg=.false.; kenvvar='KPDBG'
2059  CALL getenv(kenvvar,kenvval)
2060  IF (kenvval.NE.'') THEN
2061  IF (kenvval.EQ.'TRUE') THEN
2062  kpdbg=.true.
2063  ELSE
2064  kpdbg=.false.
2065  END IF
2066  END IF
2067 
2068  !-----------------------------------------------------------------------------
2069  IF(kpdbg) WRITE(ofu,*) 'Rank ', rank, ' NRanks ', nranks; CALL fl(ofu)
2070  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2071  IF(kpdbg) WRITE(ofu,*) '------ Initialization start ------'; CALL fl(ofu)
2072  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2073 
2074  !-----------------------------------------------------------------------------
2075  IF ( n .LT. 1 ) THEN
2076  IF(kpdbg) WRITE(ofu,*) 'Bad N', n; CALL fl(ofu)
2077  CALL assert(.false.,'blocktri #13')
2078  END IF
2079 
2080  !-----------------------------------------------------------------------------
2081  !Determine where we start and end in row list at level 1 on this processor
2082  !This processor's global row numbers span [startglobrow,endglobrow] inclusive
2083  !-----------------------------------------------------------------------------
2084  nrowsperrank = n/p !Number of rows evenly split across processors
2085  nspillrows = mod(n, p) !Some left over after even split
2086  IF ( rank .LT. nspillrows ) THEN !The first few ranks get one extra row each
2087  startglobrow = rank*nrowsperrank + rank + 1
2088  endglobrow = startglobrow + nrowsperrank
2089  ELSE IF ( rank .LT. p ) THEN !The other active ranks
2090  startglobrow = rank*nrowsperrank + nspillrows + 1
2091  endglobrow = startglobrow + nrowsperrank - 1
2092  ELSE !The rest (unnecessary, inactive) ranks
2093  startglobrow = -1
2094  endglobrow = -2
2095  END IF
2096 
2097  !-----------------------------------------------------------------------------
2098  !Allocate array to store unmodified diagonal elements: SKS
2099  !-----------------------------------------------------------------------------
2100  IF (.NOT.ALLOCATED(origdiagelement)) ALLOCATE (origdiagelement((endglobrow-startglobrow+1)*m))
2101 
2102  !-----------------------------------------------------------------------------
2103  !No. of levels is 1+ceil(lg(N)); e.g., 6 for N=23
2104  !-----------------------------------------------------------------------------
2105  lognbase2 = log(real(n))/log(2.0)
2106  nlevels = 1 + int(lognbase2)
2107  IF ( real(int(lognbase2)) .NE. lognbase2 ) THEN
2108  nlevels = nlevels + 1
2109  END IF
2110  IF(kpdbg) WRITE(ofu,*) 'NLEVELS=', nlevels; CALL fl(ofu)
2111 
2112  !---------------------------------------------------------------------------
2113  matdirtied = .true.
2114  rhsdirtied = .true.
2115 
2116  !---------------------------------------------------------------------------
2117  !Not sure if barriers are needed between levels, but they can be optionally
2118  !turned on by setting usebarriers to TRUE
2119  !---------------------------------------------------------------------------
2120  usebarriers = .false.
2121 
2122  !---------------------------------------------------------------------------
2123  writeproblemfile = .false.
2124  writesolution = .false.
2125 
2126  !---------------------------------------------------------------------------
2127  membytes = 0
2128  dpsz = 8 !8 bytes for double precision
2129  intsz = 4 !4 bytes for a single integer
2130  ptrsz = 8 !Assuming 64-bit addresses in the worst case
2131  one = 1
2132  zero = 0
2133 
2134  tottime = 0
2135  totcommtime = 0
2136  totinvtime = 0
2137  totinvcount = 0
2138  totmatmultime = 0
2139  totmatmulcount = 0
2140  totmatsoltime = 0
2141  totmatsolcount = 0
2142  CALL bclockinit()
2143 
2144  !---------------------------------------------------------------------------
2145  !The +2 in nlocrows+2 is for storing incoming values from neighbors, if any
2146  !The zero'th element stores the incoming element from top neighbor, the
2147  !last element stores the incoming element from the bottom neighbor.
2148  !---------------------------------------------------------------------------
2149  ! SKS
2150  IF (.NOT.ALLOCATED(lelement)) ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2151  !ALLOCATE( lelement(nlevels, 0:endglobrow-startglobrow+2) )
2152  CALL chargememory( nlevels*(endglobrow-startglobrow+3)*5*ptrsz )
2153 
2154  ! SKS
2155  IF (.NOT.ALLOCATED(selement)) ALLOCATE( selement(n) ) !Have a place for all x; only some will be allocated
2156  ! ALLOCATE( selement(N) ) !Have a place for all x; only some will be allocated
2157  CALL chargememory( n*ptrsz )
2158 
2159  !---------------------------------------------------------------------------
2160  !Allocate (L,D,U,b,pivot) at level 1
2161  !---------------------------------------------------------------------------
2162  DO globrow = startglobrow, endglobrow, 1
2163  globrowoff = globrow-startglobrow+1
2164  !SKS start
2165  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%L)) ALLOCATE( lelement(1, globrowoff)%L(m,m) )
2166  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%D)) ALLOCATE( lelement(1, globrowoff)%D(m,m) )
2167  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%U)) ALLOCATE( lelement(1, globrowoff)%U(m,m) )
2168  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%b)) ALLOCATE( lelement(1, globrowoff)%b(m,1) )
2169  IF(.NOT.ALLOCATED(lelement(1,globrowoff)%pivot)) ALLOCATE( lelement(1, globrowoff)%pivot(m) )
2170  !SKS end
2171  !ALLOCATE( lelement(1, globrowoff)%L(M,M) )
2172  !ALLOCATE( lelement(1, globrowoff)%D(M,M) )
2173  !ALLOCATE( lelement(1, globrowoff)%U(M,M) )
2174  !ALLOCATE( lelement(1, globrowoff)%b(M,1) )
2175  !ALLOCATE( lelement(1, globrowoff)%pivot(M) )
2176  CALL chargememory( (3*m*m+m)*dpsz + m*intsz )
2177  lelement(1, globrowoff)%L = 0
2178  lelement(1, globrowoff)%D = 0
2179  lelement(1, globrowoff)%U = 0
2180  lelement(1, globrowoff)%b = 0
2181  lelement(1, globrowoff)%pivot = 0
2182  END DO
2183 
2184  !---------------------------------------------------------------------------
2185  !Allocate (L,D,U,b) for saving the original problem
2186  !---------------------------------------------------------------------------
2187  !SKS
2188  IF (.NOT.ALLOCATED(orig)) ALLOCATE( orig(endglobrow-startglobrow+1) )
2189  ! ALLOCATE( orig(endglobrow-startglobrow+1) )
2190  DO globrow = startglobrow, endglobrow, 1
2191  globrowoff = globrow-startglobrow+1
2192  !SKS start
2193  IF(.NOT.ALLOCATED(orig(globrowoff)%L)) ALLOCATE( orig(globrowoff)%L(m,m) )
2194  IF(.NOT.ALLOCATED(orig(globrowoff)%D)) ALLOCATE( orig(globrowoff)%D(m,m) )
2195  IF(.NOT.ALLOCATED(orig(globrowoff)%U)) ALLOCATE( orig(globrowoff)%U(m,m) )
2196  IF(.NOT.ALLOCATED(orig(globrowoff)%b)) ALLOCATE( orig(globrowoff)%b(m,1) )
2197  !SKS end
2198  !ALLOCATE( orig(globrowoff)%L(M,M) )
2199  !ALLOCATE( orig(globrowoff)%D(M,M) )
2200  !ALLOCATE( orig(globrowoff)%U(M,M) )
2201  !ALLOCATE( orig(globrowoff)%b(M,1) )
2202  !-- DONT CHARGE MEMORY COST FOR THIS!!! --
2203  orig(globrowoff)%L = 0
2204  orig(globrowoff)%D = 0
2205  orig(globrowoff)%U = 0
2206  orig(globrowoff)%b = 0
2207  END DO
2208 
2209  !-----------------------------------------------------------------------------
2210  CALL plbinitialize()
2211 
2212  !-----------------------------------------------------------------------------
2213  IF(kpdbg) WRITE(ofu,*) ' P=',p,' N=',n,' M=',m; CALL fl(ofu)
2214  IF(kpdbg) WRITE(ofu,*) 'SROW=',startglobrow,' EROW=', endglobrow; CALL fl(ofu)
2215  IF(kpdbg) WRITE(ofu,*) '------ Initialization end ------'; CALL fl(ofu)
2216  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
2217 END SUBROUTINE initialize
2218 
2219 !-------------------------------------------------------------------------------
2220 !SPH - Add this since now rank, nranks is PRIVATE to avoid conflict with nscalingtools
2221 ! Must call Initialize first
2222 SUBROUTINE getranks(irank, inranks)
2223 INTEGER, INTENT(OUT) :: irank, inranks
2224 irank = rank; inranks = nranks
2225 END SUBROUTINE getranks
2226 
2227 !-------------------------------------------------------------------------------
2231 !-------------------------------------------------------------------------------
2232 SUBROUTINE setmatrixrow( globrow, L, D, U )
2233  !-----------------------------------------------------------------------------
2234  ! Formal arguments
2235  !-----------------------------------------------------------------------------
2236  INTEGER, INTENT(IN) :: globrow
2237  REAL(dp), INTENT(IN) :: L(:,:), D(:,:), U(:,:)
2238  !-----------------------------------------------------------------------------
2239  ! Local variables
2240  !-----------------------------------------------------------------------------
2241  REAL(dp) :: val
2242  INTEGER :: i, j, globrowoff
2243 
2244  !-------------------------------------------
2245  ! Sanity checks on globrow
2246  !-------------------------------------------
2247  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2248  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRow: Bad input globrow ',globrow; CALL fl(ofu)
2249  CALL assert(.false.,'blocktri #14')
2250  END IF
2251  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2252  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRow: Non-local globrow ',globrow; CALL fl(ofu)
2253  CALL assert(.false.,'blocktri #15')
2254  END IF
2255 
2256  globrowoff = globrow-startglobrow+1
2257 
2258  !-------------------------------------------
2259  ! Copy given blocks into allocated matrix
2260  !-------------------------------------------
2261  DO i = 1, m
2262  DO j = 1, m
2263  IF ( globrow .EQ. 1 ) THEN
2264  val = 0
2265  ELSE
2266  val = l(i,j)
2267  END IF
2268  lelement(1, globrowoff)%L(i,j) = val
2269 
2270  val = d(i,j)
2271  lelement(1, globrowoff)%D(i,j) = val
2272 
2273  IF ( globrow .EQ. n ) THEN
2274  val = 0
2275  ELSE
2276  val = u(i,j)
2277  END IF
2278  lelement(1, globrowoff)%U(i,j) = val
2279  END DO
2280  END DO
2281 
2282  !-------------------------------------------
2283  !Save a copy of this original problem
2284  !-------------------------------------------
2285  orig(globrowoff)%L = lelement(1,globrowoff)%L
2286  orig(globrowoff)%D = lelement(1,globrowoff)%D
2287  orig(globrowoff)%U = lelement(1,globrowoff)%U
2288 
2289  !-------------------------------------------
2290  matdirtied = .true.
2291 
2292 END SUBROUTINE setmatrixrow
2293 
2294 !-------------------------------------------------------------------------------
2298 !-------------------------------------------------------------------------------
2299 SUBROUTINE setmatrixrowl( globrow, L )
2300  !-----------------------------------------------------------------------------
2301  ! Formal arguments
2302  !-----------------------------------------------------------------------------
2303  INTEGER, INTENT(IN) :: globrow
2304  REAL(dp), INTENT(IN) :: L(:,:)
2305  !-----------------------------------------------------------------------------
2306  ! Local variables
2307  !-----------------------------------------------------------------------------
2308  REAL(dp) :: val
2309  INTEGER :: i, j, globrowoff
2310 
2311  !-------------------------------------------
2312  ! Sanity checks on globrow
2313  !-------------------------------------------
2314  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2315  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowL: Bad input globrow ',globrow; CALL fl(ofu)
2316  CALL assert(.false.,'blocktri #16')
2317  END IF
2318  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2319  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowL: Non-local globrow ',globrow; CALL fl(ofu)
2320  CALL assert(.false.,'blocktri #17')
2321  END IF
2322 
2323  globrowoff = globrow-startglobrow+1
2324 
2325  !-------------------------------------------
2326  ! Copy given L block into allocated matrix
2327  !-------------------------------------------
2328  DO i = 1, m
2329  DO j = 1, m
2330  IF ( globrow .EQ. 1 ) THEN
2331  val = 0
2332  ELSE
2333  val = l(i,j)
2334  END IF
2335  lelement(1, globrowoff)%L(i,j) = val
2336  END DO
2337  END DO
2338 
2339  !-------------------------------------------
2340  !Save a copy of this original problem
2341  !-------------------------------------------
2342  orig(globrowoff)%L = lelement(1,globrowoff)%L
2343 
2344  !-------------------------------------------
2345  matdirtied = .true.
2346 
2347 END SUBROUTINE setmatrixrowl
2348 
2349 !-------------------------------------------------------------------------------
2353 !-------------------------------------------------------------------------------
2354 SUBROUTINE setmatrixrowd( globrow, D )
2355  !-----------------------------------------------------------------------------
2356  ! Formal arguments
2357  !-----------------------------------------------------------------------------
2358  INTEGER, INTENT(IN) :: globrow
2359  REAL(dp), INTENT(IN) :: D(:,:)
2360  !-----------------------------------------------------------------------------
2361  ! Local variables
2362  !-----------------------------------------------------------------------------
2363  REAL(dp) :: val
2364  INTEGER :: i, j, globrowoff
2365 
2366  !-------------------------------------------
2367  ! Sanity checks on globrow
2368  !-------------------------------------------
2369  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2370  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowD: Bad input globrow ',globrow; CALL fl(ofu)
2371  CALL assert(.false.,'blocktri #18')
2372  END IF
2373  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2374  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowD: Non-local globrow ',globrow; CALL fl(ofu)
2375  CALL assert(.false.,'blocktri #19')
2376  END IF
2377 
2378  globrowoff = globrow-startglobrow+1
2379 
2380  !-------------------------------------------
2381  ! Copy given D block into allocated matrix
2382  !-------------------------------------------
2383  DO i = 1, m
2384  DO j = 1, m
2385  val = d(i,j)
2386  lelement(1, globrowoff)%D(i,j) = val
2387  END DO
2388  END DO
2389 
2390  !-------------------------------------------
2391  !Save a copy of this original problem
2392  !-------------------------------------------
2393  orig(globrowoff)%D = lelement(1,globrowoff)%D
2394 
2395  !-------------------------------------------
2396  matdirtied = .true.
2397 
2398 END SUBROUTINE setmatrixrowd
2399 
2400 !-------------------------------------------------------------------------------
2404 !-------------------------------------------------------------------------------
2405 SUBROUTINE setmatrixrowu( globrow, U )
2406  !-----------------------------------------------------------------------------
2407  ! Formal arguments
2408  !-----------------------------------------------------------------------------
2409  INTEGER, INTENT(IN) :: globrow
2410  REAL(dp), INTENT(IN) :: U(:,:)
2411  !-----------------------------------------------------------------------------
2412  ! Local variables
2413  !-----------------------------------------------------------------------------
2414  REAL(dp) :: val
2415  INTEGER :: i, j, globrowoff
2416 
2417  !-------------------------------------------
2418  ! Sanity checks on globrow
2419  !-------------------------------------------
2420  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2421  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowU: Bad input globrow ',globrow; CALL fl(ofu)
2422  CALL assert(.false.,'blocktri #20')
2423  END IF
2424  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2425  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowU: Non-local globrow ',globrow; CALL fl(ofu)
2426  CALL assert(.false.,'blocktri #21')
2427  END IF
2428 
2429  globrowoff = globrow-startglobrow+1
2430 
2431  !-------------------------------------------
2432  ! Copy given U block into allocated matrix
2433  !-------------------------------------------
2434  DO i = 1, m
2435  DO j = 1, m
2436  IF ( globrow .EQ. n ) THEN
2437  val = 0
2438  ELSE
2439  val = u(i,j)
2440  END IF
2441  lelement(1, globrowoff)%U(i,j) = val
2442  END DO
2443  END DO
2444 
2445  !-------------------------------------------
2446  !Save a copy of this original problem
2447  !-------------------------------------------
2448  orig(globrowoff)%U = lelement(1,globrowoff)%U
2449 
2450  !-------------------------------------------
2451  matdirtied = .true.
2452 
2453 END SUBROUTINE setmatrixrowu
2454 
2455 !-------------------------------------------------------------------------------
2459 !-------------------------------------------------------------------------------
2460 SUBROUTINE setmatrixrowcoll( globrow, Lj, j )
2461  !-----------------------------------------------------------------------------
2462  ! Formal arguments
2463  !-----------------------------------------------------------------------------
2464  INTEGER, INTENT(IN) :: globrow
2465  REAL(dp), INTENT(IN) :: lj(:)
2466  INTEGER, INTENT(IN) :: j
2467  !-----------------------------------------------------------------------------
2468  ! Local variables
2469  !-----------------------------------------------------------------------------
2470  REAL(dp) :: val
2471  INTEGER :: i, globrowoff
2472 
2473  !-------------------------------------------
2474  ! Sanity checks on globrow
2475  !-------------------------------------------
2476  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2477  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2478  CALL assert(.false.,'blocktri #22')
2479  END IF
2480  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2481  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2482  CALL assert(.false.,'blocktri #23')
2483  END IF
2484  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2485  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2486  CALL assert(.false.,'blocktri #24')
2487  END IF
2488 
2489  globrowoff = globrow-startglobrow+1
2490 
2491  !-------------------------------------------
2492  ! Copy given L column into allocated matrix
2493  !-------------------------------------------
2494  DO i = 1, m
2495  IF ( globrow .EQ. 1 ) THEN
2496  val = 0
2497  ELSE
2498  val = lj(i)
2499  END IF
2500  lelement(1, globrowoff)%L(i,j) = val
2501  END DO
2502 
2503  !-------------------------------------------
2504  !Save a copy of this original problem
2505  !-------------------------------------------
2506  orig(globrowoff)%L(:,j) = lelement(1,globrowoff)%L(:,j)
2507 
2508  !-------------------------------------------
2509  matdirtied = .true.
2510 
2511 END SUBROUTINE setmatrixrowcoll
2512 
2513 !-------------------------------------------------------------------------------
2517 !-------------------------------------------------------------------------------
2518 SUBROUTINE setmatrixrowcold( globrow, Dj, j )
2519  !-----------------------------------------------------------------------------
2520  ! Formal arguments
2521  !-----------------------------------------------------------------------------
2522  INTEGER, INTENT(IN) :: globrow
2523  REAL(dp), INTENT(IN) :: dj(:)
2524  INTEGER, INTENT(IN) :: j
2525  !-----------------------------------------------------------------------------
2526  ! Local variables
2527  !-----------------------------------------------------------------------------
2528  REAL(dp) :: val
2529  INTEGER :: i, globrowoff
2530 
2531  !-------------------------------------------
2532  ! Sanity checks on globrow
2533  !-------------------------------------------
2534  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2535  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2536  CALL assert(.false.,'blocktri #25')
2537  END IF
2538  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2539  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2540  CALL assert(.false.,'blocktri #26')
2541  END IF
2542  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2543  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColD: Bad j column ',j; CALL fl(ofu)
2544  CALL assert(.false.,'blocktri #27')
2545  END IF
2546 
2547  globrowoff = globrow-startglobrow+1
2548 
2549  !-------------------------------------------
2550  ! Copy given D column into allocated matrix
2551  !-------------------------------------------
2552  DO i = 1, m
2553  val = dj(i)
2554  lelement(1, globrowoff)%D(i,j) = val
2555  END DO
2556 
2557  !-------------------------------------------
2558  !Save a copy of this original problem
2559  !-------------------------------------------
2560  orig(globrowoff)%D(:,j) = lelement(1,globrowoff)%D(:,j)
2561 
2562  !-------------------------------------------
2563  matdirtied = .true.
2564 
2565 END SUBROUTINE setmatrixrowcold
2566 
2567 !-------------------------------------------------------------------------------
2571 !-------------------------------------------------------------------------------
2572 SUBROUTINE setmatrixrowcolu( globrow, Uj, j )
2573  !-----------------------------------------------------------------------------
2574  ! Formal arguments
2575  !-----------------------------------------------------------------------------
2576  INTEGER, INTENT(IN) :: globrow
2577  REAL(dp), INTENT(IN) :: uj(:)
2578  INTEGER, INTENT(IN) :: j
2579  !-----------------------------------------------------------------------------
2580  ! Local variables
2581  !-----------------------------------------------------------------------------
2582  REAL(dp) :: val
2583  INTEGER :: i, globrowoff
2584 
2585  !-------------------------------------------
2586  ! Sanity checks on globrow
2587  !-------------------------------------------
2588  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2589  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2590  CALL assert(.false.,'blocktri #28')
2591  END IF
2592  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2593  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2594  CALL assert(.false.,'blocktri #29')
2595  END IF
2596  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2597  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2598  CALL assert(.false.,'blocktri #30')
2599  END IF
2600 
2601  globrowoff = globrow-startglobrow+1
2602 
2603  !-------------------------------------------
2604  ! Copy given U column into allocated matrix
2605  !-------------------------------------------
2606  DO i = 1, m
2607  IF ( globrow .EQ. n ) THEN
2608  val = 0
2609  ELSE
2610  val = uj(i)
2611  END IF
2612  lelement(1, globrowoff)%U(i,j) = val
2613  END DO
2614 
2615  !-------------------------------------------
2616  !Save a copy of this original problem
2617  !-------------------------------------------
2618  orig(globrowoff)%U(:,j) = lelement(1,globrowoff)%U(:,j)
2619 
2620  !-------------------------------------------
2621  matdirtied = .true.
2622 
2623 END SUBROUTINE setmatrixrowcolu
2624 
2625 !-------------------------------------------------------------------------------
2629 !-------------------------------------------------------------------------------
2630 SUBROUTINE setmatrixrhs( globrow, b )
2631  !-----------------------------------------------------------------------------
2632  ! Formal arguments
2633  !-----------------------------------------------------------------------------
2634  INTEGER, INTENT(IN) :: globrow
2635  REAL(dp), INTENT(IN) :: b(:)
2636  !-----------------------------------------------------------------------------
2637  ! Local variables
2638  !-----------------------------------------------------------------------------
2639  REAL(dp) :: val
2640  INTEGER :: i, globrowoff
2641 
2642  !-------------------------------------------
2643  ! Sanity checks on globrow
2644  !-------------------------------------------
2645  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2646  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2647  CALL assert(.false.,'blocktri #31')
2648  END IF
2649  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2650  IF(kpdbg) WRITE(ofu,*) 'SetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2651  CALL assert(.false.,'blocktri #32')
2652  END IF
2653 
2654  globrowoff = globrow-startglobrow+1
2655 
2656  !-------------------------------------------
2657  ! Copy given values into allocated RHS
2658  !-------------------------------------------
2659  DO i = 1, m
2660  val = b(i)
2661  lelement(1, globrowoff)%b(i,1) = val
2662  END DO
2663 
2664  !-------------------------------------------
2665  !Save a copy of this original problem
2666  !-------------------------------------------
2667  orig(globrowoff)%b = lelement(1,globrowoff)%b
2668 
2669  !-------------------------------------------
2670  rhsdirtied = .true.
2671 
2672 END SUBROUTINE setmatrixrhs
2673 
2674 !-------------------------------------------------------------------------------
2678 !-------------------------------------------------------------------------------
2679 SUBROUTINE getmatrixrowcoll( globrow, Lj, j )
2680  !-----------------------------------------------------------------------------
2681  ! Formal arguments
2682  !-----------------------------------------------------------------------------
2683  INTEGER, INTENT(IN) :: globrow
2684  REAL(dp), INTENT(OUT) :: lj(:)
2685  INTEGER, INTENT(IN) :: j
2686  !-----------------------------------------------------------------------------
2687  ! Local variables
2688  !-----------------------------------------------------------------------------
2689  REAL(dp) :: val
2690  INTEGER :: i, globrowoff
2691 
2692  !-------------------------------------------
2693  ! Sanity checks on globrow
2694  !-------------------------------------------
2695  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2696  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad input globrow ',globrow; CALL fl(ofu)
2697  CALL assert(.false.,'blocktri #33')
2698  END IF
2699  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2700  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Non-local globrow ',globrow; CALL fl(ofu)
2701  CALL assert(.false.,'blocktri #34')
2702  END IF
2703  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2704  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2705  CALL assert(.false.,'blocktri #35')
2706  END IF
2707 
2708  globrowoff = globrow-startglobrow+1
2709 
2710  !-------------------------------------------
2711  ! Copy L from matrix
2712  !-------------------------------------------
2713  DO i = 1, m
2714  IF ( globrow .EQ. 1 ) THEN
2715  val = 0
2716  ELSE
2717  val = lelement(1, globrowoff)%L(i,j)
2718  END IF
2719  lj(i) = val
2720  END DO
2721 
2722 END SUBROUTINE getmatrixrowcoll
2723 
2724 !-------------------------------------------------------------------------------
2728 !-------------------------------------------------------------------------------
2729 SUBROUTINE getmatrixrowcold( globrow, Dj, j )
2730  !-----------------------------------------------------------------------------
2731  ! Formal arguments
2732  !-----------------------------------------------------------------------------
2733  INTEGER, INTENT(IN) :: globrow
2734  REAL(dp), INTENT(OUT) :: dj(:)
2735  INTEGER, INTENT(IN) :: j
2736  !-----------------------------------------------------------------------------
2737  ! Local variables
2738  !-----------------------------------------------------------------------------
2739  REAL(dp) :: val
2740  INTEGER :: i, globrowoff
2741 
2742  !-------------------------------------------
2743  ! Sanity checks on globrow
2744  !-------------------------------------------
2745  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2746  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad input globrow ',globrow; CALL fl(ofu)
2747  CALL assert(.false.,'blocktri #36')
2748  END IF
2749  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2750  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Non-local globrow ',globrow; CALL fl(ofu)
2751  CALL assert(.false.,'blocktri #37')
2752  END IF
2753  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2754  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColD: Bad j column ',j; CALL fl(ofu)
2755  CALL assert(.false.,'blocktri #38')
2756  END IF
2757 
2758  globrowoff = globrow-startglobrow+1
2759 
2760  !-------------------------------------------
2761  ! Copy D from matrix
2762  !-------------------------------------------
2763  DO i = 1, m
2764  val = lelement(1, globrowoff)%D(i,j)
2765  dj(i) = val
2766  END DO
2767 
2768 END SUBROUTINE getmatrixrowcold
2769 
2770 !-------------------------------------------------------------------------------
2774 !-------------------------------------------------------------------------------
2775 SUBROUTINE getmatrixrowcolu( globrow, Uj, j )
2776  !-----------------------------------------------------------------------------
2777  ! Formal arguments
2778  !-----------------------------------------------------------------------------
2779  INTEGER, INTENT(IN) :: globrow
2780  REAL(dp), INTENT(OUT) :: uj(:)
2781  INTEGER, INTENT(IN) :: j
2782  !-----------------------------------------------------------------------------
2783  ! Local variables
2784  !-----------------------------------------------------------------------------
2785  REAL(dp) :: val
2786  INTEGER :: i, globrowoff
2787 
2788  !-------------------------------------------
2789  ! Sanity checks on globrow
2790  !-------------------------------------------
2791  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2792  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Bad input globrow ',globrow; CALL fl(ofu)
2793  CALL assert(.false.,'blocktri #39')
2794  END IF
2795  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2796  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColU: Non-local globrow ',globrow; CALL fl(ofu)
2797  CALL assert(.false.,'blocktri #40')
2798  END IF
2799  IF ( .NOT. ((1 .LE. j) .AND. (j .LE. m)) ) THEN
2800  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRowColL: Bad j column ',j; CALL fl(ofu)
2801  CALL assert(.false.,'blocktri #41')
2802  END IF
2803 
2804  globrowoff = globrow-startglobrow+1
2805 
2806  !-------------------------------------------
2807  ! Copy U from matrix
2808  !-------------------------------------------
2809  DO i = 1, m
2810  IF ( globrow .EQ. n ) THEN
2811  val = 0
2812  ELSE
2813  val = lelement(1, globrowoff)%U(i,j)
2814  END IF
2815  uj(i) = val
2816  END DO
2817 
2818 END SUBROUTINE getmatrixrowcolu
2819 
2820 !-------------------------------------------------------------------------------
2824 !-------------------------------------------------------------------------------
2825 SUBROUTINE getmatrixrhs( globrow, b )
2826  !-----------------------------------------------------------------------------
2827  ! Formal arguments
2828  !-----------------------------------------------------------------------------
2829  INTEGER, INTENT(IN) :: globrow
2830  REAL(dp), INTENT(OUT) :: b(:)
2831  !-----------------------------------------------------------------------------
2832  ! Local variables
2833  !-----------------------------------------------------------------------------
2834  REAL(dp) :: val
2835  INTEGER :: i, globrowoff
2836 
2837  !-------------------------------------------
2838  ! Sanity checks on globrow
2839  !-------------------------------------------
2840  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2841  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Bad input globrow ',globrow; CALL fl(ofu)
2842  CALL assert(.false.,'blocktri #42')
2843  END IF
2844  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2845  IF(kpdbg) WRITE(ofu,*) 'GetMatrixRHS: Non-local globrow ',globrow; CALL fl(ofu)
2846  CALL assert(.false.,'blocktri #43')
2847  END IF
2848 
2849  globrowoff = globrow-startglobrow+1
2850 
2851  !-------------------------------------------
2852  ! Copy RHS
2853  !-------------------------------------------
2854  DO i = 1, m
2855  val = lelement(1, globrowoff)%b(i,1)
2856  b(i) = val
2857  END DO
2858 
2859 END SUBROUTINE getmatrixrhs
2860 
2861 !-------------------------------------------------------------------------------
2865 !-------------------------------------------------------------------------------
2866 SUBROUTINE getsolutionvector( globrow, x )
2867  !-----------------------------------------------------------------------------
2868  ! Formal arguments
2869  !-----------------------------------------------------------------------------
2870  INTEGER, INTENT(IN) :: globrow
2871  REAL(dp), INTENT(OUT) :: x(:)
2872 
2873  !-------------------------------------------
2874  ! Sanity checks on globrow
2875  !-------------------------------------------
2876  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
2877  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Bad input globrow ',globrow; CALL fl(ofu)
2878  CALL assert(.false.,'blocktri #44')
2879  END IF
2880  IF ( (globrow .LT. startglobrow) .OR. (globrow .GT. endglobrow) ) THEN
2881  IF(kpdbg) WRITE(ofu,*) 'SetSolutionVector: Non-local globrow ',globrow; CALL fl(ofu)
2882  CALL assert(.false.,'blocktri #45')
2883  END IF
2884 
2885  !-------------------------------------------
2886  ! Copy solution into given vector
2887  !-------------------------------------------
2888  x = selement(globrow)%x
2889 
2890 END SUBROUTINE getsolutionvector
2891 
2892 !-------------------------------------------------------------------------------
2896 !-------------------------------------------------------------------------------
2897 SUBROUTINE setidentitytestcase
2898  !-----------------------------------------------------------------------------
2899  ! Local variables
2900  !-----------------------------------------------------------------------------
2901  INTEGER :: i, j, globrow, globrowoff
2902 
2903  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case ------'; CALL fl(ofu)
2904 
2905  !-----------------------------------------
2906  !Initialize the arrays with identity
2907  !-----------------------------------------
2908  DO globrow = startglobrow, endglobrow, 1
2909  globrowoff = globrow-startglobrow+1
2910  DO i = 1, m
2911  DO j = 1, m
2912  lelement(1, globrowoff)%L(i,j) = zero
2913  IF ( i .EQ. j ) THEN
2914  lelement(1, globrowoff)%D(i,j) = one
2915  ELSE
2916  lelement(1, globrowoff)%D(i,j) = zero
2917  END IF
2918  lelement(1, globrowoff)%U(i,j) = zero
2919  END DO
2920  lelement(1, globrowoff)%b(i,1) = one
2921  END DO
2922  !-------------------------------------------
2923  !Save a copy of this original problem
2924  !-------------------------------------------
2925  orig(globrowoff)%L = lelement(1,globrowoff)%L
2926  orig(globrowoff)%D = lelement(1,globrowoff)%D
2927  orig(globrowoff)%U = lelement(1,globrowoff)%U
2928  orig(globrowoff)%b = lelement(1,globrowoff)%b
2929  END DO
2930 
2931  !-------------------------------------------
2932  matdirtied = .true.
2933  rhsdirtied = .true.
2934 
2935 END SUBROUTINE setidentitytestcase
2936 
2937 !-------------------------------------------------------------------------------
2942 !-------------------------------------------------------------------------------
2943 SUBROUTINE setidentityrhs
2944  !-----------------------------------------------------------------------------
2945  ! Local variables
2946  !-----------------------------------------------------------------------------
2947  INTEGER :: i, j, globrow, globrowoff
2948 
2949  IF(kpdbg) WRITE(ofu,*) '------ Using Identity Test Case RHS ------'; CALL fl(ofu)
2950 
2951  !-----------------------------------------
2952  !Initialize the arrays with identity
2953  !-----------------------------------------
2954  DO globrow = startglobrow, endglobrow, 1
2955  globrowoff = globrow-startglobrow+1
2956  DO i = 1, m
2957  lelement(1, globrowoff)%b(i,1) = one
2958  END DO
2959  !-------------------------------------------
2960  !Save a copy of this original RHS
2961  !-------------------------------------------
2962  orig(globrowoff)%b = lelement(1,globrowoff)%b
2963  END DO
2964 
2965  !-------------------------------------------
2966  rhsdirtied = .true.
2967 
2968 END SUBROUTINE setidentityrhs
2969 
2970 !-------------------------------------------------------------------------------
2974 !-------------------------------------------------------------------------------
2975 SUBROUTINE setrandomtestcase
2976  !-----------------------------------------------------------------------------
2977  ! Local variables
2978  !-----------------------------------------------------------------------------
2979  REAL(dp) :: randval, rmin, rmax
2980  INTEGER :: rngwidth
2981  INTEGER, ALLOCATABLE :: seeds(:)
2982  INTEGER :: i, j, globrow, globrowoff
2983 
2984  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case ------'; CALL fl(ofu)
2985 
2986  !-----------------------------------------
2987  !Initialize random number seeds
2988  !-----------------------------------------
2989  CALL random_seed( size=rngwidth)
2990  ALLOCATE( seeds(rngwidth) )
2991  DO i = 1, rngwidth
2992  seeds(i) = i*(rank+100)*p
2993  END DO
2994  CALL random_seed( put=seeds(1:rngwidth) )
2995  DEALLOCATE( seeds )
2996 
2997  !-----------------------------------------
2998  !Write the header information for problem
2999  !-----------------------------------------
3000  IF( writeproblemfile ) THEN !Write the dimensions N, NZ
3001  IF(kpdbg) WRITE(pfu,*) n*m; CALL fl(pfu)
3002  IF(kpdbg) WRITE(pfu,*) (3*n-2)*m*m; CALL fl(pfu)
3003  END IF
3004 
3005  !-----------------------------------------
3006  !Initialize the arrays with random values
3007  !-----------------------------------------
3008  rmin = 0.01
3009  rmax = 1.00
3010  DO globrow = startglobrow, endglobrow, 1
3011  globrowoff = globrow-startglobrow+1
3012  DO i = 1, m
3013  DO j = 1, m
3014  IF ( globrow .EQ. 1 ) THEN
3015  randval = 0
3016  ELSE
3017  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3018  IF( writeproblemfile ) THEN
3019  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-2)*m+j, randval
3020  END IF
3021  END IF
3022  lelement(1, globrowoff)%L(i,j) = randval
3023 
3024  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3025  lelement(1, globrowoff)%D(i,j) = randval
3026  IF( writeproblemfile ) THEN
3027  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow-1)*m+j, randval
3028  END IF
3029 
3030  IF ( globrow .EQ. n ) THEN
3031  randval = 0
3032  ELSE
3033  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3034  IF( writeproblemfile ) THEN
3035  IF(kpdbg) WRITE(pfu,*) (globrow-1)*m+i, (globrow)*m+j, randval
3036  END IF
3037  END IF
3038  lelement(1, globrowoff)%U(i,j) = randval
3039  END DO
3040  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3041  lelement(1, globrowoff)%b(i,1) = randval
3042  END DO
3043 
3044  !-------------------------------------------
3045  !Save a copy of this original problem
3046  !-------------------------------------------
3047  orig(globrowoff)%L = lelement(1,globrowoff)%L
3048  orig(globrowoff)%D = lelement(1,globrowoff)%D
3049  orig(globrowoff)%U = lelement(1,globrowoff)%U
3050  orig(globrowoff)%b = lelement(1,globrowoff)%b
3051  END DO
3052 
3053  IF (writeproblemfile) THEN
3054  DO globrow = startglobrow, endglobrow, 1
3055  globrowoff = globrow-startglobrow+1
3056  DO i = 1, m
3057  IF(kpdbg) WRITE(pfu,*) lelement(1,globrowoff)%b(i,1); CALL fl(pfu)
3058  END DO
3059  END DO
3060  END IF
3061 
3062  !-------------------------------------------
3063  matdirtied = .true.
3064  rhsdirtied = .true.
3065 
3066 END SUBROUTINE setrandomtestcase
3067 
3068 !-------------------------------------------------------------------------------
3073 !-------------------------------------------------------------------------------
3074 SUBROUTINE setrandomrhs( randseedoff )
3075  !-----------------------------------------------------------------------------
3076  ! Formal arguments
3077  !-----------------------------------------------------------------------------
3078  INTEGER, INTENT(IN) :: randseedoff
3079  !-----------------------------------------------------------------------------
3080  ! Local variables
3081  !-----------------------------------------------------------------------------
3082  REAL(dp) :: randval, rmin, rmax
3083  INTEGER :: rngwidth
3084  INTEGER, ALLOCATABLE :: seeds(:)
3085  INTEGER :: i, j, globrow, globrowoff
3086 
3087  IF(kpdbg) WRITE(ofu,*) '------ Using Random Test Case RHS ------'; CALL fl(ofu)
3088 
3089  !-----------------------------------------
3090  !Initialize random number seeds
3091  !-----------------------------------------
3092  CALL random_seed( size=rngwidth)
3093  ALLOCATE( seeds(rngwidth) )
3094  DO i = 1, rngwidth
3095  seeds(i) = i*(rank+100+34+randseedoff)*p
3096  END DO
3097  CALL random_seed( put=seeds(1:rngwidth) )
3098  DEALLOCATE( seeds )
3099 
3100  !-----------------------------------------
3101  !Initialize the arrays with random values
3102  !-----------------------------------------
3103  rmin = 0.01
3104  rmax = 1.00
3105  DO globrow = startglobrow, endglobrow, 1
3106  globrowoff = globrow-startglobrow+1
3107  DO i = 1, m
3108  CALL random_number(randval); randval = rmin+(rmax-rmin)*randval
3109  lelement(1, globrowoff)%b(i,1) = randval
3110  END DO
3111 
3112  !-------------------------------------------
3113  !Save a copy of this original RHS
3114  !-------------------------------------------
3115  orig(globrowoff)%b = lelement(1,globrowoff)%b
3116  END DO
3117 
3118  !-------------------------------------------
3119  rhsdirtied = .true.
3120 
3121 END SUBROUTINE setrandomrhs
3122 
3123 !-------------------------------------------------------------------------------
3127 !-------------------------------------------------------------------------------
3128 SUBROUTINE finalize( do_mpifinalize )
3129  !-----------------------------------------------------------------------------
3130  ! Formal arguments
3131  !-----------------------------------------------------------------------------
3132  LOGICAL, INTENT(IN) :: do_mpifinalize
3133  !-----------------------------------------------------------------------------
3134  ! Local variables
3135  !-----------------------------------------------------------------------------
3136  INTEGER :: level
3137  INTEGER :: globrow
3138  INTEGER :: mpierr
3139  INTEGER :: tempinvcount, tempmulcount, tempsolcount
3140 
3141  !-----------------------------------------------------------------------------
3142  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3143  IF(kpdbg) WRITE(ofu,*) '------ Finalizing start ------'; CALL fl(ofu)
3144 
3145  !-----------------------------------------------------------------------------
3146  CALL plbfinalize()
3147 
3148  !-----------------------------------------------------------------------------
3149  ! Deallocate mats and hats, if any
3150  !-----------------------------------------------------------------------------
3151  IF ( ALLOCATED(lelement) ) THEN
3152  DO level = lbound(lelement,1), ubound(lelement,1)
3153  DO globrow = lbound(lelement,2), ubound(lelement,2)
3154  IF ( ALLOCATED(lelement(level,globrow)%L) ) THEN
3155  DEALLOCATE( lelement(level, globrow)%L )
3156  END IF
3157  IF ( ALLOCATED(lelement(level,globrow)%D) ) THEN
3158  DEALLOCATE( lelement(level, globrow)%D )
3159  END IF
3160  IF ( ALLOCATED(lelement(level,globrow)%U) ) THEN
3161  DEALLOCATE( lelement(level, globrow)%U )
3162  END IF
3163  IF ( ALLOCATED(lelement(level,globrow)%b) ) THEN
3164  DEALLOCATE( lelement(level, globrow)%b )
3165  END IF
3166  IF ( ALLOCATED(lelement(level,globrow)%pivot) ) THEN
3167  DEALLOCATE( lelement(level, globrow)%pivot )
3168  END IF
3169  END DO
3170  END DO
3171  DEALLOCATE(lelement)
3172  END IF
3173  IF ( ALLOCATED(orig) ) THEN
3174  DO globrow = lbound(orig,1), ubound(orig,1)
3175  IF ( ALLOCATED(orig(globrow)%L) ) THEN
3176  DEALLOCATE( orig(globrow)%L )
3177  END IF
3178  IF ( ALLOCATED(orig(globrow)%D) ) THEN
3179  DEALLOCATE( orig(globrow)%D )
3180  END IF
3181  IF ( ALLOCATED(orig(globrow)%U) ) THEN
3182  DEALLOCATE( orig(globrow)%U )
3183  END IF
3184  IF ( ALLOCATED(orig(globrow)%b) ) THEN
3185  DEALLOCATE( orig(globrow)%b )
3186  END IF
3187  IF ( ALLOCATED(orig(globrow)%pivot) ) THEN
3188  DEALLOCATE( orig(globrow)%pivot )
3189  END IF
3190  END DO
3191  DEALLOCATE(orig)
3192  END IF
3193 
3194  IF ( ALLOCATED(selement) ) THEN
3195  DO globrow = 1, n, 1
3196  IF ( ALLOCATED(selement(globrow)%x) ) THEN
3197  DEALLOCATE( selement(globrow)%x )
3198  END IF
3199  END DO
3200  DEALLOCATE(selement)
3201  END IF
3202 
3203  !-----------------------------------------------------------------------------
3204 #if defined(MPI_OPT)
3205  IF ( usebarriers ) THEN
3206  IF(kpdbg) WRITE(ofu,*) 'Barrier in finalize'; CALL fl(ofu)
3207  CALL mpi_barrier( siesta_comm, mpierr )
3208  IF(kpdbg) WRITE(ofu,*) 'Done barrier in finalize'; CALL fl(ofu)
3209  END IF
3210  !-----------------------------------------------------------------------------
3211  IF ( do_mpifinalize ) THEN
3212  CALL mpi_finalize( mpierr )
3213  use_mpiwtime = .false.
3214  END IF
3215 #endif
3216 
3217  !-----------------------------------------------------------------------------
3218  tempmulcount=totmatmulcount; IF ( tempmulcount .LE. 0 ) tempmulcount = 1
3219  tempinvcount=totinvcount; IF ( tempinvcount .LE. 0 ) tempinvcount = 1
3220  tempsolcount=totmatsolcount; IF ( tempsolcount .LE. 0 ) tempsolcount = 1
3221  IF(kpdbg) WRITE(ofu,*) 'N=', n, ' M=', m, ' P=', p, ' rank=', rank
3222  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory ', membytes/1e6, ' MB'
3223  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Computation ', tottime-totcommtime,' sec'
3224  IF(kpdbg) WRITE(ofu,'(A,F8.4,A)') 'Communication ', totcommtime,' sec'
3225  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix inv ',totinvcount, ' * ', &
3226  totinvtime/tempinvcount,' sec = ', totinvtime, ' sec'
3227  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix mul ',totmatmulcount, ' * ', &
3228  totmatmultime/tempmulcount, ' sec = ', totmatmultime, ' sec'
3229  IF(kpdbg) WRITE(ofu,'(A,I5.1,A,F8.4,A,F8.4,A)') 'Matrix sol ',totmatsolcount, ' * ', &
3230  totmatsoltime/tempsolcount, ' sec = ', totmatsoltime, ' sec'
3231  IF(kpdbg) WRITE(ofu,*) 'Finalized rank ', rank
3232  IF(kpdbg) WRITE(ofu,*) '------ Finalization end ------'; CALL fl(ofu)
3233 END SUBROUTINE finalize
3234 
3235 !-------------------------------------------------------------------------------
3239 !-------------------------------------------------------------------------------
3240 LOGICAL FUNCTION iseven( num )
3241  INTEGER, INTENT(IN) :: num
3242  iseven = mod(num,2) .EQ. 0
3243 END FUNCTION iseven
3244 
3245 !-------------------------------------------------------------------------------
3249 !-------------------------------------------------------------------------------
3250 LOGICAL FUNCTION isodd( num )
3251  INTEGER, INTENT(IN) :: num
3252  isodd = (.NOT. iseven(num))
3253 END FUNCTION isodd
3254 
3255 !-------------------------------------------------------------------------------
3259 !-------------------------------------------------------------------------------
3260 FUNCTION lr2gr( locrow, level ) result ( globrow )
3261  !-----------------------------------------------------------------------------
3262  ! Formal arguments
3263  !-----------------------------------------------------------------------------
3264  INTEGER, INTENT(IN) :: locrow
3265  INTEGER, INTENT(IN) :: level
3266  INTEGER :: globrow
3267 
3268  !-----------------------------------------------------------------------------
3269  ! Local variables
3270  !-----------------------------------------------------------------------------
3271  INTEGER :: i
3272 
3273  !-----------------------------------------------------------------------------
3274  ! Invert the rule r=(r+1)/2 backward for all levels
3275  !-----------------------------------------------------------------------------
3276  globrow = locrow
3277  levelloop: DO i = level-1, 1, -1
3278  globrow = 2*globrow - 1
3279  END DO levelloop
3280 
3281  !-----------------------------------------------------------------------------
3282  ! DEBUGGING: Just double-check
3283  !-----------------------------------------------------------------------------
3284  IF ( ((2.0 ** (level-1)) * (locrow-1) + 1) .NE. globrow ) THEN
3285  CALL assert(.false.,'blocktri #46') !Consistency check failed
3286  END IF
3287 
3288  RETURN
3289 END FUNCTION lr2gr
3290 
3291 !-------------------------------------------------------------------------------
3297 !-------------------------------------------------------------------------------
3298 FUNCTION gr2lr( globrow, level ) result ( locrow )
3299  !-----------------------------------------------------------------------------
3300  ! Formal arguments
3301  !-----------------------------------------------------------------------------
3302  INTEGER, INTENT(IN) :: globrow
3303  INTEGER, INTENT(IN) :: level
3304  INTEGER :: locrow
3305 
3306  !-----------------------------------------------------------------------------
3307  ! Local variables
3308  !-----------------------------------------------------------------------------
3309  INTEGER :: i
3310 
3311  !-----------------------------------------------------------------------------
3312  ! Apply the rule r=(r+1)/2 for all odd-numbered rows
3313  ! Any time r becomes even, give up
3314  !-----------------------------------------------------------------------------
3315  locrow = globrow
3316  levelloop: DO i = 1, level-1, 1
3317  IF ( iseven(locrow) ) THEN !Even-numbered rows don't go any further in level
3318  locrow = 0
3319  EXIT levelloop !Stop looping
3320  END IF
3321  locrow = (locrow+1) / 2
3322  END DO levelloop
3323 
3324  RETURN
3325 END FUNCTION gr2lr
3326 
3327 !-------------------------------------------------------------------------------
3331 !-------------------------------------------------------------------------------
3332 FUNCTION gr2rank( globrow ) result ( outrank )
3333  !-----------------------------------------------------------------------------
3334  ! Formal arguments
3335  !-----------------------------------------------------------------------------
3336  INTEGER, INTENT(IN) :: globrow
3337  INTEGER :: outrank
3338 
3339  !-----------------------------------------------------------------------------
3340  ! Local variables
3341  !-----------------------------------------------------------------------------
3342  INTEGER :: m !Average integral number of rows per processor
3343  INTEGER :: spill !Spill over beyond prefect loading of rows to processors
3344 
3345  IF ( (globrow .LT. 1) .OR. (globrow .GT. n) ) THEN
3346  outrank = -1
3347  ELSE
3348  m = n / p
3349  spill = mod( n, p )
3350  IF ( globrow .LE. (m+1)*spill ) THEN
3351  outrank = (globrow-1)/(m+1)
3352  ELSE
3353  outrank = (globrow-1 - (m+1)*spill)/m + spill
3354  END IF
3355  END IF
3356 
3357  RETURN
3358 END FUNCTION gr2rank
3359 
3360 !-------------------------------------------------------------------------------
3364 !-------------------------------------------------------------------------------
3365 FUNCTION lr2rank( locrow, level ) result ( outrank )
3366  !-----------------------------------------------------------------------------
3367  ! Formal arguments
3368  !-----------------------------------------------------------------------------
3369  INTEGER, INTENT(IN) :: locrow
3370  INTEGER, INTENT(IN) :: level
3371  INTEGER :: outrank
3372 
3373  !-----------------------------------------------------------------------------
3374  ! Local variables
3375  !-----------------------------------------------------------------------------
3376  INTEGER :: globrow !Global row number of given locrow
3377 
3378  globrow = lr2gr( locrow, level )
3379  outrank = gr2rank( globrow )
3380 
3381  RETURN
3382 END FUNCTION lr2rank
3383 
3384 !-------------------------------------------------------------------------------
3390 !-------------------------------------------------------------------------------
3391 #if defined(MPI_OPT)
3392 SUBROUTINE computeforwardoddrowhats(locrow, level, startlocrow, endlocrow,bonly)
3393  !-----------------------------------------------------------------------------
3394  ! Formal arguments
3395  !-----------------------------------------------------------------------------
3396  INTEGER, INTENT(IN) :: locrow
3397  INTEGER, INTENT(IN) :: level
3398  INTEGER, INTENT(IN) :: startlocrow
3399  INTEGER, INTENT(IN) :: endlocrow
3400  LOGICAL, INTENT(IN) :: bonly
3401  !-----------------------------------------------------------------------------
3402  ! Local variables
3403  !-----------------------------------------------------------------------------
3404  INTEGER :: globrow
3405  INTEGER :: globrowoff
3406  INTEGER :: prevglobrowoff
3407  INTEGER :: nextglobrowoff
3408 
3409  !-------------------------------------------------------------
3410  ! Nothing to do if last level
3411  !-------------------------------------------------------------
3412  IF ( level .GE. nlevels ) THEN
3413  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: no op '; CALL fl(ofu)
3414  IF ( rank .NE. 0 ) THEN
3415  IF(kpdbg) WRITE(ofu,*) 'mat-mul last lvl: impossible ',rank; CALL fl(ofu)
3416  CALL assert(.false.,'blocktri #47')
3417  END IF
3418  RETURN
3419  END IF
3420 
3421  !-------------------------------------------------------------
3422  ! Sanity check
3423  !-------------------------------------------------------------
3424  IF ( iseven( locrow ) ) THEN
3425  IF(kpdbg) WRITE(ofu,*) 'mat-mul: odd only ',globrow,' ',locrow; CALL fl(ofu); CALL assert(.false.,'blocktri #48')
3426  END IF
3427  IF ( .NOT. ( (startlocrow .LE. locrow) .AND. (locrow .LE. endlocrow) ) ) THEN
3428  IF(kpdbg) WRITE(ofu,*) 'mat-mul: locrow prob ',globrow,' ',locrow;CALL fl(ofu);CALL assert(.false.,'blocktri #49')
3429  END IF
3430 
3431  !-------------------------------------------------------------
3432  globrow = lr2gr( locrow, level )
3433  globrowoff = globrow - startglobrow + 1
3434  IF(kpdbg) WRITE(ofu,*) ' Compute mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3435 
3436  !-------------------------------------------------------------
3437  ! Ensure there are mats at (thislevel, thisrow)
3438  !-------------------------------------------------------------
3439  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D ) ) THEN
3440  IF(kpdbg) WRITE(ofu,*) ' Copying bad D'; CALL fl(ofu); CALL assert(.false.,'blocktri #50')
3441  END IF
3442  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L ) ) THEN
3443  IF(kpdbg) WRITE(ofu,*) ' Copying bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #51')
3444  END IF
3445  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U ) ) THEN
3446  IF(kpdbg) WRITE(ofu,*) ' Copying bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #52')
3447  END IF
3448  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b ) ) THEN
3449  IF(kpdbg) WRITE(ofu,*) ' Copying bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #53')
3450  END IF
3451 
3452  !-------------------------------------------------------------
3453  ! Ensure there is space at (nextlevel, thisrow)
3454  !-------------------------------------------------------------
3455  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%D ) ) THEN
3456  IF(kpdbg) WRITE(ofu,*) ' Copying bad D +1'; CALL fl(ofu); CALL assert(.false.,'blocktri #54')
3457  END IF
3458  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%L ) ) THEN
3459  IF(kpdbg) WRITE(ofu,*) ' Copying bad L +1'; CALL fl(ofu); CALL assert(.false.,'blocktri #55')
3460  END IF
3461  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%U ) ) THEN
3462  IF(kpdbg) WRITE(ofu,*) ' Copying bad U +1'; CALL fl(ofu); CALL assert(.false.,'blocktri #56')
3463  END IF
3464  IF ( .NOT. ALLOCATED(lelement(level+1,globrowoff)%b ) ) THEN
3465  IF(kpdbg) WRITE(ofu,*) ' Copying bad b +1'; CALL fl(ofu); CALL assert(.false.,'blocktri #57')
3466  END IF
3467 
3468  !-------------------------------------------------------------
3469  ! Copy over the D and b as is first
3470  !-------------------------------------------------------------
3471  IF ( .NOT. bonly ) THEN
3472  lelement(level+1,globrowoff)%D = lelement(level,globrowoff)%D
3473  END IF
3474  lelement(level+1,globrowoff)%b = lelement(level,globrowoff)%b
3475 
3476  !-------------------------------------------------------------
3477  ! Check if this row has a top neighbor at this level
3478  !-------------------------------------------------------------
3479  IF ( lr2rank(locrow-1,level) .LT. 0 ) THEN
3480  ! No rank before ours
3481  IF ( .NOT. bonly ) THEN
3482  lelement(level+1,globrowoff)%U = 0
3483  IF(kpdbg) WRITE(ofu,*) ' ZERO L'; CALL fl(ofu)
3484  END IF
3485  ELSE
3486  IF ( locrow .EQ. startlocrow ) THEN
3487  prevglobrowoff = 0
3488  ELSE IF ( locrow .GT. startlocrow ) THEN
3489  prevglobrowoff = lr2gr( locrow-1, level ) - startglobrow + 1
3490  ELSE
3491  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible prev'; CALL fl(ofu); CALL assert(.false.,'blocktri #58')
3492  END IF
3493 
3494  IF ( .NOT. bonly ) THEN
3495  !------------------------------------------------
3496  ! D = D - L*Uhat(prev)
3497  !------------------------------------------------
3498  CALL bsystemclock(mattimer1)
3499  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3500  lelement(level,prevglobrowoff)%U, &
3501  one, lelement(level+1,globrowoff)%D )
3502  CALL bsystemclock(mattimer2)
3503  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3504  IF(kpdbg) WRITE(ofu,*) ' Dtop'; CALL fl(ofu)
3505 
3506  !------------------------------------------------
3507  ! L = -L*Lhat(prev)
3508  !------------------------------------------------
3509  CALL bsystemclock(mattimer1)
3510  CALL plbdgemm( -one, lelement(level,globrowoff)%L, &
3511  lelement(level,prevglobrowoff)%L, &
3512  zero, lelement(level+1,globrowoff)%L )
3513  CALL bsystemclock(mattimer2)
3514  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3515  IF(kpdbg) WRITE(ofu,*) ' L'; CALL fl(ofu)
3516  END IF
3517 
3518  !------------------------------------------------
3519  ! b = b - L*bhat(prev)
3520  !------------------------------------------------
3521  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
3522  lelement(level,prevglobrowoff)%b(:,1), &
3523  one, lelement(level+1,globrowoff)%b(:,1) )
3524  IF(kpdbg) WRITE(ofu,*) ' btop'; CALL fl(ofu)
3525  END IF
3526 
3527  !-------------------------------------------------------------
3528  ! Check if this row has a bottom neighbor at this level
3529  !-------------------------------------------------------------
3530  IF ( lr2rank(locrow+1,level) .LT. 0 ) THEN
3531  ! No rank after ours
3532  IF ( .NOT. bonly ) THEN
3533  lelement(level+1,globrowoff)%U = 0
3534  IF(kpdbg) WRITE(ofu,*) ' ZERO U'; CALL fl(ofu)
3535  END IF
3536  ELSE
3537  IF ( locrow .EQ. endlocrow ) THEN
3538  nextglobrowoff = endglobrow - startglobrow + 2
3539  ELSE IF ( locrow .LT. endlocrow ) THEN
3540  nextglobrowoff = lr2gr( locrow+1, level ) - startglobrow + 1
3541  ELSE
3542  IF(kpdbg) WRITE(ofu,*) 'mat-mul: impossible next'; CALL fl(ofu); CALL assert(.false.,'blocktri #59')
3543  END IF
3544 
3545  IF ( .NOT. bonly ) THEN
3546  !------------------------------------------------
3547  ! D = D - U*Lhat(next)
3548  !------------------------------------------------
3549  CALL bsystemclock(mattimer1)
3550  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3551  lelement(level,nextglobrowoff)%L, &
3552  one, lelement(level+1,globrowoff)%D )
3553  CALL bsystemclock(mattimer2)
3554  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3555  IF(kpdbg) WRITE(ofu,*) ' Dbot'; CALL fl(ofu)
3556 
3557  !------------------------------------------------
3558  ! U = -U*Uhat(next)
3559  !------------------------------------------------
3560  CALL bsystemclock(mattimer1)
3561  CALL plbdgemm( -one, lelement(level,globrowoff)%U, &
3562  lelement(level,nextglobrowoff)%U, &
3563  zero, lelement(level+1,globrowoff)%U )
3564  CALL bsystemclock(mattimer2)
3565  CALL chargetime( totmatmultime, mattimer2, mattimer1, totmatmulcount )
3566  IF(kpdbg) WRITE(ofu,*) ' U'; CALL fl(ofu)
3567  END IF
3568 
3569  !------------------------------------------------
3570  ! b = b - U*bhat(next)
3571  !------------------------------------------------
3572  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
3573  lelement(level,nextglobrowoff)%b(:,1), &
3574  one, lelement(level+1,globrowoff)%b(:,1) )
3575  IF(kpdbg) WRITE(ofu,*) ' bbot'; CALL fl(ofu)
3576  END IF
3577 END SUBROUTINE computeforwardoddrowhats
3578 
3579 !-------------------------------------------------------------------------------
3583 !-------------------------------------------------------------------------------
3584 SUBROUTINE forwardsolve
3585  !-----------------------------------------------------------------------------
3586  ! Local variables
3587  !-----------------------------------------------------------------------------
3588  INTEGER :: level
3589  INTEGER :: locrow
3590  INTEGER :: globrow
3591  INTEGER :: globrowoff
3592  INTEGER :: startlocrow
3593  INTEGER :: endlocrow
3594  INTEGER :: rowoffset
3595  INTEGER :: nbrrank
3596  INTEGER :: nbrmpireqindx
3597  INTEGER :: nbrmpireqcnt
3598  INTEGER :: nbrmpinirecvs
3599  INTEGER :: nbrmpinisends
3600  INTEGER :: nbrmpireq(6)
3601  INTEGER :: nbrmpierr(6)
3602  INTEGER :: mpiwaiterr
3603  INTEGER :: mpierr
3604  INTEGER :: nbrmpistatuses(mpi_status_size,6)
3605  INTEGER :: nbrmpistatus(mpi_status_size)
3606  INTEGER :: waitmatchindex
3607  INTEGER :: stag
3608  INTEGER :: reqi
3609  INTEGER :: blaserr
3610 
3611  CALL bsystemclock(globtimer1)
3612 
3613  !-----------------------------------------------------------------------------
3614  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3615  IF(kpdbg) WRITE(ofu,*) '------ Forward solve start ------'; CALL fl(ofu)
3616  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
3617 
3618  !-----------------------------------------------------------------------------
3619  CALL plbforwardinitialize()
3620 
3621  !-----------------------------------------------------------------------------
3622  ! Forward solving loop in which even rows compute inverses and odd rows
3623  ! compute matrix-matrix products for new terms
3624  !-----------------------------------------------------------------------------
3625  forwardsolveloop: DO level = 1, nlevels, 1
3626  !---------------------------------------------------------------------------
3627  IF ( usebarriers ) THEN
3628  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward level ', level; CALL fl(ofu)
3629  CALL mpi_barrier( siesta_comm, mpierr )
3630  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward level ', level; CALL fl(ofu)
3631  END IF
3632 
3633  !---------------------------------------------------------------------------
3634  !Determine start and end local rows at current level
3635  !---------------------------------------------------------------------------
3636  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
3637  endlocrow = -1 !Start with an invalid value (less than startlocrow)
3638  DO globrow = startglobrow, endglobrow, 1
3639  locrow = gr2lr( globrow, level ) !globrow's place at current level
3640  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
3641  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
3642  startlocrow = locrow
3643  END IF
3644  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
3645  endlocrow = locrow
3646  END IF
3647  END IF
3648  END DO
3649 
3650  !---------------------------------------------------------------------------
3651  !We may have run out of work; see if we have a valid range of rows left
3652  !---------------------------------------------------------------------------
3653  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
3654  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward level ', level; CALL fl(ofu)
3655  CALL plbforwardinitializelevel( level, .false. )
3656  CALL plbforwardfinalizelevel( level, .false. )
3657  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
3658  ELSE
3659  CALL plbforwardinitializelevel( level, .true. )
3660  END IF
3661 
3662  IF(kpdbg) WRITE(ofu,*) '**** Forward level ', level, ' ****'; CALL fl(ofu)
3663 
3664  !---------------------------------------------------------------------------
3665  !Allocate memory for incoming nbr values, if any, at this level
3666  !---------------------------------------------------------------------------
3667  IF ( isodd(startlocrow) .AND. &
3668  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
3669  globrowoff = 0
3670  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3671  ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3672  ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3673  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3674  CALL chargememory( (2*m*m+m)*dpsz )
3675  END IF
3676  lelement(level, globrowoff)%L = 0
3677  lelement(level, globrowoff)%U = 0
3678  lelement(level, globrowoff)%b = 0
3679  END IF
3680  IF ( isodd(endlocrow) .AND. &
3681  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
3682  globrowoff = endglobrow-startglobrow+2
3683  IF ( .NOT. ALLOCATED( lelement(level, globrowoff)%L ) ) THEN
3684  ALLOCATE( lelement(level, globrowoff)%L(m,m) )
3685  ALLOCATE( lelement(level, globrowoff)%U(m,m) )
3686  ALLOCATE( lelement(level, globrowoff)%b(m,1) )
3687  CALL chargememory( (2*m*m+m)*dpsz )
3688  END IF
3689  lelement(level, globrowoff)%L = 0
3690  lelement(level, globrowoff)%U = 0
3691  lelement(level, globrowoff)%b = 0
3692  END IF
3693 
3694  !---------------------------------------------------------------------------
3695  !Allocate memory at next level for those rows that are odd at current level
3696  !---------------------------------------------------------------------------
3697  IF ( level .LT. nlevels ) THEN
3698  DO locrow = startlocrow, endlocrow, 1
3699  IF ( isodd(locrow) ) THEN
3700  globrow = lr2gr( locrow, level )
3701  globrowoff = globrow-startglobrow+1
3702  IF ( .NOT. ALLOCATED( lelement(level+1, globrowoff)%L ) ) THEN
3703  ALLOCATE( lelement(level+1, globrowoff)%L(m,m) )
3704  ALLOCATE( lelement(level+1, globrowoff)%D(m,m) )
3705  ALLOCATE( lelement(level+1, globrowoff)%U(m,m) )
3706  ALLOCATE( lelement(level+1, globrowoff)%b(m,1) )
3707  CALL chargememory( (2*m*m+m)*dpsz )
3708  END IF
3709  lelement(level+1, globrowoff)%L = 0
3710  lelement(level+1, globrowoff)%D = 0
3711  lelement(level+1, globrowoff)%U = 0
3712  lelement(level+1, globrowoff)%b = 0
3713  END IF
3714  END DO
3715  END IF
3716 
3717  !---------------------------------------------------------------------------
3718  !Reset requests
3719  !---------------------------------------------------------------------------
3720  DO nbrmpireqindx = 1, 6
3721  nbrmpireq(nbrmpireqindx) = mpi_request_null
3722  END DO
3723 
3724  !---------------------------------------------------------------------------
3725  !Pre-post the expected MPI receives via a non-blocking recv primitive
3726  !Pre-posting can help overlap communication with computing inverses
3727  !---------------------------------------------------------------------------
3728  nbrmpireqindx = 1
3729  nbrmpinirecvs = 0
3730  nbrrank = lr2rank(startlocrow-1,level)
3731  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
3732  !-------------------------------------------------------------------------
3733  !Our top row at this level is odd and top row's previous is valid
3734  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
3735  !-------------------------------------------------------------------------
3736  globrowoff = 0
3737  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
3738 
3739  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3740  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #60')
3741  END IF
3742  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3743  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #61')
3744  END IF
3745  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3746  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #62')
3747  END IF
3748 
3749  CALL bsystemclock(loctimer1)
3750  CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3751  nbrrank, 1, siesta_comm, nbrmpireq(nbrmpireqindx), &
3752  nbrmpierr(nbrmpireqindx) )
3753  nbrmpireqindx = nbrmpireqindx + 1
3754  nbrmpinirecvs = nbrmpinirecvs + 1
3755  IF(kpdbg) WRITE(ofu,*) ' Irecv 1 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3756 
3757  CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3758  nbrrank, 2, siesta_comm, nbrmpireq(nbrmpireqindx), &
3759  nbrmpierr(nbrmpireqindx))
3760  nbrmpireqindx = nbrmpireqindx + 1
3761  nbrmpinirecvs = nbrmpinirecvs + 1
3762  IF(kpdbg) WRITE(ofu,*) ' Irecv 2 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3763 
3764  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3765  nbrrank, 3, siesta_comm, nbrmpireq(nbrmpireqindx), &
3766  nbrmpierr(nbrmpireqindx))
3767  nbrmpireqindx = nbrmpireqindx + 1
3768  nbrmpinirecvs = nbrmpinirecvs + 1
3769  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3770  CALL bsystemclock(loctimer2)
3771  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3772  END IF
3773 
3774  nbrrank = lr2rank(endlocrow+1,level)
3775  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
3776  !-------------------------------------------------------------------------
3777  !Our bottom row at this level is odd and bottom row's next is valid
3778  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
3779  !-------------------------------------------------------------------------
3780  globrowoff = endglobrow-startglobrow+2
3781  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
3782 
3783  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3784  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #63')
3785  END IF
3786  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3787  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #64')
3788  END IF
3789  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3790  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #65')
3791  END IF
3792 
3793  CALL bsystemclock(loctimer1)
3794  CALL mpi_irecv( lelement(level, globrowoff)%L, m*m, mpi_real8, &
3795  nbrrank, 4, siesta_comm, nbrmpireq(nbrmpireqindx), &
3796  nbrmpierr(nbrmpireqindx))
3797  nbrmpireqindx = nbrmpireqindx + 1
3798  nbrmpinirecvs = nbrmpinirecvs + 1
3799  IF(kpdbg) WRITE(ofu,*) ' Irecv 4 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3800 
3801  CALL mpi_irecv( lelement(level, globrowoff)%U, m*m, mpi_real8, &
3802  nbrrank, 5, siesta_comm, nbrmpireq(nbrmpireqindx), &
3803  nbrmpierr(nbrmpireqindx))
3804  nbrmpireqindx = nbrmpireqindx + 1
3805  nbrmpinirecvs = nbrmpinirecvs + 1
3806  IF(kpdbg) WRITE(ofu,*) ' Irecv 5 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3807 
3808  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
3809  nbrrank, 6, siesta_comm, nbrmpireq(nbrmpireqindx), &
3810  nbrmpierr(nbrmpireqindx))
3811  nbrmpireqindx = nbrmpireqindx + 1
3812  nbrmpinirecvs = nbrmpinirecvs + 1
3813  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
3814  CALL bsystemclock(loctimer2)
3815  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3816  END IF
3817 
3818  !---------------------------------------------------------------------------
3819  !Compute inverses for even-numbered rows at this level
3820  !---------------------------------------------------------------------------
3821  nbrmpinisends = 0
3822  DO locrow = startlocrow, endlocrow, 1
3823  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
3824  globrow = lr2gr( locrow, level )
3825  globrowoff = globrow-startglobrow+1
3826 
3827  IF(kpdbg) WRITE(ofu,*) ' Compute even hats ',globrow,' ', locrow; CALL fl(ofu)
3828 
3829  !-----------------------------------------------------------------------
3830  !Sanity checks
3831  !-----------------------------------------------------------------------
3832  IF ( level .EQ. nlevels ) THEN
3833  IF ( (rank .NE. 0) .OR. &
3834  (startlocrow .NE. endlocrow) .OR. &
3835  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
3836  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
3837  CALL assert(.false.,'blocktri #66')
3838  END IF
3839  END IF
3840  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
3841  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); CALL assert(.false.,'blocktri #67')
3842  END IF
3843  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
3844  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); CALL assert(.false.,'blocktri #68')
3845  END IF
3846  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3847  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #69')
3848  END IF
3849  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3850  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #70')
3851  END IF
3852  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3853  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #71')
3854  END IF
3855 
3856  !-----------------------------------------------------------------------
3857  !Do LU factorization of D (in place into D itself)
3858  !-----------------------------------------------------------------------
3859  CALL bsystemclock(mattimer1)
3860  CALL plbdgetrf( lelement(level,globrowoff)%D, &
3861  lelement(1,globrowoff)%pivot, blaserr )
3862  CALL bsystemclock(mattimer2)
3863  CALL chargetime( totinvtime, mattimer2, mattimer1, totinvcount )
3864  IF (blaserr .NE. 0) THEN
3865  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRF failed ', blaserr; CALL fl(ofu); CALL assert(.false.,'blocktri #72')
3866  END IF
3867 
3868  !-----------------------------------------------------------------------
3869  !Compute hats D^(-1)L and D^(-1)U if not last level
3870  !-----------------------------------------------------------------------
3871  IF ( level .LT. nlevels ) THEN
3872  CALL bsystemclock(mattimer1)
3873  CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3874  lelement(1,globrowoff)%pivot, &
3875  lelement(level,globrowoff)%L, blaserr )
3876  CALL bsystemclock(mattimer2)
3877  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3878  IF (blaserr .NE. 0) THEN
3879  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS L failed'; CALL fl(ofu); CALL assert(.false.,'blocktri #73')
3880  END IF
3881  CALL bsystemclock(mattimer1)
3882  CALL plbdgetrs( m, lelement(level,globrowoff)%D, &
3883  lelement(1,globrowoff)%pivot, &
3884  lelement(level,globrowoff)%U, blaserr )
3885  CALL bsystemclock(mattimer2)
3886  CALL chargetime( totmatsoltime, mattimer2, mattimer1, totmatsolcount )
3887  IF (blaserr .NE. 0) THEN
3888  IF(kpdbg) WRITE(ofu,*) ' PLBDGETRS U failed'; CALL fl(ofu); CALL assert(.false.,'blocktri #74')
3889  END IF
3890  END IF
3891 
3892  !-----------------------------------------------------------------------
3893  !Compute b-hats D^(-1)b
3894  !-----------------------------------------------------------------------
3895  CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
3896  lelement(1,globrowoff)%pivot, &
3897  lelement(level,globrowoff)%b, blaserr )
3898  IF (blaserr .NE. 0) THEN
3899  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); CALL assert(.false.,'blocktri #75')
3900  END IF
3901 
3902  !-----------------------------------------------------------------------
3903  !Send my Lhats, Uhats and bhats to neighbor row if that neighbor exists
3904  !and that neighbor row is hosted outside this rank
3905  !-----------------------------------------------------------------------
3906  DO rowoffset = -1, 1, 2
3907  nbrrank = lr2rank(locrow+rowoffset, level)
3908  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
3909  CALL bsystemclock(loctimer1)
3910  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
3911  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #76')
3912  END IF
3913  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
3914  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #77')
3915  END IF
3916  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
3917  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #78')
3918  END IF
3919 
3920  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
3921  globrowoff = globrow-startglobrow+1
3922  stag = (((1-rowoffset))/2)*3
3923  CALL mpi_isend( lelement(level,globrowoff)%L, m*m, mpi_real8, &
3924  nbrrank, stag+1, siesta_comm, &
3925  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3926  nbrmpireqindx = nbrmpireqindx + 1
3927  nbrmpinisends = nbrmpinisends + 1
3928  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+1,' ',globrowoff; CALL fl(ofu)
3929 
3930  CALL mpi_isend( lelement(level,globrowoff)%U, m*m, mpi_real8, &
3931  nbrrank, stag+2, siesta_comm, &
3932  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3933  nbrmpireqindx = nbrmpireqindx + 1
3934  nbrmpinisends = nbrmpinisends + 1
3935  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+2,' ',globrowoff; CALL fl(ofu)
3936 
3937  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
3938  nbrrank, stag+3, siesta_comm, &
3939  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
3940  nbrmpireqindx = nbrmpireqindx + 1
3941  nbrmpinisends = nbrmpinisends + 1
3942  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
3943  CALL bsystemclock(loctimer2)
3944  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3945  END IF
3946  END DO
3947  END IF
3948  END DO
3949 
3950  !---------------------------------------------------------------------------
3951  !Compute the matrix-matrix multiplications for non-boundary odd rows
3952  !---------------------------------------------------------------------------
3953  DO locrow = startlocrow, endlocrow, 1
3954  globrow = lr2gr( locrow, level )
3955  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
3956  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
3957  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
3958  CALL computeforwardoddrowhats( locrow, level, &
3959  startlocrow, endlocrow, .false. )
3960  END IF
3961  END IF
3962  END DO
3963 
3964  !---------------------------------------------------------------------------
3965  !We have to wait for incoming even-numbered neighboring rows, before
3966  !computing inverses for the odd boundaries, if any.
3967  !Complete the send-receive of inverses, by doing a "wait all"
3968  !---------------------------------------------------------------------------
3969  nbrmpireqcnt = nbrmpireqindx - 1
3970  IF ( nbrmpireqcnt .GT. 0 ) THEN
3971  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
3972  CALL bsystemclock(loctimer1)
3973  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
3974  CALL bsystemclock(loctimer2)
3975  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
3976  END IF
3977 
3978  !---------------------------------------------------------------------------
3979  !Now we can compute inverses for odd boundaries, if any
3980  !---------------------------------------------------------------------------
3981  IF ( isodd(startlocrow) ) THEN
3982  globrow = lr2gr( startlocrow, level )
3983  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
3984  CALL computeforwardoddrowhats( startlocrow, level, &
3985  startlocrow, endlocrow, .false. )
3986  END IF
3987  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
3988  globrow = lr2gr( endlocrow, level )
3989  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
3990  CALL computeforwardoddrowhats( endlocrow, level, &
3991  startlocrow, endlocrow, .false. )
3992  END IF
3993  CALL plbforwardfinalizelevel( level, .true. )
3994  END DO forwardsolveloop
3995 
3996  !-----------------------------------------------------------------------------
3997  CALL plbforwardfinalize()
3998 
3999  !-----------------------------------------------------------------------------
4000  matdirtied = .false.
4001  rhsdirtied = .false.
4002 
4003  !-----------------------------------------------------------------------------
4004  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4005  IF(kpdbg) WRITE(ofu,*) '------ Forward solve end ------'; CALL fl(ofu)
4006 
4007  CALL bsystemclock(globtimer2)
4008  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4009 
4010 END SUBROUTINE forwardsolve
4011 
4012 !-------------------------------------------------------------------------------
4017 !-------------------------------------------------------------------------------
4018 SUBROUTINE forwardupdateb
4019  !-----------------------------------------------------------------------------
4020  ! Local variables
4021  !-----------------------------------------------------------------------------
4022  INTEGER :: level
4023  INTEGER :: locrow
4024  INTEGER :: globrow
4025  INTEGER :: globrowoff
4026  INTEGER :: startlocrow
4027  INTEGER :: endlocrow
4028  INTEGER :: rowoffset
4029  INTEGER :: nbrrank
4030  INTEGER :: nbrmpireqindx
4031  INTEGER :: nbrmpireqcnt
4032  INTEGER :: nbrmpinirecvs
4033  INTEGER :: nbrmpinisends
4034  INTEGER :: nbrmpireq(6)
4035  INTEGER :: nbrmpierr(6)
4036  INTEGER :: mpiwaiterr
4037  INTEGER :: mpierr
4038  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,6)
4039  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4040  INTEGER :: waitmatchindex
4041  INTEGER :: stag
4042  INTEGER :: reqi
4043  INTEGER :: blaserr
4044 
4045  !-----------------------------------------------------------------------------
4046  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4047  IF(kpdbg) WRITE(ofu,*) '------ Forward updateb start ------'; CALL fl(ofu)
4048  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4049 
4050  !-----------------------------------------------------------------------------
4051  IF ( matdirtied ) THEN
4052  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4053  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix ------'; CALL fl(ofu)
4054  IF(kpdbg) WRITE(ofu,*) ' ------ Forward solve, not updateb... ------'; CALL fl(ofu)
4055  CALL forwardsolve()
4056  RETURN
4057  END IF
4058 
4059  CALL bsystemclock(globtimer1)
4060 
4061  !-----------------------------------------------------------------------------
4062  CALL plbforwardinitialize()
4063 
4064  !-----------------------------------------------------------------------------
4065  ! Forward solving loop in which even rows have already computed inverses and
4066  ! odd rows recompute matrix-vector products with new rhs
4067  !-----------------------------------------------------------------------------
4068  forwardsolveloop: DO level = 1, nlevels, 1
4069  !---------------------------------------------------------------------------
4070  IF ( usebarriers ) THEN
4071  IF(kpdbg) WRITE(ofu,*) 'Barrier at forward updateb level ', level; CALL fl(ofu)
4072  CALL mpi_barrier( siesta_comm, mpierr )
4073  IF(kpdbg) WRITE(ofu,*) 'Done barrier at forward updateb level ', level; CALL fl(ofu)
4074  END IF
4075 
4076  !---------------------------------------------------------------------------
4077  !Determine start and end local rows at current level
4078  !---------------------------------------------------------------------------
4079  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
4080  endlocrow = -1 !Start with an invalid value (less than startlocrow)
4081  DO globrow = startglobrow, endglobrow, 1
4082  locrow = gr2lr( globrow, level ) !globrow's place at current level
4083  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
4084  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
4085  startlocrow = locrow
4086  END IF
4087  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
4088  endlocrow = locrow
4089  END IF
4090  END IF
4091  END DO
4092 
4093  !---------------------------------------------------------------------------
4094  !We may have run out of work; see if we have a valid range of rows left
4095  !---------------------------------------------------------------------------
4096  IF ( startlocrow .GT. endlocrow ) THEN !No rows left at/above this level
4097  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at forward updateb level ',level; CALL fl(ofu)
4098  CALL plbforwardinitializelevel( level, .false. )
4099  CALL plbforwardfinalizelevel( level, .false. )
4100  cycle forwardsolveloop !Move on to next level; don't EXIT (for barriers)
4101  ELSE
4102  CALL plbforwardinitializelevel( level, .true. )
4103  END IF
4104 
4105  IF(kpdbg) WRITE(ofu,*) '**** Forward updateb level ', level, ' ****'; CALL fl(ofu)
4106 
4107  !---------------------------------------------------------------------------
4108  !Memory has already been allocated in ForwardSolve for incoming nbr values,
4109  !if any, at this level
4110  !---------------------------------------------------------------------------
4111  IF ( isodd(startlocrow) .AND. &
4112  (lr2rank(startlocrow-1,level) .GE. 0) ) THEN
4113  globrowoff = 0
4114  IF( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4115  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4116  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
4117  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +0 memory error'; CALL fl(ofu); CALL assert(.false.,'blocktri #79')
4118  END IF
4119  lelement(level, globrowoff)%b = 0
4120  END IF
4121  IF ( isodd(endlocrow) .AND. &
4122  (lr2rank(endlocrow+1,level) .GE. 0) ) THEN
4123  globrowoff = endglobrow-startglobrow+2
4124  IF ( .NOT. ( ALLOCATED( lelement(level, globrowoff)%L ) .AND. &
4125  ALLOCATED( lelement(level, globrowoff)%U ) .AND. &
4126  ALLOCATED( lelement(level, globrowoff)%b ) ) ) THEN
4127  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +2 memory error'; CALL fl(ofu); CALL assert(.false.,'blocktri #80')
4128  END IF
4129  lelement(level, globrowoff)%b = 0
4130  END IF
4131 
4132  !---------------------------------------------------------------------------
4133  !Memory has already been allocated in ForwardSolve at next level for those
4134  !rows that are odd at current level
4135  !---------------------------------------------------------------------------
4136  IF ( level .LT. nlevels ) THEN
4137  DO locrow = startlocrow, endlocrow, 1
4138  IF ( isodd(locrow) ) THEN
4139  globrow = lr2gr( locrow, level )
4140  globrowoff = globrow-startglobrow+1
4141  IF ( .NOT. ( ALLOCATED( lelement(level+1, globrowoff)%L ) .AND. &
4142  ALLOCATED( lelement(level+1, globrowoff)%D ) .AND. &
4143  ALLOCATED( lelement(level+1, globrowoff)%U ) .AND. &
4144  ALLOCATED( lelement(level+1, globrowoff)%b ) ) ) THEN
4145  IF(kpdbg) WRITE(ofu,*)'ForwardUpdateb +1 memory error'; CALL fl(ofu); CALL assert(.false.,'blocktri #81')
4146  END IF
4147  lelement(level+1, globrowoff)%b = 0
4148  END IF
4149  END DO
4150  END IF
4151 
4152  !---------------------------------------------------------------------------
4153  !Reset requests
4154  !---------------------------------------------------------------------------
4155  DO nbrmpireqindx = 1, 6
4156  nbrmpireq(nbrmpireqindx) = mpi_request_null
4157  END DO
4158 
4159  !---------------------------------------------------------------------------
4160  !Pre-post the expected MPI receives via a non-blocking recv primitive
4161  !Pre-posting can help overlap communication with computing inverses
4162  !---------------------------------------------------------------------------
4163  nbrmpireqindx = 1
4164  nbrmpinirecvs = 0
4165  nbrrank = lr2rank(startlocrow-1,level)
4166  IF ( isodd(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
4167  !-------------------------------------------------------------------------
4168  !Our top row at this level is odd and top row's previous is valid
4169  !We will get the previous (even-numbered) row's L-hat, U-hat, and b-hat
4170  !-------------------------------------------------------------------------
4171  globrowoff = 0
4172  IF(kpdbg) WRITE(ofu,*) ' Irecv ',startlocrow-1,' ',nbrrank,' '; CALL fl(ofu)
4173 
4174  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4175  IF(kpdbg) WRITE(ofu,*)' Irecv bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #82')
4176  END IF
4177  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4178  IF(kpdbg) WRITE(ofu,*)' Irecv bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #83')
4179  END IF
4180  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4181  IF(kpdbg) WRITE(ofu,*)' Irecv bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #84')
4182  END IF
4183 
4184  CALL bsystemclock(loctimer1)
4185  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4186  nbrrank, 3, siesta_comm, nbrmpireq(nbrmpireqindx), &
4187  nbrmpierr(nbrmpireqindx))
4188  nbrmpireqindx = nbrmpireqindx + 1
4189  nbrmpinirecvs = nbrmpinirecvs + 1
4190  IF(kpdbg) WRITE(ofu,*) ' Irecv 3 top ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
4191  CALL bsystemclock(loctimer2)
4192  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4193  END IF
4194 
4195  nbrrank = lr2rank(endlocrow+1,level)
4196  IF ( isodd(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
4197  !-------------------------------------------------------------------------
4198  !Our bottom row at this level is odd and bottom row's next is valid
4199  !We will get the next (even-numbered) row's L-hat, U-hat, and b-hat
4200  !-------------------------------------------------------------------------
4201  globrowoff = endglobrow-startglobrow+2
4202  IF(kpdbg) WRITE(ofu,*) ' Irecv ',endlocrow+1,' ', nbrrank; CALL fl(ofu)
4203 
4204  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4205  IF(kpdbg) WRITE(ofu,*)'Irecv bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #85')
4206  END IF
4207  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4208  IF(kpdbg) WRITE(ofu,*)'Irecv bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #86')
4209  END IF
4210  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4211  IF(kpdbg) WRITE(ofu,*)'Irecv bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #87')
4212  END IF
4213 
4214  CALL bsystemclock(loctimer1)
4215  CALL mpi_irecv( lelement(level, globrowoff)%b, m, mpi_real8, &
4216  nbrrank, 6, siesta_comm, nbrmpireq(nbrmpireqindx), &
4217  nbrmpierr(nbrmpireqindx))
4218  nbrmpireqindx = nbrmpireqindx + 1
4219  nbrmpinirecvs = nbrmpinirecvs + 1
4220  IF(kpdbg) WRITE(ofu,*) ' Irecv 6 bot ',nbrrank,' ',nbrmpireqindx-1; CALL fl(ofu)
4221  CALL bsystemclock(loctimer2)
4222  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4223  END IF
4224 
4225  !---------------------------------------------------------------------------
4226  !Inverses have already been computed for even-numbered rows at this level
4227  !---------------------------------------------------------------------------
4228  nbrmpinisends = 0
4229  DO locrow = startlocrow, endlocrow, 1
4230  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
4231  globrow = lr2gr( locrow, level )
4232  globrowoff = globrow-startglobrow+1
4233 
4234  IF(kpdbg) WRITE(ofu,*) ' Computed even hats ',globrow,' ', locrow; CALL fl(ofu)
4235 
4236  !-----------------------------------------------------------------------
4237  !Sanity checks
4238  !-----------------------------------------------------------------------
4239  IF ( level .EQ. nlevels ) THEN
4240  IF ( (rank .NE. 0) .OR. &
4241  (startlocrow .NE. endlocrow) .OR. &
4242  (locrow .NE. 1) .OR. (globrow .NE. 1) ) THEN
4243  IF(kpdbg) WRITE(ofu,*) ' EVEN ERROR ',globrow,' ', locrow; CALL fl(ofu)
4244  CALL assert(.false.,'blocktri #88')
4245  END IF
4246  END IF
4247  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%D) ) THEN
4248  IF(kpdbg) WRITE(ofu,*)'Chats bad D'; CALL fl(ofu); CALL assert(.false.,'blocktri #89')
4249  END IF
4250  IF ( .NOT. ALLOCATED(lelement(1,globrowoff)%pivot) ) THEN
4251  IF(kpdbg) WRITE(ofu,*)'Chats bad pivot'; CALL fl(ofu); CALL assert(.false.,'blocktri #90')
4252  END IF
4253  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4254  IF(kpdbg) WRITE(ofu,*)'Chats bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #91')
4255  END IF
4256  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4257  IF(kpdbg) WRITE(ofu,*)'Chats bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #92')
4258  END IF
4259  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4260  IF(kpdbg) WRITE(ofu,*)'Chats bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #93')
4261  END IF
4262 
4263  !-----------------------------------------------------------------------
4264  !LU factorization of D has already been performed in place into D itself
4265  !-----------------------------------------------------------------------
4266 
4267  !-----------------------------------------------------------------------
4268  !Compute b-hats D^(-1)b
4269  !-----------------------------------------------------------------------
4270  CALL plbdgetrs( 1, lelement(level,globrowoff)%D, &
4271  lelement(1,globrowoff)%pivot, &
4272  lelement(level,globrowoff)%b, blaserr )
4273  IF (blaserr .NE. 0) THEN
4274  IF(kpdbg) WRITE(ofu,*) 'PLBDGETRS b failed'; CALL fl(ofu); CALL assert(.false.,'blocktri #94')
4275  END IF
4276 
4277  !-----------------------------------------------------------------------
4278  !Send my bhats to neighbor row if that neighbor exists
4279  !and that neighbor row is hosted outside this rank
4280  !-----------------------------------------------------------------------
4281  DO rowoffset = -1, 1, 2
4282  nbrrank = lr2rank(locrow+rowoffset, level)
4283  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
4284  CALL bsystemclock(loctimer1)
4285  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%L) ) THEN
4286  IF(kpdbg) WRITE(ofu,*)'ISend bad L'; CALL fl(ofu); CALL assert(.false.,'blocktri #95')
4287  END IF
4288  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%U) ) THEN
4289  IF(kpdbg) WRITE(ofu,*)'ISend bad U'; CALL fl(ofu); CALL assert(.false.,'blocktri #96')
4290  END IF
4291  IF ( .NOT. ALLOCATED(lelement(level,globrowoff)%b) ) THEN
4292  IF(kpdbg) WRITE(ofu,*)'ISend bad b'; CALL fl(ofu); CALL assert(.false.,'blocktri #97')
4293  END IF
4294 
4295  IF(kpdbg) WRITE(ofu,*)' ISend ',globrow,' ',locrow,' ',nbrrank;CALL fl(ofu)
4296  globrowoff = globrow-startglobrow+1
4297  stag = (((1-rowoffset))/2)*3
4298  CALL mpi_isend( lelement(level,globrowoff)%b, m, mpi_real8, &
4299  nbrrank, stag+3, siesta_comm, &
4300  nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx))
4301  nbrmpireqindx = nbrmpireqindx + 1
4302  nbrmpinisends = nbrmpinisends + 1
4303  IF(kpdbg) WRITE(ofu,*) ' Isend ',stag+3,' ',globrowoff; CALL fl(ofu)
4304  CALL bsystemclock(loctimer2)
4305  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4306  END IF
4307  END DO
4308  END IF
4309  END DO
4310 
4311  !---------------------------------------------------------------------------
4312  !Compute the matrix-vector multiplications for non-boundary odd rows
4313  !---------------------------------------------------------------------------
4314  DO locrow = startlocrow, endlocrow, 1
4315  globrow = lr2gr( locrow, level )
4316  IF ( isodd( locrow ) .AND. (level .NE. nlevels) ) THEN
4317  IF ( (locrow .NE. startlocrow) .AND. (locrow .NE. endlocrow) ) THEN
4318  IF(kpdbg) WRITE(ofu,*) ' Precomp mat-mul ',globrow,' ',locrow; CALL fl(ofu)
4319  CALL computeforwardoddrowhats( locrow, level, &
4320  startlocrow, endlocrow, .true. )
4321  END IF
4322  END IF
4323  END DO
4324 
4325  !---------------------------------------------------------------------------
4326  !We have to wait for incoming even-numbered neighboring rows, before
4327  !computing inverses for the odd boundaries, if any.
4328  !Complete the send-receive of inverses, by doing a "wait all"
4329  !---------------------------------------------------------------------------
4330  nbrmpireqcnt = nbrmpireqindx - 1
4331  IF ( nbrmpireqcnt .GT. 0 ) THEN
4332  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4333  CALL bsystemclock(loctimer1)
4334  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4335  CALL bsystemclock(loctimer2)
4336  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4337  END IF
4338 
4339  !---------------------------------------------------------------------------
4340  !Now we can compute inverses for odd boundaries, if any
4341  !---------------------------------------------------------------------------
4342  IF ( isodd(startlocrow) ) THEN
4343  globrow = lr2gr( startlocrow, level )
4344  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',startlocrow;CALL fl(ofu)
4345  CALL computeforwardoddrowhats( startlocrow, level, &
4346  startlocrow, endlocrow, .true. )
4347  END IF
4348  IF ( (startlocrow .NE. endlocrow) .AND. isodd(endlocrow) ) THEN
4349  globrow = lr2gr( endlocrow, level )
4350  IF(kpdbg) WRITE(ofu,*) ' Postcomp mat-mul ',globrow,' ',endlocrow;CALL fl(ofu)
4351  CALL computeforwardoddrowhats( endlocrow, level, &
4352  startlocrow, endlocrow, .true. )
4353  END IF
4354  CALL plbforwardfinalizelevel( level, .true. )
4355  END DO forwardsolveloop
4356 
4357  !-----------------------------------------------------------------------------
4358  CALL plbforwardfinalize()
4359 
4360  !-----------------------------------------------------------------------------
4361  rhsdirtied = .false.
4362 
4363  !-----------------------------------------------------------------------------
4364  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4365  IF(kpdbg) WRITE(ofu,*) '------ updateb solve end ------'; CALL fl(ofu)
4366 
4367  CALL bsystemclock(globtimer2)
4368  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4369 
4370 END SUBROUTINE forwardupdateb
4371 
4372 !-------------------------------------------------------------------------------
4376 !-------------------------------------------------------------------------------
4377 SUBROUTINE backwardsolve
4378  !-----------------------------------------------------------------------------
4379  ! Local variables
4380  !-----------------------------------------------------------------------------
4381  INTEGER :: level
4382  INTEGER :: locrow
4383  INTEGER :: globrow
4384  INTEGER :: globrowoff
4385  INTEGER :: prevglobrow
4386  INTEGER :: nextglobrow
4387  INTEGER :: startlocrow
4388  INTEGER :: endlocrow
4389  INTEGER :: rowoffset
4390  INTEGER :: nbrrank
4391  INTEGER :: nbrmpireqindx
4392  INTEGER :: nbrmpireqcnt
4393  INTEGER :: nbrmpinirecvs
4394  INTEGER :: nbrmpinisends
4395  INTEGER :: nbrmpireq(2)
4396  INTEGER :: nbrmpierr(2)
4397  INTEGER :: mpiwaiterr
4398  INTEGER :: mpierr
4399  INTEGER :: nbrmpistatuses(mpi_status_size,2)
4400  INTEGER :: nbrmpistatus(mpi_status_size)
4401  INTEGER :: waitmatchindex
4402  INTEGER :: stag
4403  INTEGER :: reqi
4404  INTEGER :: blaserr
4405 
4406  CALL bsystemclock(globtimer1)
4407 
4408  !-----------------------------------------------------------------------------
4409  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4410  IF(kpdbg) WRITE(ofu,*) '------ Backward solve start ------'; CALL fl(ofu)
4411 
4412  !-----------------------------------------------------------------------------
4413  IF ( matdirtied ) THEN
4414  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4415  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied matrix; updating... ------'; CALL fl(ofu)
4416  CALL forwardsolve()
4417  END IF
4418  IF ( rhsdirtied ) THEN
4419  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4420  IF(kpdbg) WRITE(ofu,*) ' ------ Dirtied RHS; updating... ------'; CALL fl(ofu)
4421  CALL forwardupdateb()
4422  END IF
4423 
4424  !-----------------------------------------------------------------------------
4425  CALL plbbackwardinitialize()
4426 
4427  !-----------------------------------------------------------------------------
4428  ! Make sure we have the forward solve data
4429  !-----------------------------------------------------------------------------
4430  IF ( (.NOT. ALLOCATED(lelement)) .OR. (.NOT. ALLOCATED(selement)) ) THEN
4431  IF(kpdbg) WRITE(ofu,*) 'Forward not called before backward?'; CALL fl(ofu)
4432  RETURN
4433  END IF
4434 
4435  !-----------------------------------------------------------------------------
4436  ! Backward solving loop in which odd rows communicate their solution to even
4437  ! rows and even rows compute their solution
4438  !-----------------------------------------------------------------------------
4439  backwardsolveloop: DO level = nlevels, 1, -1
4440  !---------------------------------------------------------------------------
4441  IF ( usebarriers ) THEN
4442  IF(kpdbg) WRITE(ofu,*) 'Barrier at backward level ', level; CALL fl(ofu)
4443  CALL mpi_barrier( siesta_comm, mpierr )
4444  IF(kpdbg) WRITE(ofu,*) 'Done barrier at backward level ', level; CALL fl(ofu)
4445  END IF
4446 
4447  !---------------------------------------------------------------------------
4448  !Determine start and end local rows at current level
4449  !---------------------------------------------------------------------------
4450  startlocrow = n+1 !Start with an invalid value (greater than endlocrow)
4451  endlocrow = -1 !Start with an invalid value (less than startlocrow)
4452  DO globrow = startglobrow, endglobrow, 1
4453  locrow = gr2lr( globrow, level ) !globrow's place at current level
4454  IF ( locrow .GT. 0 ) THEN !This global row participates at current level
4455  IF ( locrow .LT. startlocrow ) THEN !A valid, earlier local row
4456  startlocrow = locrow
4457  END IF
4458  IF ( locrow .GT. endlocrow ) THEN !A valid, later local row
4459  endlocrow = locrow
4460  END IF
4461  END IF
4462  END DO
4463 
4464  !---------------------------------------------------------------------------
4465  !We may have nothing to do at this level; if so, just go back another level
4466  !---------------------------------------------------------------------------
4467  IF ( startlocrow .GT. endlocrow ) THEN !No more rows left at this level
4468  IF(kpdbg) WRITE(ofu,*) 'Nothing to do at backward level', level; CALL fl(ofu)
4469  CALL plbbackwardinitializelevel( level, .false. )
4470  CALL plbbackwardfinalizelevel( level, .false. )
4471  cycle backwardsolveloop !Move on to previous level
4472  ELSE
4473  CALL plbbackwardinitializelevel( level, .true. )
4474  END IF
4475 
4476  IF(kpdbg) WRITE(ofu,*) '**** Backward level ', level, ' ****'; CALL fl(ofu)
4477 
4478  !---------------------------------------------------------------------------
4479  !Reset requests
4480  !---------------------------------------------------------------------------
4481  DO nbrmpireqindx = 1, 2
4482  nbrmpireq(nbrmpireqindx) = mpi_request_null
4483  END DO
4484 
4485  !---------------------------------------------------------------------------
4486  !Pre-post the expected MPI receives via a non-blocking recv primitive
4487  !Pre-posting can help overlap communication with computing inverses
4488  !---------------------------------------------------------------------------
4489  nbrmpireqindx = 1
4490  nbrmpinirecvs = 0
4491  nbrrank = lr2rank(startlocrow-1,level)
4492  IF( iseven(startlocrow) .AND. (nbrrank .GE. 0) ) THEN
4493  !-------------------------------------------------------------------------
4494  !Our top row at this level is even and top row's previous is valid
4495  !We will get the previous (odd-numbered) row's solution
4496  !-------------------------------------------------------------------------
4497  stag = 1
4498  globrow = lr2gr(startlocrow-1,level)
4499  IF(kpdbg) WRITE(ofu,*)' Irecv ',startlocrow-1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4500  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4501  ALLOCATE( selement(globrow)%x(m) )
4502  CALL chargememory( m*dpsz )
4503  END IF
4504  CALL bsystemclock(loctimer1)
4505  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4506  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4507  nbrmpireqindx = nbrmpireqindx + 1
4508  nbrmpinirecvs = nbrmpinirecvs + 1
4509  CALL bsystemclock(loctimer2)
4510  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4511  END IF
4512  nbrrank = lr2rank(endlocrow+1,level)
4513  IF ( iseven(endlocrow) .AND. (nbrrank .GE. 0) ) THEN
4514  !-------------------------------------------------------------------------
4515  !Our bottom row at this level is even and bottom row's next is valid
4516  !We will get the next (odd-numbered) row's solution
4517  !-------------------------------------------------------------------------
4518  stag = 2
4519  globrow = lr2gr(endlocrow+1,level)
4520  IF(kpdbg) WRITE(ofu,*)' Irecv ',endlocrow+1,' ',globrow,' ',nbrrank;CALL fl(ofu)
4521  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4522  ALLOCATE( selement(globrow)%x(m) )
4523  CALL chargememory( m*dpsz )
4524  END IF
4525  CALL bsystemclock(loctimer1)
4526  CALL mpi_irecv( selement(globrow)%x, m, mpi_real8, nbrrank, stag, &
4527  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4528  nbrmpireqindx = nbrmpireqindx + 1
4529  nbrmpinirecvs = nbrmpinirecvs + 1
4530  CALL bsystemclock(loctimer2)
4531  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4532  END IF
4533 
4534  !---------------------------------------------------------------------------
4535  !Send our odd-numbered rows' solutions
4536  !---------------------------------------------------------------------------
4537  nbrmpinisends = 0
4538  DO locrow = startlocrow, endlocrow, 1
4539  IF ( isodd( locrow ) ) THEN
4540  !-----------------------------------------------------------------------
4541  !Send my solution to neighbor row if that neighbor exists
4542  !and is hosted outside this rank; use non-blocking sends
4543  !-----------------------------------------------------------------------
4544  DO rowoffset = -1, 1, 2
4545  nbrrank = lr2rank(locrow+rowoffset, level)
4546  IF ( (nbrrank .GE. 0) .AND. (nbrrank .NE. rank) ) THEN
4547  IF(kpdbg) WRITE(ofu,*) ' Isend ', locrow, ' ', nbrrank; CALL fl(ofu)
4548  globrow = lr2gr(locrow,level)
4549  IF ( .NOT. ALLOCATED(selement(globrow)%x) ) THEN
4550  IF(kpdbg) WRITE(ofu,*) 'ERROR BISEND AT LEVEL', level; CALL fl(ofu)
4551  IF(kpdbg) WRITE(ofu,*) locrow, ' ', globrow, ' ', nbrrank; CALL fl(ofu)
4552  CALL assert(.false.,'blocktri #98')
4553  END IF
4554  stag = (1-rowoffset)/2+1
4555  CALL bsystemclock(loctimer1)
4556  CALL mpi_isend( selement(globrow)%x, m, mpi_real8, &
4557  nbrrank, stag, siesta_comm, nbrmpireq(nbrmpireqindx), &
4558  nbrmpierr(nbrmpireqindx) )
4559  nbrmpireqindx = nbrmpireqindx + 1
4560  nbrmpinisends = nbrmpinisends + 1
4561  CALL bsystemclock(loctimer2)
4562  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4563  END IF
4564  END DO
4565  END IF
4566  END DO
4567 
4568  !---------------------------------------------------------------------------
4569  !Complete the send-receive of solutions, by doing an MPI "wait all"
4570  !---------------------------------------------------------------------------
4571  nbrmpireqcnt = nbrmpireqindx - 1
4572  IF ( nbrmpireqcnt .GT. 0 ) THEN
4573  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4574  CALL bsystemclock(loctimer1)
4575  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4576  CALL bsystemclock(loctimer2)
4577  CALL chargetime( totcommtime, loctimer2, loctimer1, totcommcount )
4578  END IF
4579 
4580  !---------------------------------------------------------------------------
4581  !Compute the solution for even rows at this level
4582  !---------------------------------------------------------------------------
4583  DO locrow = startlocrow, endlocrow, 1
4584  IF ( iseven( locrow ) .OR. (level .EQ. nlevels) ) THEN
4585  globrow = lr2gr( locrow, level )
4586  globrowoff = globrow - startglobrow + 1
4587  IF ( ( level .EQ. nlevels ) .AND. (locrow .NE. 1) ) THEN
4588  IF(kpdbg) WRITE(ofu,*) 'ERROR ',level, ' ',globrow, ' ', locrow; CALL fl(ofu)
4589  CALL assert(.false.,'blocktri #99')
4590  END IF
4591 
4592  IF(kpdbg) WRITE(ofu,*) ' Compute solution ', globrow, ' ', locrow; CALL fl(ofu)
4593 
4594  IF ( .NOT. ALLOCATED( selement(globrow)%x ) ) THEN
4595  ALLOCATE( selement(globrow)%x(m) )
4596  CALL chargememory( m*dpsz )
4597  END IF
4598 
4599  !----------------------------------------------------
4600  ! x = b-hat
4601  !----------------------------------------------------
4602  selement(globrow)%x = lelement(level,globrowoff)%b(:,1)
4603 
4604  !----------------------------------------------------
4605  ! x = x - L-hat*x(prev)
4606  !----------------------------------------------------
4607  nbrrank = lr2rank(locrow-1,level)
4608  IF ( nbrrank .GE. 0 ) THEN
4609  prevglobrow = lr2gr(locrow-1,level)
4610  CALL plbdgemv( -one, lelement(level,globrowoff)%L, &
4611  selement(prevglobrow)%x, &
4612  one, selement(globrow)%x )
4613  END IF
4614 
4615  !----------------------------------------------------
4616  ! x = x - U-hat*x(next)
4617  !----------------------------------------------------
4618  nbrrank = lr2rank(locrow+1,level)
4619  IF ( nbrrank .GE. 0 ) THEN
4620  nextglobrow = lr2gr(locrow+1,level)
4621  CALL plbdgemv( -one, lelement(level,globrowoff)%U, &
4622  selement(nextglobrow)%x, &
4623  one, selement(globrow)%x )
4624  END IF
4625 
4626  !----------------------------------------------------
4627  !Print the solution vector, if desired
4628  !----------------------------------------------------
4629  IF ( .false. ) THEN
4630  IF(kpdbg) WRITE(ofu,*) ' x[',globrow,']=',selement(globrow)%x; CALL fl(ofu)
4631  END IF
4632  END IF
4633  END DO
4634  CALL plbbackwardfinalizelevel( level, .false. )
4635  END DO backwardsolveloop
4636 
4637  !-----------------------------------------------------------------------------
4638  CALL plbbackwardfinalize()
4639 
4640  !-----------------------------------------------------------------------------
4641  IF(kpdbg) WRITE(ofu,'(A,F6.1,A)') 'Memory so far: ', membytes/1e6, ' MB'; CALL fl(ofu)
4642  IF(kpdbg) WRITE(ofu,*) '------ Backward solve end ------'; CALL fl(ofu)
4643 
4644  CALL bsystemclock(globtimer2)
4645  CALL chargetime( tottime, globtimer2, globtimer1, totcount )
4646 
4647 END SUBROUTINE backwardsolve
4648 
4649 !-------------------------------------------------------------------------------
4653 !-------------------------------------------------------------------------------
4654 SUBROUTINE verifysolution
4655  !-----------------------------------------------------------------------------
4656  ! Local variables
4657  !-----------------------------------------------------------------------------
4658  INTEGER :: i, k, globrow, globrowoff
4659  REAL(dp) :: term, totrmserr
4660  INTEGER :: nbrrank
4661  INTEGER :: nbrmpireqindx
4662  INTEGER :: nbrmpireqcnt
4663  INTEGER :: nbrmpinirecvs
4664  INTEGER :: nbrmpinisends
4665  INTEGER :: nbrmpireq(2)
4666  INTEGER :: nbrmpierr(2)
4667  INTEGER :: mpiwaiterr
4668  INTEGER :: mpierr
4669  INTEGER :: nbrmpistatuses(MPI_STATUS_SIZE,2)
4670  INTEGER :: nbrmpistatus(MPI_STATUS_SIZE)
4671  INTEGER :: waitmatchindex
4672 
4673  !-----------------------------------------------------------------------------
4674  IF(kpdbg) WRITE(ofu,*); CALL fl(ofu)
4675  IF(kpdbg) WRITE(ofu,*) '------ Verifying solution ------'; CALL fl(ofu)
4676 
4677  !-----------------------------------------------------------------------------
4678  !Alocate memory, do Irecvs/Isends, for boundary odd/even rows resp.
4679  !-----------------------------------------------------------------------------
4680  nbrmpireqindx = 1
4681  nbrmpinirecvs = 0
4682  nbrmpinisends = 0
4683  nbrrank = gr2rank(startglobrow-1)
4684  IF ( isodd(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4685  IF ( .NOT. ALLOCATED( selement(startglobrow-1)%x ) ) THEN
4686  ALLOCATE( selement(startglobrow-1)%x(m) )
4687  END IF
4688  CALL mpi_irecv( selement(startglobrow-1)%x, m, mpi_real8, nbrrank, 1, &
4689  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4690  nbrmpireqindx = nbrmpireqindx + 1
4691  nbrmpinirecvs = nbrmpinirecvs + 1
4692  END IF
4693  nbrrank = gr2rank(endglobrow+1)
4694  IF ( isodd(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4695  IF ( .NOT. ALLOCATED( selement(endglobrow+1)%x ) ) THEN
4696  ALLOCATE( selement(endglobrow+1)%x(m) )
4697  END IF
4698  CALL mpi_irecv( selement(endglobrow+1)%x, m, mpi_real8, nbrrank, 2, &
4699  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4700  nbrmpireqindx = nbrmpireqindx + 1
4701  nbrmpinirecvs = nbrmpinirecvs + 1
4702  END IF
4703  nbrrank = gr2rank(startglobrow-1)
4704  IF ( iseven(startglobrow) .AND. (nbrrank .GE. 0) ) THEN
4705  CALL mpi_isend( selement(startglobrow)%x, m, mpi_real8, nbrrank, 2, &
4706  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4707  nbrmpireqindx = nbrmpireqindx + 1
4708  nbrmpinisends = nbrmpinisends + 1
4709  END IF
4710  nbrrank = gr2rank(endglobrow+1)
4711  IF ( iseven(endglobrow) .AND. (nbrrank .GE. 0) ) THEN
4712  CALL mpi_isend( selement(endglobrow)%x, m, mpi_real8, nbrrank, 1, &
4713  siesta_comm, nbrmpireq(nbrmpireqindx), nbrmpierr(nbrmpireqindx) )
4714  nbrmpireqindx = nbrmpireqindx + 1
4715  nbrmpinisends = nbrmpinisends + 1
4716  END IF
4717  nbrmpireqcnt = nbrmpireqindx - 1
4718  IF ( nbrmpireqcnt .GT. 0 ) THEN
4719  IF(kpdbg) WRITE(ofu,*) ' Wait ',nbrmpinirecvs,' ',nbrmpinisends; CALL fl(ofu)
4720  CALL mpi_waitall( nbrmpireqcnt, nbrmpireq, nbrmpistatuses, mpiwaiterr )
4721  END IF
4722 
4723  !-----------------------------------------------------------------------------
4724  totrmserr = 0
4725  DO globrow = startglobrow, endglobrow, 1
4726  globrowoff = globrow - startglobrow + 1
4727  DO i = 1, m, 1
4728  term = 0
4729  IF ( globrow .GT. 1 ) THEN
4730  term = term + sum( orig(globrowoff)%L(i,:) * selement(globrow-1)%x )
4731  END IF
4732 
4733  term = term + sum( orig(globrowoff)%D(i,:) * selement(globrow)%x )
4734 
4735  IF (writesolution) THEN
4736  IF( i .EQ. 1 ) THEN
4737  DO k = 1, m
4738  IF(kpdbg) WRITE(ofu,*) 'X[',(globrow-1)*m+k,']=', selement(globrow)%x(k);CALL fl(ofu)
4739  END DO
4740  END IF
4741  END IF
4742 
4743  IF ( globrow .LT. n ) THEN
4744  term = term + sum( orig(globrowoff)%U(i,:) * selement(globrow+1)%x )
4745  END IF
4746 
4747  totrmserr = totrmserr + (term - orig(globrowoff)%b(i,1))**2
4748  END DO
4749  END DO
4750 
4751  IF ( endglobrow .LT. startglobrow ) THEN
4752  totrmserr = 0
4753  ELSE
4754  totrmserr = sqrt( totrmserr / ((endglobrow-startglobrow+1) * m) )
4755  END IF
4756  IF(kpdbg) WRITE(ofu,'(A,E15.8E3)') 'TOTAL RMS ERROR = ', totrmserr; CALL fl(ofu)
4757  IF(kpdbg) WRITE(ofu,*) '------ Solution verified ------'; CALL fl(ofu)
4758 END SUBROUTINE verifysolution
4759 !-------------------------------------------------------------------------------
4760 
4761 !-------------------------------------------------------------------------------
4762 SUBROUTINE checkconditionnumber(nblkrow,bsize,anorm,rcond,info)
4763 
4764  INTEGER, INTENT(IN) :: nblkrow, bsize
4765  INTEGER, INTENT(OUT) :: info
4766  REAL(dp), INTENT(OUT) :: rcond, anorm
4767 
4768 
4769  INTEGER :: iblkrow, n1, i, j, offs, offsu, offk, kl, ku, ldab, icol, &
4770  rowmin, rowmax
4771  REAL(dp), ALLOCATABLE :: Band(:,:), Acol(:), work(:)
4772  INTEGER, ALLOCATABLE, DIMENSION(:) :: iwork, ipiv
4773 
4774 
4775  n1=nblkrow*bsize
4776  IF (nranks.NE.1) THEN
4777  print *, "Returning from CheckConditionNumber"
4778  info = -1
4779  RETURN
4780  END IF
4781 
4782  ku = 2*bsize-1
4783  kl = 2*bsize-1
4784  ldab = 2*kl+ku+1
4785  offk = kl+ku+1
4786 
4787  ALLOCATE(acol(n1))
4788  ALLOCATE(band(ldab, n1), ipiv(n1), iwork(n1), work(3*n1))
4789 
4790  offs = 0
4791  offsu = bsize
4792  anorm = 0
4793  band = 0
4794 
4795 !compute all cols (labelled by "j"), row-by-row Acol(1:n1)
4796  DO iblkrow = 1, nblkrow
4797  DO icol = 1, bsize
4798  j = offs+icol
4799  rowmin = offs-bsize+1; rowmax = offsu+bsize
4800  IF (iblkrow.GT.1) &
4801  acol(rowmin:offs)=orig(iblkrow-1)%U(:,icol)
4802  IF (iblkrow.LT.nblkrow) &
4803  acol(offsu+1:rowmax)=orig(iblkrow+1)%L(:,icol)
4804  acol(offs+1:offsu)=orig(iblkrow)%D(:,icol)
4805 
4806  IF (iblkrow.EQ.1) THEN
4807  rowmin = offs+1
4808  ELSE IF (iblkrow.EQ.nblkrow) THEN
4809  rowmax = offsu
4810  END IF
4811  anorm = max(anorm, sum(abs(acol(rowmin:rowmax))))
4812 
4813  DO i = max(1,j-ku,rowmin), min(n1,j+kl,rowmax)
4814  band(offk+i-j,j) = acol(i)
4815  END DO
4816 
4817  END DO
4818  offs = offsu
4819  offsu = offsu + bsize
4820  END DO
4821 
4822 
4823  DEALLOCATE (acol)
4824 
4825  CALL dgbtrf( n1, n1, kl, ku, band, ldab, ipiv, info )
4826  CALL dgbcon( '1', n1, kl, ku, band, ldab, ipiv, anorm, rcond, &
4827  work, iwork, info )
4828 
4829  IF (info.eq.0 .and. rcond.ne.zero) THEN
4830  rcond = one/rcond
4831  ELSE
4832  rcond = -one
4833  END IF
4834 
4835  DEALLOCATE(band,work,iwork,ipiv)
4836 
4837 END SUBROUTINE checkconditionnumber
4838 
4839 
4840 SUBROUTINE checksymmetry (asymIndx)
4841 
4842 REAL(dp), INTENT (OUT) :: asymIndx !On Processor 0 only
4843 REAL(dp) :: partdiff, eij, eji, summ
4844 REAL(dp), DIMENSION(2) :: temp
4845 REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: sendBuf, bottomLeftMatrix
4846 
4847 INTEGER :: topNeighbor, bottomNeighbor
4848 
4849 INTEGER :: mpistatus(MPI_STATUS_SIZE), mpi_err
4850 INTEGER :: blkrow,blktype,i,j
4851 
4852 ALLOCATE(bottomleftmatrix(m,m), sendbuf(m,m), stat=i)
4853 CALL assert(i.EQ.0,'Allocation error in CheckSymmetry')
4854 bottomleftmatrix=0
4855 sendbuf=0
4856 
4857 IF(nranks.EQ.1) THEN
4858  topneighbor=mpi_proc_null
4859  bottomneighbor=mpi_proc_null
4860 ELSE
4861  IF (rank.EQ.0) THEN
4862  topneighbor=mpi_proc_null
4863  bottomneighbor=rank+1
4864  ELSE IF (rank.EQ.nranks-1) THEN
4865  topneighbor=rank-1
4866  bottomneighbor=mpi_proc_null
4867  sendbuf=lelement(1,1)%L
4868  ELSE
4869  topneighbor=rank-1
4870  bottomneighbor=rank+1
4871  sendbuf=lelement(1,1)%L
4872  END IF
4873 END IF
4874 
4875 CALL mpi_sendrecv(sendbuf,m*m,mpi_real8,topneighbor,1, &
4876  bottomleftmatrix,m*m,mpi_real8,bottomneighbor,1,siesta_comm, &
4877  mpistatus,mpi_err)
4878 
4879 DEALLOCATE(sendbuf)
4880 IF (rank.EQ.nranks-1) bottomleftmatrix=0
4881 
4882 summ = 0
4883 temp = 0
4884 
4885 DO blkrow=1,endglobrow-startglobrow+1
4886  ! Compute contributions from the main diagonal blocks
4887  partdiff=0
4888  DO i=1,m
4889  DO j=i,m
4890  eij=lelement(1,blkrow)%D(i,j)
4891  eji=lelement(1,blkrow)%D(j,i)
4892  partdiff = partdiff+(eij-eji)*(eij-eji)
4893 
4894  IF (i.NE.j) THEN
4895  ! Contribution from the off-diagonal elements of
4896  ! this main diagonal block
4897  temp(2) = temp(2) + eij*eij + eji*eji
4898  ELSE
4899  ! Contributions to the norm from just the diagonal
4900  ! elements of this block
4901  temp(2) = temp(2) + eij*eij
4902  END IF
4903  END DO
4904  END DO
4905  temp(1) = temp(1) + 2*partdiff
4906 
4907  ! Compute contributions from off diagonal blocks
4908  DO i = 1, m
4909  DO j = 1, m
4910  IF (blkrow.NE.endglobrow-startglobrow+1) THEN
4911  eij=lelement(1,blkrow)%U(i,j)
4912  eji=lelement(1,blkrow+1)%L(j,i)
4913  temp(1) = temp(1) + (eij - eji)*(eij - eji)
4914  temp(2) = temp(2) + eij*eij + eji*eji
4915  ELSE !Local bottom block row
4916  IF (rank.NE.nranks-1) THEN
4917  eij=lelement(1,blkrow)%U(i,j)
4918  eji=bottomleftmatrix(j,i)
4919  temp(1) = temp(1) + (eij - eji)*(eij - eji)
4920  temp(2) = temp(2) + eij*eij + eji*eji
4921  END IF
4922  END IF
4923  END DO
4924  END DO
4925 
4926 END DO
4927 
4928 CALL flush(360+rank)
4929 
4930 DEALLOCATE(bottomleftmatrix)
4931 
4932 IF (rank .eq. 0) THEN
4933  CALL mpi_reduce(mpi_in_place,temp,2,mpi_real8,mpi_sum,0,siesta_comm,mpi_err)
4934 ELSE
4935  CALL mpi_reduce(temp,temp,2,mpi_real8,mpi_sum,0,siesta_comm,mpi_err)
4936 END IF
4937 IF (rank.EQ.0) THEN
4938  asymindx=sqrt(temp(1)/temp(2))
4939  !print *,'New asymIndx:',asymIndx
4940 END IF
4941 
4942 END SUBROUTINE checksymmetry
4943 !-------------------------------------------------------------------------------
4944 
4945 !-------------------------------------------------------------------------------
4946 SUBROUTINE storediagonal(blkrow,colnum,buf)
4947 
4948 INTEGER, INTENT(IN) :: blkrow, colnum
4949 REAL(dp), DIMENSION(1:M), INTENT(IN) :: buf
4950 INTEGER :: indx
4951 
4952 IF(blkrow.LT.startglobrow) THEN
4953  IF(kpdbg) WRITE(ofu,*) 'Error in indexing global block row index'; CALL fl(ofu)
4954 END IF
4955 
4956 indx=(blkrow-startglobrow)*m+colnum
4957 origdiagelement(indx)=buf(colnum)
4958 
4959 END SUBROUTINE storediagonal
4960 !-------------------------------------------------------------------------------
4961 
4962 
4963 !-------------------------------------------------------------------------------
4964 SUBROUTINE writeblocks(flag)
4965 
4966 INTEGER, INTENT(IN) :: flag
4967 !LOGICAL, INTENT(IN) :: flag
4968 INTEGER :: row, col, localblkrow, globalrow
4969 
4970 CHARACTER(100) :: fname
4971 CHARACTER(50) :: ciam, cnprocs
4972 INTEGER :: WOFU, istat
4973 
4974 WRITE(ciam,*) rank; WRITE(cnprocs,*) nranks
4975 ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
4976 wofu = 4*nranks+rank
4977 
4978 fname='block-'//trim(ciam)//'-P-'//trim(cnprocs)//'.txt'
4979 OPEN(unit=wofu, file=fname, status="REPLACE", action="WRITE",&
4980 &form="FORMATTED",position="APPEND", iostat=istat)
4981 !OPEN(UNIT=WOFU, FILE=fname, STATUS="REPLACE", ACTION="WRITE",&
4982 !&POSITION="REWIND", IOSTAT=istat)
4983 
4984 IF(.true.) THEN
4985 DO row=1, (endglobrow-startglobrow+1)*m
4986  WRITE(wofu,*) 'SavedDiag:',row,origdiagelement(row), flag
4987  CALL fl(wofu)
4988 END DO
4989 END IF
4990 
4991 IF(.true.) THEN
4992 DO globalrow=startglobrow,endglobrow
4993  localblkrow=globalrow-startglobrow+1
4994 
4995  WRITE(wofu,*)
4996  CALL fl(wofu)
4997  DO row=1,m
4998  DO col=1,m
4999  WRITE(wofu,*) globalrow, 'L', row, col, orig(localblkrow)%L(row,col), flag
5000  CALL fl(wofu)
5001  END DO
5002  END DO
5003 
5004  WRITE(wofu,*)
5005  CALL fl(wofu)
5006  DO row=1,m
5007  DO col=1,m
5008  WRITE(wofu,*) globalrow, 'D', row, col, orig(localblkrow)%D(row,col), flag
5009  CALL fl(wofu)
5010  END DO
5011  END DO
5012 
5013  WRITE(wofu,*)
5014  CALL fl(wofu)
5015  DO row=1,m
5016  DO col=1,m
5017  WRITE(wofu,*) globalrow, 'U', row, col, orig(localblkrow)%U(row,col), flag
5018  CALL fl(wofu)
5019  END DO
5020  END DO
5021  WRITE(wofu,*)'--------------------------------------------------------'
5022  CALL fl(wofu)
5023 
5024 END DO
5025 END IF
5026 
5027 END SUBROUTINE writeblocks
5028 !-------------------------------------------------------------------------------
5029 
5030 
5031 !-------------------------------------------------------------------------------
5032 SUBROUTINE displayblocks(flag)
5033 
5034 INTEGER, INTENT(IN) :: flag
5035 INTEGER :: row, localblkrow, globalrow
5036 
5037 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 1; CALL fl(ofu)
5038 
5039 IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5040 DO row=1, (endglobrow-startglobrow+1)*m
5041  IF(kpdbg) WRITE(ofu,*) 'OrigDiagElement:',origdiagelement(row),'-',flag
5042  CALL fl(ofu)
5043 END DO
5044 IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5045 
5046 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 2; CALL fl(ofu)
5047 
5048 
5049 DO globalrow=startglobrow,endglobrow
5050  localblkrow=globalrow-startglobrow+1
5051 
5052  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5053 
5054  DO row=1,m
5055  IF(kpdbg) WRITE(ofu,*) 'orig-L:',orig(localblkrow)%L(row,1:m),'-',flag
5056  CALL fl(ofu)
5057  END DO
5058 
5059  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5060  DO row=1,m
5061  IF(kpdbg) WRITE(ofu,*) 'orig-D:',orig(localblkrow)%D(row,1:m),'-',flag
5062  CALL fl(ofu)
5063  END DO
5064 
5065  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5066  DO row=1,m
5067  IF(kpdbg) WRITE(ofu,*) 'orig-U:',orig(localblkrow)%U(row,1:m),'-',flag
5068  CALL fl(ofu)
5069  END DO
5070  IF(kpdbg) WRITE(ofu,*) ' '; CALL fl(ofu)
5071 END DO
5072 
5073 IF(kpdbg) WRITE(ofu,*) '------- Here ------- ', 3; CALL fl(ofu)
5074 
5075 
5076 END SUBROUTINE displayblocks
5077 !-------------------------------------------------------------------------------
5078 
5079 
5080 !-------------------------------------------------------------------------------
5081 SUBROUTINE parmatvec (invec,outvec,outveclength)
5082 
5083 INTEGER,INTENT(IN) :: outveclength
5084 REAL(dp), INTENT(IN) :: invec(1:N*M)
5085 REAL(dp), INTENT(OUT) :: outvec(1:outveclength)
5086 
5087 INTEGER :: col, row, offset
5088 INTEGER :: globalrow, localblkrow, cnt, dcnt
5089 REAL(dp) :: melement, velement, total
5090 
5091 outvec=0; cnt=0; dcnt=0
5092 DO globalrow=startglobrow,endglobrow
5093 
5094  localblkrow=globalrow-startglobrow+1
5095 
5096  IF (globalrow.EQ.1) THEN
5097  offset=0
5098  ELSE
5099  offset=(globalrow-2)*m
5100  END IF
5101 
5102  IF (globalrow.EQ.1) THEN
5103  DO row=1,m
5104  total=0
5105  DO col=1,2*m
5106  velement=invec(offset+col)
5107  IF (col.LE.m) THEN
5108  IF(row.EQ.col) THEN
5109  dcnt=dcnt+1
5110  melement=origdiagelement(dcnt)
5111  ELSE
5112  melement=orig(localblkrow)%D(row,col)
5113  END IF
5114  ELSE
5115  melement=orig(localblkrow)%U(row,col-m)
5116  END IF
5117  total=total+melement*velement
5118  END DO
5119  cnt=cnt+1
5120  outvec(cnt)=total
5121  END DO
5122  ELSE IF (globalrow.EQ.n) THEN
5123  DO row=1,m
5124  total=0
5125  DO col=1,2*m
5126  velement=invec(offset+col)
5127  IF (col.LE.m) THEN
5128  melement=orig(localblkrow)%L(row,col)
5129  ELSE
5130  IF (row.EQ.col-m) THEN
5131  dcnt=dcnt+1
5132  melement=origdiagelement(dcnt)
5133  ELSE
5134  melement=orig(localblkrow)%D(row,col-m)
5135  ENDIF
5136  END IF
5137  total=total+melement*velement
5138  END DO
5139  cnt=cnt+1
5140  outvec(cnt)=total
5141  END DO
5142  ELSE
5143  DO row=1,m
5144  total=0
5145  DO col=1,3*m
5146  velement=invec(offset+col)
5147  IF (col.LE.m) THEN
5148  melement=orig(localblkrow)%L(row,col)
5149  ELSE IF (col.GT.m.AND.col.LE.2*m) THEN
5150  IF (row.EQ.col-m) THEN
5151  dcnt=dcnt+1
5152  melement=origdiagelement(dcnt)
5153  ELSE
5154  melement=orig(localblkrow)%D(row,col-m)
5155  END IF
5156  ELSE
5157  melement=orig(localblkrow)%U(row,col-2*m)
5158  END IF
5159  total=total+melement*velement
5160  IF (globalrow.EQ.2.AND.row.EQ.1) THEN
5161  END IF
5162  END DO
5163  cnt=cnt+1
5164  outvec(cnt)=total
5165  END DO
5166  END IF
5167 END DO
5168 
5169 END SUBROUTINE parmatvec
5170 !------------------------------------------------------------------------------
5171 
5172 !-------------------------------------------------------------------------------
5173 SUBROUTINE getcolsum(cs)
5174 !-------------------------------------------------------------------------------
5175 real(dp), DIMENSION(M, N), INTENT(OUT) :: cs
5176 !-------------------------------------------------------------------------------
5177 INTEGER :: js
5178 real(dp) :: minscale
5179 real(dp) :: eps
5180 !-------------------------------------------------------------------------------
5181 IF (lequi1) THEN
5182  minscale = 1.0e-5_dp
5183 ELSE
5184  minscale = 1.0e-8_dp
5185 END IF
5186 CALL initscalefactors
5187 DO js = startglobrow, endglobrow
5188  CALL getscalefactors(js,cs(:,js))
5189  eps = minscale*maxval(cs(:,js))
5190  CALL assert(eps .gt. zero, ' cs == 0 in GetColSum')
5191  cs(:,js) = one/sqrt(max(cs(:,js), eps))
5192 END DO
5193 
5194 CALL finalizescalefactors
5195 
5196 END SUBROUTINE getcolsum
5197 !-------------------------------------------------------------------------------
5198 
5199 !-------------------------------------------------------------------------------
5200 SUBROUTINE initscalefactors
5201 
5202 INTEGER :: top, bot, MPI_ERR, ic, j, k
5203 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5204 REAL(dp), DIMENSION(:), ALLOCATABLE :: tmp1, tmp2
5205 INTEGER :: numBlockRows
5206 
5207 numblockrows = endglobrow-startglobrow+1
5208 
5209 top=rank-1; IF(rank.EQ.0) top=mpi_proc_null
5210 bot=rank+1; IF(rank.EQ.nranks-1) bot=mpi_proc_null
5211 
5212 ALLOCATE (topscalefac(m), botscalefac(m), tmp1(m), tmp2(m), stat=ic)
5213 CALL assert(ic.eq.0,'STAT != 0 IN InitScaleFactors')
5214 
5215 DO ic=1, m
5216 IF (lequi1) THEN
5217  tmp1(ic)=sum(abs(orig(1)%L(:,ic)))
5218  tmp2(ic)=sum(abs(orig(numblockrows)%U(:,ic)))
5219 ELSE
5220  tmp1(ic)=maxval(abs(orig(1)%L(:,ic)))
5221  tmp2(ic)=maxval(abs(orig(numblockrows)%U(:,ic)))
5222 ENDIF
5223 END DO
5224 
5225 CALL mpi_sendrecv(tmp1,m,mpi_real8,top,1, &
5226  botscalefac,m,mpi_real8,bot,1,siesta_comm,mpi_stat,mpi_err)
5227 
5228 CALL mpi_sendrecv(tmp2,m,mpi_real8,bot,1, &
5229  topscalefac,m,mpi_real8,top,1,siesta_comm,mpi_stat,mpi_err)
5230 
5231 DEALLOCATE(tmp1, tmp2)
5232 
5233 END SUBROUTINE initscalefactors
5234 !-------------------------------------------------------------------------------
5235 
5236 !-------------------------------------------------------------------------------
5237 SUBROUTINE getscalefactors(js,scalevector)
5238 !-------------------------------------------------------------------------------
5239 INTEGER, INTENT(IN) :: js
5240 REAL(dp), INTENT(OUT) :: scalevector(M)
5241 !-------------------------------------------------------------------------------
5242 REAL(dp), ALLOCATABLE :: coltmp(:)
5243 REAL(dp) :: eps
5244 INTEGER :: ic, ir
5245 !-------------------------------------------------------------------------------
5246 
5247 ALLOCATE (coltmp(m))
5248 
5249 ir = js-startglobrow+1
5250 
5251 !COLUMN SUMS: 1-norm; MAXVAL: infinity norm
5252 DO ic=1, m
5253 
5254  coltmp = abs(orig(ir)%D(:,ic))
5255  IF (lequi1) THEN
5256  scalevector(ic) = sum(coltmp)
5257  ELSE
5258  scalevector(ic) = maxval(coltmp)
5259  END IF
5260 
5261  IF (js.GT.startglobrow) THEN
5262  coltmp = abs(orig(ir-1)%U(:,ic))
5263  IF (lequi1) THEN
5264  scalevector(ic) = scalevector(ic) + sum(coltmp)
5265  IF (js.EQ.endglobrow .AND. js.NE.n) &
5266  scalevector(ic) = scalevector(ic) + botscalefac(ic)
5267  ELSE
5268  scalevector(ic) = max(scalevector(ic),maxval(coltmp))
5269  IF (js.EQ.endglobrow .AND. js.NE.n) &
5270  scalevector(ic) = max(scalevector(ic),botscalefac(ic))
5271  END IF
5272  END IF
5273 
5274  IF (js.LT.endglobrow) THEN
5275  coltmp = abs(orig(ir+1)%L(:,ic))
5276  IF (lequi1) THEN
5277  scalevector(ic) = scalevector(ic) + sum(coltmp)
5278  IF (js.EQ.startglobrow .AND. js.NE.1) &
5279  scalevector(ic) = scalevector(ic) + topscalefac(ic)
5280  ELSE
5281  scalevector(ic) = max(scalevector(ic),maxval(coltmp))
5282  IF (js.EQ.startglobrow .AND. js.NE.1) &
5283  scalevector(ic) = max(scalevector(ic),topscalefac(ic))
5284  END IF
5285  END IF
5286 
5287 END DO
5288 
5289 DEALLOCATE (coltmp)
5290 CALL assert(all(scalevector.GT.zero),'SCALEVECTOR <= 0 IN GETSCALEFACTORS')
5291 
5292 END SUBROUTINE getscalefactors
5293 !-------------------------------------------------------------------------------
5294 
5295 !-------------------------------------------------------------------------------
5296 SUBROUTINE finalizescalefactors
5297  DEALLOCATE (topscalefac,botscalefac)
5298 END SUBROUTINE finalizescalefactors
5299 !-------------------------------------------------------------------------------
5300 
5301 !-------------------------------------------------------------------------------
5302 SUBROUTINE applyparallelscaling(lmp, colscale)
5303 !-------------------------------------------------------------------------------
5304 REAL(dp), INTENT(IN) :: lmp
5305 REAL(dp), INTENT(INOUT) :: colscale(M,N)
5306 !-------------------------------------------------------------------------------
5307 INTEGER :: icount, istat
5308 REAL(dp) :: ton, toff, colcnd, colcnd1, temp(2)
5309 REAL(dp), ALLOCATABLE :: colpad(:,:)
5310 !-------------------------------------------------------------------------------
5311 CALL second0(ton)
5312 
5313 icount = 0
5314 startrow1 = max(1, startglobrow-1); endrow1 = min(n, endglobrow+1)
5315 CALL assert(all(colscale(:,startrow1:endrow1).EQ.1), &
5316  'COLSCALE !=1 INITIALLY IN APPLYPARALLELSCALING')
5317 
5318 ALLOCATE (colpad(m,n), stat=istat)
5319 CALL assert(istat.eq.0,' Allocation error in ApplyParallelScaling')
5320 
5321 CALL getcolsum(colpad)
5322 CALL padsides(colpad, m, 1, 1)
5323 CALL vectorcopypar(colpad, colscale)
5324 CALL parallelscaling(colpad)
5325 
5326 icount = icount + 1
5327 !MAXIMUM COLUMN VARIATIONS (like an inverse "Condition number")
5328 temp(1) = maxval( colpad(:,startglobrow:endglobrow))
5329 temp(2) = maxval(-colpad(:,startglobrow:endglobrow))
5330 CALL mpi_allreduce(mpi_in_place, temp, 2, mpi_real8, mpi_max, siesta_comm, &
5331  mpi_err)
5332 colcnd1 = temp(1)
5333 colcnd = -temp(2)
5334 
5335 IF (colcnd1 .NE. zero) THEN
5336  colcnd = abs(colcnd/colcnd1)
5337 ELSE
5338  colcnd = 1
5339 ENDIF
5340 
5341 !IF (icount.LE.6 .AND. colcnd.LT.0.01_dp) GO TO 100 !dgeequ logic
5342 
5343 DEALLOCATE(colpad)
5344 
5345 !Add levenberg to diagonals
5346 CALL findminmax_tri(lmp)
5347 
5348 CALL second0(toff)
5349 IF (rank.EQ.0) print '(1x,3(a,1p,e10.3))', &
5350  'COLUMN-SCALING TIME: ', toff-ton, ' COLCND: ', colcnd, &
5351  ' DMINL: ', dmin_tri*lmp
5352 
5353 END SUBROUTINE applyparallelscaling
5354 !-------------------------------------------------------------------------------
5355 
5356 !-------------------------------------------------------------------------------
5357 SUBROUTINE parallelscaling(colsum)
5358 !-------------------------------------------------------------------------------
5359 REAL(dp), DIMENSION(M,N), INTENT(IN) :: colsum
5360 !-------------------------------------------------------------------------------
5361 INTEGER :: js, icol, ir, dcnt
5362 !-------------------------------------------------------------------------------
5363 
5364 l_colscale = .true.
5365 
5366 dcnt = 0
5367 DO js = startglobrow, endglobrow
5368  ir=js-startglobrow+1
5369  DO icol = 1, m
5370  lelement(1,ir)%D(:,icol) = colsum(:,js)*orig(ir)%D(:,icol)*colsum(icol,js)
5371  orig(ir)%D(:,icol) = lelement(1,ir)%D(:,icol)
5372  dcnt=dcnt+1
5373  origdiagelement(dcnt) = orig(ir)%D(icol,icol)
5374  IF (js .GT. 1) THEN
5375  lelement(1,ir)%L(:,icol) = colsum(:,js)*orig(ir)%L(:,icol)*colsum(icol,js-1)
5376  orig(ir)%L(:,icol) = lelement(1,ir)%L(:,icol)
5377  END IF
5378  IF (js .LT. n) THEN
5379  lelement(1,ir)%U(:,icol) = colsum(:,js)*orig(ir)%U(:,icol)*colsum(icol,js+1)
5380  orig(ir)%U(:,icol) = lelement(1,ir)%U(:,icol)
5381  END IF
5382  END DO
5383 END DO
5384 
5385 END SUBROUTINE parallelscaling
5386 !-------------------------------------------------------------------------------
5387 
5388 !-------------------------------------------------------------------------------
5389 SUBROUTINE findminmax_tri(lmp)
5390 !SPH 091616: Find min diagonal element and max eigenvalue (Gerschgorin Theorem)
5391 REAL(dp), PARAMETER :: minThreshold=1.e-6_dp
5392 REAL(dp), INTENT(IN) :: lmp
5393 INTEGER :: js, icol, ir, dcnt, imax_diag(1)
5394 REAL(dp) :: eps, temp(2), minDiag, sign_diag = -1
5395 
5396 dcnt=0
5397 maxeigen_tri = 0
5398 dmin_tri = huge(dmin_tri)
5399 DO js = startglobrow, endglobrow
5400  ir=js-startglobrow+1
5401 
5402 !Minimum allowable diagonal element after scaling
5403  mindiag = minthreshold*maxval(abs(origdiagelement(dcnt+1:dcnt+m)))
5404  IF (mindiag .EQ. zero) THEN
5405  mindiag = minthreshold
5406  ELSE
5407  imax_diag = maxloc(abs(origdiagelement(dcnt+1:dcnt+m)))
5408  sign_diag = sign(one, origdiagelement(imax_diag(1)))
5409  END IF
5410 
5411  DO icol = 1, m
5412  dcnt = dcnt+1
5413  eps = origdiagelement(dcnt)
5414 
5415  IF (abs(eps) .LE. mindiag) THEN !Can happen for l_colscale=FALSE
5416  eps = sign_diag*mindiag
5417  origdiagelement(dcnt) = eps
5418  orig(ir)%D(icol,icol) = eps
5419  lelement(1,ir)%D(icol,icol) = eps
5420  ELSE
5421  IF (js .LT. n) dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
5422  END IF
5423 
5424  maxeigen_tri = max(maxeigen_tri, sum(abs(orig(ir)%L(icol,:)) + &
5425  abs(orig(ir)%D(icol,:)) + abs(orig(ir)%U(icol,:))))
5426  END DO
5427 END DO
5428 
5429 temp(1)=-dmin_tri; temp(2)=maxeigen_tri
5430 CALL mpi_allreduce(mpi_in_place,temp,2,mpi_real8, mpi_max, siesta_comm,mpi_err)
5431 dmin_tri=sign(-temp(1), eps)
5432 maxeigen_tri=temp(2)
5433 
5434 
5435 !ADD LEVENBERG PARAMETER LM*DMIN TO DIAGONAL [OR D*(1+LM) IF NO COL-SCALING]
5436 CALL resetdiagonal(lmp, .false.)
5437 
5438 END SUBROUTINE findminmax_tri
5439 !-------------------------------------------------------------------------------
5440 
5441 
5442 !-------------------------------------------------------------------------------
5443 SUBROUTINE resetdiagonal(lmp, bReset)
5444 REAL(dp), INTENT(IN) :: lmp
5445 LOGICAL, INTENT(IN) :: bReset
5446 REAL(dp) :: DminL, eps, eps0
5447 INTEGER :: dcnt, js, ir, icol
5448 
5449 !ADD CONSTANT (SCALED BY DMIN) LEVENBERG PARAMETER TO DIAGONAL [NOT D*(1+LM)]
5450 eps0 = sign(one, origdiagelement(1))
5451 dmin_tri = sign(dmin_tri,eps0)
5452 dminl = dmin_tri*abs(lmp)
5453 
5454 dcnt = 0
5455 
5456 DO js = startglobrow, endglobrow
5457  ir=js-startglobrow+1
5458  DO icol = 1, m
5459  dcnt=dcnt+1
5460  eps = origdiagelement(dcnt)
5461  CALL assert(sign(one,eps).EQ.eps0,'WRONG SIGN IN RESET DIAGONAL')
5462  IF (l_colscale) THEN
5463  lelement(1,ir)%D(icol,icol) = eps + dminl
5464  ELSE
5465  lelement(1,ir)%D(icol,icol) = eps*(1 + abs(lmp))
5466  END IF
5467  orig(ir)%D(icol,icol) = lelement(1,ir)%D(icol,icol)
5468  END DO
5469 
5470  IF (breset) THEN
5471  lelement(1,ir)%L = orig(ir)%L
5472  lelement(1,ir)%D = orig(ir)%D
5473  lelement(1,ir)%U = orig(ir)%U
5474  lelement(1,ir)%pivot = 0
5475  END IF
5476 END DO
5477 
5478 END SUBROUTINE resetdiagonal
5479 !-------------------------------------------------------------------------------
5480 
5481 !-------------------------------------------------------------------------------
5482 SUBROUTINE refactorhessian(lmp)
5483 !SPH 08-31-16
5484 !Routine to reset Hessian block prior to refactoring it with a new levenberg parameter (lmp)
5485 !
5486 REAL(dp), INTENT(IN) :: lmp
5487 
5488 CALL resetdiagonal(lmp, .true.)
5489 CALL forwardsolve
5490 
5491 END SUBROUTINE refactorhessian
5492 !-------------------------------------------------------------------------------
5493 
5494 !-------------------------------------------------------------------------------
5495 SUBROUTINE computetranspose
5496 
5497 
5498 
5499 END SUBROUTINE computetranspose
5500 !-------------------------------------------------------------------------------
5501 
5502 !-------------------------------------------------------------------------------
5503 SUBROUTINE vectorcopypar (colsum, colscale)
5504 !-----------------------------------------------
5505 REAL(dp), INTENT(IN) :: colsum(M,N)
5506 REAL(dp), INTENT(OUT) :: colscale(M,N)
5507 !-----------------------------------------------
5508 INTEGER :: js
5509 !-----------------------------------------------
5510 DO js = startrow1, endrow1
5511  colscale(:,js) = colsum(:,js)*colscale(:,js)
5512 END DO
5513 
5514 END SUBROUTINE vectorcopypar
5515 !-------------------------------------------------------------------------------
5516 
5517 !-------------------------------------------------------------------------------
5518 SUBROUTINE padsides(arrin, blksize, top, bot)
5519 !-----------------------------------------------
5520 INTEGER, INTENT(IN) :: top, bot, blksize
5521 REAL(dp), INTENT(INOUT) :: arrin(blksize, N)
5522 !-----------------------------------------------
5523 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
5524 INTEGER :: M1, left, right, tag1=1, tag2=2, &
5525  tr, t1l, tl, t1r
5526 !-----------------------------------------------
5527 left=rank-1; IF(rank.EQ.0) left=mpi_proc_null
5528 right=rank+1; IF(rank.EQ.nranks-1) right=mpi_proc_null
5529 
5530 tr = endglobrow; tl = startglobrow
5531 t1r= min(endglobrow+1, n); t1l= max(1, startglobrow-1)
5532 
5533 !Send top / Rec bottom elements
5534 m1 = blksize*top
5535 CALL mpi_sendrecv(arrin(:,tr-(top-1):tr), m1, mpi_real8, &
5536  right, tag1, arrin(:,t1l-(top-1):t1l), m1, mpi_real8, &
5537  left, tag1, siesta_comm, mpi_stat, mpi_err)
5538 
5539 !Send bottom / Rec top boundary elements
5540 m1 = blksize*bot
5541 CALL mpi_sendrecv(arrin(:,tl:tl+(bot-1)), m1, mpi_real8, &
5542  left, tag2, arrin(:,t1r:t1r+(bot-1)), m1, mpi_real8, &
5543  right, tag2, siesta_comm, mpi_stat, mpi_err)
5544 END SUBROUTINE padsides
5545 
5546 #endif
5547 END MODULE blocktridiagonalsolver_s
5548 !-------------------------------------------------------------------------------
blocktridiagonalsolver_s::levelelement
Data associated with each row at each level.
Definition: blocktridiagonalsolver_s.f90:30
blocktridiagonalsolver_s::timecount
Statistics (timing, etc.)
Definition: blocktridiagonalsolver_s.f90:211
blocktridiagonalsolver_s::mastertoslavemapping
Master-to-slave mapping.
Definition: blocktridiagonalsolver_s.f90:170
v3_utilities::assert
Definition: v3_utilities.f:55
shared_data::lequi1
logical lequi1
Equilibrate matrix with col 1-norm.
Definition: shared_data.f90:236
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11
blocktridiagonalsolver_s::blacsprocessgrid
BLACS/PBLAS process grid information.
Definition: blocktridiagonalsolver_s.f90:142
blocktridiagonalsolver_s::pblaslevelparameters
Level-specific PBLAS information.
Definition: blocktridiagonalsolver_s.f90:181
blocktridiagonalsolver_s
Module contains subroutines for solver for block tri-diagonal matrices.
Definition: blocktridiagonalsolver_s.f90:7
blocktridiagonalsolver_s::solutionelement
Solution of selected rows of interest to this rank.
Definition: blocktridiagonalsolver_s.f90:39
blocktridiagonalsolver_s::blacsparameters
BLACS/PBLAS information.
Definition: blocktridiagonalsolver_s.f90:154
blocktridiagonalsolver_s::pblastemparray
Definition: blocktridiagonalsolver_s.f90:226
shared_data
This file contains variables and parameters used by many modules in SIESTA.
Definition: shared_data.f90:10
blocktridiagonalsolver_s::pblasstats
Definition: blocktridiagonalsolver_s.f90:216