2 USE v3_utilities,
ONLY:
assert
6 integer,
parameter :: maxroutines = 1024
7 integer,
parameter :: maxlevels = 1024
8 integer,
parameter :: maxstrlen = 127
11 real,
dimension(maxroutines) :: dictstart,dicttotal
12 integer,
dimension(maxroutines) :: dictcount
13 integer :: nroutine,nlevels
16 character(len=maxstrlen),
dimension(maxroutines) :: dictname
17 character(len=maxstrlen),
dimension(maxlevels) :: lastroutine
19 INTEGER,
ALLOCATABLE,
public :: scalcounts(:), scaldisps(:)
20 INTEGER,
ALLOCATABLE :: StartBlockProc(:), EndBlockProc(:)
23 public :: profstart,profend,profstat,profinit,dclock
24 public :: setupscalingallgather
29 real function dclock()
30 integer :: count,count_rate,count_max
32 call system_clock(count,count_rate,count_max)
33 if (count_rate.ne.0)
then
34 dclock = real(count)/real(count_rate)
56 end subroutine profinit
59 subroutine profstart(rname)
60 character(len=*),
intent(in) :: rname
63 character(len=maxstrlen) :: name
70 isok = (1 .le. nlevels).and.(nlevels .le. maxlevels)
71 call assert( isok,
'** profstart: invalid nlevels')
73 lastroutine(nlevels) = name
77 if (dictname(i)(1:1).eq.name(1:1))
then
78 found = (dictname(i) .eq. name)
87 nroutine = nroutine + 1
88 isok = (nroutine .le. maxroutines)
89 call assert(isok,
'** profstart: nroutine > maxroutines')
97 dictstart(ipos) = dclock()
98 dictcount(ipos) = dictcount(ipos) + 1
101 end subroutine profstart
104 subroutine profend(rname)
105 character(len=*),
intent(in) :: rname
107 character(len=maxstrlen) :: name
109 logical :: found,isok
117 isok = (1.le.nlevels).and.(nlevels.le.maxlevels)
118 call assert(isok,
'** profend: invalid nlevels')
120 isok = (name .eq. lastroutine(nlevels))
122 print*,
'** profend name != lastroutine(',nlevels,
') '
123 print*,
'name: ', name
124 print*,
'lastroutine(nlevels): ', lastroutine(nlevels)
133 if (dictname(i)(1:1) .eq. name(1:1))
then
134 found = (dictname(i) .eq. name)
143 print*,
'** profend: routine name not found '
148 dicttotal(ipos) = dicttotal(ipos) + (tend - dictstart(ipos));
149 nlevels = nlevels - 1;
152 end subroutine profend
155 subroutine profstat(outdev_in)
157 integer,
optional,
intent(in):: outdev_in
158 character(len=maxstrlen) :: fname,fstr
161 if (nroutine .le. 0)
return
163 if (
present(outdev_in))
then
169 fname =
'profstat.dat'
170 open(outdev, file=fname, form=
'formatted',
171 & access=
'sequential',status=
'unknown')
174 fstr =
"(A20,' was called ',i10,' times, total ',f10.2,' secs')"
176 write(outdev,fstr) dictname(i), dictcount(i), dicttotal(i)
177 write(*,fstr) dictname(i), dictcount(i), dicttotal(i)
182 IF (
ALLOCATED(scalcounts))
DEALLOCATE (scalcounts, scaldisps)
185 end subroutine profstat
188 SUBROUTINE setupscalingallgather(mblk_size)
190 INTEGER,
INTENT(IN) :: mblk_size
192 EXTERNAL :: numroc, blacs_gridinfo
194 IF (.NOT.
ALLOCATED(scalcounts))
ALLOCATE (scalcounts(nprocs))
195 IF (.NOT.
ALLOCATED(scaldisps))
ALLOCATE (scaldisps(nprocs))
197 CALL numrocmapping(iam, nprocs, mblk_size)
200 scalcounts(i)=(endblockproc(i)-startblockproc(i)+1)
203 DEALLOCATE (startblockproc, endblockproc)
207 CALL blacs_gridinfo(icontxt_1xp,nprow,npcol,myrow,mycol)
213 locq = numroc( mblk_size, nb, mycol, csrc, npcol )
215 mblk_size2 = max(1,locq)
216 CALL assert(scalcounts(iam+1).EQ.mblk_size2,
217 'scalcounts != mblk_size2 in SetupScalingAllGather')
222 scaldisps(i)=scaldisps(i-1)+scalcounts(i-1)
225 END SUBROUTINE setupscalingallgather
227 SUBROUTINE numrocmapping(rank, activeranks, N)
228 use descriptor_mod,
ONLY: siesta_comm
230 INTEGER,
INTENT(IN) :: rank, activeranks, N
231 INTEGER :: startblock, endblock
232 INTEGER :: lload, sload, myload
233 INTEGER :: numL, numS, mpi_err
236 IF (.NOT.
ALLOCATED(startblockproc))
ALLOCATE (startblockproc(activeranks
237 IF (.NOT.
ALLOCATED(endblockproc))
ALLOCATE (endblockproc(activeranks
239 lload=ceiling(real(n)/activeranks)
240 sload=floor(real(n)/activeranks)
242 IF (lload.EQ.sload)
THEN
245 IF (rank.LT.mod(n,activeranks))
THEN
252 IF (sload.EQ.lload)
THEN
256 IF (myload.EQ.lload)
THEN
260 numl=mod(n,activeranks)
265 IF (rank.LT.activeranks)
THEN
266 startblock=numl*lload+nums*sload
267 endblock=startblock+myload-1
274 startblock=startblock+1
277 CALL mpi_allgather(startblock,1,mpi_integer,startblockproc,
278 1,mpi_integer,siesta_comm,mpi_err)
280 CALL mpi_allgather(endblock,1,mpi_integer,endblockproc,
281 1,mpi_integer,siesta_comm,mpi_err)
284 END SUBROUTINE numrocmapping