V3FIT
prof_mod.f90
1  module prof_mod
2  USE v3_utilities, ONLY: assert
3  implicit none
4  private
5 
6  integer, parameter :: maxroutines = 1024
7  integer, parameter :: maxlevels = 1024
8  integer, parameter :: maxstrlen = 127
9 
10 
11  real, dimension(maxroutines) :: dictstart,dicttotal
12  integer, dimension(maxroutines) :: dictcount
13  integer :: nroutine,nlevels
14 
15 
16  character(len=maxstrlen), dimension(maxroutines) :: dictname
17  character(len=maxstrlen), dimension(maxlevels) :: lastroutine
18 
19  INTEGER, ALLOCATABLE, public :: scalcounts(:), scaldisps(:)
20  INTEGER, ALLOCATABLE :: StartBlockProc(:), EndBlockProc(:)
21 
22 #if defined(MPI_OPT)
23  public :: profstart,profend,profstat,profinit,dclock
24  public :: setupscalingallgather
25 
26  contains
27 
28 !=============================================
29  real function dclock()
30  integer :: count,count_rate,count_max
31 
32  call system_clock(count,count_rate,count_max)
33  if (count_rate.ne.0) then
34  dclock = real(count)/real(count_rate)
35  else
36  dclock = 0.0
37  endif
38 
39  end function dclock
40 
41 !=============================================
42  subroutine profinit()
43 
44 ! integer :: i,ipos
45 
46  nroutine = 0
47 
48  dictname(:) = ' '
49  dictstart(:) = 0.0
50  dictcount(:) = 0.0
51  dicttotal(:) = 0.0
52 
53  nlevels = 0
54  lastroutine(:) = ' '
55 
56  end subroutine profinit
57 
58 !=============================================
59  subroutine profstart(rname)
60  character(len=*),intent(in) :: rname
61 
62 
63  character(len=maxstrlen) :: name
64  logical :: found,isok
65  integer :: i,j,ipos
66 
67  name = rname
68  nlevels = nlevels + 1
69 
70  isok = (1 .le. nlevels).and.(nlevels .le. maxlevels)
71  call assert( isok, '** profstart: invalid nlevels')
72 
73  lastroutine(nlevels) = name
74  found = .false.
75  do j=1,nroutine
76  i = nroutine - j + 1
77  if (dictname(i)(1:1).eq.name(1:1)) then
78  found = (dictname(i) .eq. name)
79  if (found) then
80  ipos = i
81  exit
82  endif
83  endif
84  enddo
85 
86  if (.not.found) then
87  nroutine = nroutine + 1
88  isok = (nroutine .le. maxroutines)
89  call assert(isok, '** profstart: nroutine > maxroutines')
90 
91  ipos = nroutine
92  dictname(ipos) = name
93  dictcount(ipos) = 0
94  dicttotal(ipos) = 0.0
95  endif
96 
97  dictstart(ipos) = dclock()
98  dictcount(ipos) = dictcount(ipos) + 1
99 
100  return
101  end subroutine profstart
102 
103 !=============================================
104  subroutine profend(rname)
105  character(len=*),intent(in) :: rname
106 
107  character(len=maxstrlen) :: name
108  integer :: i,j,ipos
109  logical :: found,isok
110 
111  real :: tend
112 
113  name = rname
114  tend = dclock()
115 
116 
117  isok = (1.le.nlevels).and.(nlevels.le.maxlevels)
118  call assert(isok, '** profend: invalid nlevels')
119 
120  isok = (name .eq. lastroutine(nlevels))
121  if (.not.isok) then
122  print*,'** profend name != lastroutine(',nlevels,') '
123  print*,'name: ', name
124  print*,'lastroutine(nlevels): ', lastroutine(nlevels)
125 
126  stop '** error ** '
127  endif
128 
129  found = .false.
130  do j=1,nroutine
131  i = nroutine - j + 1
132 
133  if (dictname(i)(1:1) .eq. name(1:1)) then
134  found = (dictname(i) .eq. name)
135  if (found) then
136  ipos = i
137  exit
138  endif
139  endif
140  enddo
141 
142  if (.not.found) then
143  print*,'** profend: routine name not found '
144  print*,'name: ',name
145  stop '** error ** '
146  endif
147 
148  dicttotal(ipos) = dicttotal(ipos) + (tend - dictstart(ipos));
149  nlevels = nlevels - 1;
150 
151  return
152  end subroutine profend
153 
154 !=============================================
155  subroutine profstat(outdev_in)
156  implicit none
157  integer, optional, intent(in):: outdev_in
158  character(len=maxstrlen) :: fname,fstr
159  integer :: i, outdev
160 
161  if (nroutine .le. 0) return
162 
163  if (present(outdev_in)) then
164  outdev = outdev_in
165  else
166  outdev = 16
167  endif
168 
169  fname = 'profstat.dat'
170  open(outdev, file=fname, form='formatted', &
171  & access='sequential',status='unknown')
172  rewind(outdev)
173 
174  fstr = "(A20,' was called ',i10,' times, total ',f10.2,' secs')"
175  do i=1,nroutine
176  write(outdev,fstr) dictname(i), dictcount(i), dicttotal(i)
177  write(*,fstr) dictname(i), dictcount(i), dicttotal(i)
178  enddo
179 
180  close(outdev)
181 
182  IF (ALLOCATED(scalcounts)) DEALLOCATE (scalcounts, scaldisps)
183 
184  return
185  end subroutine profstat
186 
187 !Add SPH for col/row scaling
188  SUBROUTINE setupscalingallgather(mblk_size)
189  USE descriptor_mod
190  INTEGER, INTENT(IN) :: mblk_size
191  INTEGER :: i, numroc
192  EXTERNAL :: numroc, blacs_gridinfo
193 
194  IF (.NOT.ALLOCATED(scalcounts)) ALLOCATE (scalcounts(nprocs))
195  IF (.NOT.ALLOCATED(scaldisps)) ALLOCATE (scaldisps(nprocs))
196 
197  CALL numrocmapping(iam, nprocs, mblk_size)
198 
199  DO i=1,nprocs
200  scalcounts(i)=(endblockproc(i)-startblockproc(i)+1)
201  END DO
202 
203  DEALLOCATE (startblockproc, endblockproc)
204 
205 !Sanity consistency check
206 !!!!!!!!!!!!!!!!!!!!!!!!!
207  CALL blacs_gridinfo(icontxt_1xp,nprow,npcol,myrow,mycol)
208  mb = mblk_size
209  nb = 1
210 
211  rsrc = 0
212  csrc = 0
213  locq = numroc( mblk_size, nb, mycol, csrc, npcol )
214 ! Locp = numroc( mblk_size, mb, myrow, rsrc, nprow )
215  mblk_size2 = max(1,locq)
216  CALL assert(scalcounts(iam+1).EQ.mblk_size2, &
217  'scalcounts != mblk_size2 in SetupScalingAllGather')
218 !!!!!!!!!!!!!!!!!!!!!!!!!
219 
220  scaldisps(1)=0
221  DO i=2,nprocs
222  scaldisps(i)=scaldisps(i-1)+scalcounts(i-1)
223  END DO
224 
225  END SUBROUTINE setupscalingallgather
226 
227  SUBROUTINE numrocmapping(rank, activeranks, N)
228  use descriptor_mod, ONLY: siesta_comm
229  use mpi_inc
230  INTEGER, INTENT(IN) :: rank, activeranks, N
231  INTEGER :: startblock, endblock
232  INTEGER :: lload, sload, myload
233  INTEGER :: numL, numS, mpi_err
234  INTEGER :: r, c
235 
236  IF (.NOT.ALLOCATED(startblockproc)) ALLOCATE (startblockproc(activeranks))
237  IF (.NOT.ALLOCATED(endblockproc)) ALLOCATE (endblockproc(activeranks))
238 
239  lload=ceiling(real(n)/activeranks)
240  sload=floor(real(n)/activeranks)
241 
242  IF (lload.EQ.sload) THEN
243  myload=lload
244  ELSE
245  IF (rank.LT.mod(n,activeranks)) THEN
246  myload=lload
247  ELSE
248  myload=sload
249  END IF
250  END IF
251 
252  IF (sload.EQ.lload) THEN
253  nums=0
254  numl=rank
255  ELSE
256  IF (myload.EQ.lload) THEN
257  numl=rank
258  nums=0
259  ELSE
260  numl=mod(n,activeranks)
261  nums=rank-numl
262  END IF
263  END IF
264 
265  IF (rank.LT.activeranks) THEN !active ranks
266  startblock=numl*lload+nums*sload
267  endblock=startblock+myload-1
268  ELSE !idle ranks
269  startblock=-2
270  endblock=-3
271  END IF
272 
273 ! Fortranized indices
274  startblock=startblock+1
275  endblock=endblock+1
276 
277  CALL mpi_allgather(startblock,1,mpi_integer,startblockproc, &
278  1,mpi_integer,siesta_comm,mpi_err)
279 
280  CALL mpi_allgather(endblock,1,mpi_integer,endblockproc, &
281  1,mpi_integer,siesta_comm,mpi_err)
282 
283 
284  END SUBROUTINE numrocmapping
285 #endif
286  end module prof_mod
287 
v3_utilities::assert
Definition: v3_utilities.f:55
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11