V3FIT
nscalingtools.f90
!----------------------------------------------------------------------------
! Module to scale SIESTA with the number of surfaces (ns). [S. K. Seal, ORNL]
! Date: April 26, 2013
!----------------------------------------------------------------------------

!------------------------------------------------
MODULE nscalingtools
USE stel_kinds
USE v3_utilities, ONLY: assert
USE descriptor_mod, ONLY: siesta_comm
#if defined(MPI_OPT)
USE blocktridiagonalsolver_s, ONLY: &
   & setmatrixrowcoll, setmatrixrowcold, setmatrixrowcolu, &
   & getmatrixrowcoll, getmatrixrowcold, getmatrixrowcolu, &
   & getmatrixrhs, storediagonal
USE mpi_inc
#endif
IMPLICIT NONE
!------------------------------------------------
!------------------------------------------------
! User-controlled logicals
!------------------------------------------------
LOGICAL :: SKSDBG=.false.
LOGICAL :: KPDBG=.false.
LOGICAL :: PARSOLVER=.false.
LOGICAL :: SYMMETRYCHECK=.true.
LOGICAL :: PARGMRES=.false.
LOGICAL :: BUFFERED=.false.
LOGICAL :: PARFUNCTISL=.false.
LOGICAL :: OUTPUT_TIMINGS=.false.
!------------------------------------------------

LOGICAL :: INITREMAPDONE=.false.
LOGICAL :: MAPPINGDONE=.false.
LOGICAL :: FIRSTPARTITION_GE_2=.false.
LOGICAL :: LASTPARTITION_GE_2=.false.
CHARACTER(50) :: crank, cactiveranks
CHARACTER(100) :: envvar
CHARACTER(100) :: envval
CHARACTER, ALLOCATABLE :: remapBuffer(:)

INTEGER :: TOFU
INTEGER :: OpenStatus, nargs
INTEGER, PRIVATE :: N, M, K, KSQR
INTEGER, PARAMETER :: SAVEDIAG=4, lower=3, diag=2, upper=1
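!SAVEDIAG/lower/diag/upper also double as the MPI message tags used by
!send/receive below to mark which diagonal a packed column belongs to.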

!These stitch to the blocktri variables of the same name;
!just USE nscalingtools.
INTEGER :: startglobrow, endglobrow, numBlocks
INTEGER, ALLOCATABLE, DIMENSION (:) :: bcyclicStartBlockProc, bcyclicEndBlockProc
INTEGER, ALLOCATABLE, DIMENSION (:), PRIVATE :: tagBlock, tagColInBlock
INTEGER, ALLOCATABLE, DIMENSION (:), TARGET :: rcounts, disp !SPH added TARGET
INTEGER, ALLOCATABLE, DIMENSION (:) :: rcountsNS, dispNS !SPH 092116
INTEGER, PRIVATE :: a, b, c, flag, cnt
INTEGER, PRIVATE :: icol, itype, block_row, irow, procID, ierror, localbrow
INTEGER :: activeranks, rank, nranks, leftproc, rightproc, totRecvs, nrecd
INTEGER :: tag, MPI_ERR, SYSMAXTAG, APPMAXTAG
INTEGER :: PACKSIZE

! HESSIAN MN-SPACE GATHER-related variables
INTEGER :: linblksize
INTEGER, ALLOCATABLE, DIMENSION(:) :: mnspcounts, mnspdisps
LOGICAL, PRIVATE :: SetUpTOMNSPAllGatherDONE=.false.

#if defined(MPI_OPT)
INTEGER :: MPI_Status(MPI_STATUS_SIZE)
#endif
!------------------------------------------------

CONTAINS

!--------------------------------------------------------------------------
SUBROUTINE myenvvariables

!--------------------------------------------------------------------------
! Default values of environment variables that users are allowed to change
!--------------------------------------------------------------------------
parsolver=.true.
parfunctisl=.true.
!--------------------------------------------------------------------------

!--------------------------------------------------------------------------
! Default values of environment variables that users are NOT allowed to change
!--------------------------------------------------------------------------
output_timings=.false. !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
pargmres=.true.        !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
buffered=.false.       !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
symmetrycheck=.true.   !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
sksdbg=.false.         !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
kpdbg=.false.          !SKS: USERS MUST NOT CHANGE THIS VALUE: 04/26/2013
!--------------------------------------------------------------------------

!--------------------------------------------------------------------------
! Read non-default values, if any
!--------------------------------------------------------------------------
envvar='PARSOLVER'
CALL get_environment_variable(envvar, envval)  !standard intrinsic (was getenv)
IF (envval.EQ.'ScaLAPACK' .OR. envval.EQ.'FALSE') THEN
   parsolver=.false.
   parfunctisl=.false.
   pargmres=.false.
   output_timings=.false.
END IF
END SUBROUTINE myenvvariables
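!--------------------------------------------------------------------------
! Usage sketch (assumes a POSIX shell and is not part of the original
! source): the solver path is selected by exporting PARSOLVER before
! launching, e.g.
!    export PARSOLVER=ScaLAPACK   # or FALSE: disable the parallel solver
! Any other value (or no value) keeps the parallel defaults set above,
! since the code only tests for 'ScaLAPACK' and 'FALSE'.
!--------------------------------------------------------------------------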
!--------------------------------------------------------------------------

!------------------------------------------------
SUBROUTINE setoutputfile (iam, nprocs)

INTEGER :: iam, nprocs, istat
CHARACTER(100) :: fname, cfname
CHARACTER(50) :: ciam, cnprocs

WRITE(ciam,*) iam; WRITE(cnprocs,*) nprocs
ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
tofu = 2*nprocs+iam

fname='sks-'//trim(ciam)//'-P-'//trim(cnprocs)//'.txt'
OPEN(unit=tofu, file=fname, status="REPLACE", action="WRITE", &
   & form="FORMATTED", position="APPEND", iostat=istat)

WRITE(tofu,*)'SKSDBG:', sksdbg; CALL flush(tofu)
WRITE(tofu,*)'KPDBG:', kpdbg; CALL flush(tofu)

WRITE(tofu,*)'SYMMETRYCHECK:', symmetrycheck; CALL flush(tofu)
WRITE(tofu,*)'BUFFERED:', buffered; CALL flush(tofu)
WRITE(tofu,*)'PARGMRES:', pargmres; CALL flush(tofu)
WRITE(tofu,*)'PARSOLVER:', parsolver; CALL flush(tofu)
WRITE(tofu,*)

END SUBROUTINE setoutputfile
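! Example: iam=3, nprocs=16 opens unit tofu=2*16+3=35 on file
! 'sks-3-P-16.txt', i.e. one debug log per MPI rank.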
!------------------------------------------------

!------------------------------------------------
! This routine initializes all the necessary
! variables used by all the routines in this
! module.
!------------------------------------------------
SUBROUTINE initremap (mpol_in,ntor_in,blkrownum,numprocs,myrank)
USE shared_data, ONLY: ndims

INTEGER, INTENT(IN) :: mpol_in,ntor_in,blkrownum,numprocs,myrank
INTEGER :: length
INTEGER :: i, FLAG, tmpint
INTEGER(KIND=8) :: nBuffSize
!INTEGER, PARAMETER :: localprec = SELECTED_INT_KIND(24)
!INTEGER (localprec) :: nBuffSize

IF(initremapdone .OR. .NOT.parsolver) RETURN

linblksize=(mpol_in+1)*(2*ntor_in+1)
m=ndims*linblksize
n=blkrownum

CALL assert(m.NE.0, n.NE.0, 'M, N = 0 IN initRemap')

nranks=numprocs
rank=myrank
nrecd=0

IF(rank.GT.0) THEN
   leftproc=rank-1
ELSE
   leftproc=mpi_proc_null
END IF

IF(rank.LT.nranks-1) THEN
   rightproc=rank+1
ELSE
   rightproc=mpi_proc_null
END IF

! To account for activeranks=P>N
activeranks=nranks
IF (activeranks.GT.n) activeranks=n

length=0; packsize=0
CALL mpi_pack_size(2,mpi_integer,siesta_comm,length,mpi_err)
packsize=packsize+length
CALL mpi_pack_size(m,mpi_real8,siesta_comm,length,mpi_err)
packsize=packsize+length

!Each PACKSIZE packet has MPI_BSEND_OVERHEAD
IF (buffered) THEN
   tmpint=packsize+mpi_bsend_overhead
   nbuffsize=tmpint
   nbuffsize=nbuffsize*4
   nbuffsize=nbuffsize*m
   nbuffsize=nbuffsize*n
   nbuffsize=nbuffsize/max(nranks,1)
   IF (nbuffsize.LE.0) THEN
      CALL mpi_barrier(siesta_comm,mpi_err)
      IF (rank.EQ.0) THEN
         WRITE(7000+rank,*) '>>> N : ', n; CALL flush(7000+rank)
         WRITE(7000+rank,*) '>>> M : ', m; CALL flush(7000+rank)
         WRITE(7000+rank,*) '>>> PACKSIZE : ', packsize; CALL flush(7000+rank)
         WRITE(7000+rank,*) '>>> MPI_BSEND_OVERHEAD : ', mpi_bsend_overhead; CALL flush(7000+rank)
         WRITE(7000+rank,*) '>>> nBuffsize : ', nbuffsize; CALL flush(7000+rank)
      END IF
      CALL assert(nbuffsize.GT.0,'initRemap nBuffsize')
   END IF

   ALLOCATE(remapbuffer(nbuffsize), stat=i)
   CALL assert(i.eq.0,'MPI remapBuffer allocation failed in initRemap')
   CALL mpi_buffer_attach(remapbuffer, nbuffsize, mpi_err)
END IF

IF (.NOT.ALLOCATED(bcyclicstartblockproc)) ALLOCATE (bcyclicstartblockproc(nranks))
IF (.NOT.ALLOCATED(bcyclicendblockproc)) ALLOCATE (bcyclicendblockproc(nranks))
initremapdone=.true.

CALL bcyclicmapping
CALL setuptomnspallgather
CALL computeallgatherparameters

END SUBROUTINE initremap
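! Sizing sketch (illustrative values, not from the source): with
! mpol_in=8, ntor_in=4 and ndims=3 independent variables,
! linblksize=(8+1)*(2*4+1)=81 and the block size is M=3*81=243;
! N=blkrownum is the number of radial block rows (ns surfaces).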
!------------------------------------------------

!------------------------------------------------
! This routine deallocates arrays, etc.
!------------------------------------------------
SUBROUTINE finalizeremap

INTEGER :: bufsize

IF(ALLOCATED(bcyclicstartblockproc)) DEALLOCATE(bcyclicstartblockproc)
IF(ALLOCATED(bcyclicendblockproc)) DEALLOCATE(bcyclicendblockproc)
IF(ALLOCATED(remapbuffer)) THEN
   !MPI requires the attached buffer to be detached (completing any
   !pending buffered sends) before its memory is freed
   CALL mpi_buffer_detach(remapbuffer, bufsize, mpi_err)
   DEALLOCATE(remapbuffer)
END IF

END SUBROUTINE finalizeremap
!------------------------------------------------

!------------------------------------------------
! Computes the mapping of block rows to
! processors, i.e. each processor's domain
! boundaries in terms of global block indices.
!------------------------------------------------
SUBROUTINE bcyclicmapping

!INTEGER, INTENT(OUT) :: startblock, endblock
INTEGER :: startblock, endblock
INTEGER :: lload, sload, myload
INTEGER :: numL, numS
INTEGER :: r, c
LOGICAL :: lnum, lnumg=.true.

CALL assert(initremapdone, &
   'Calling bcyclicMapping routine without calling initRemap')

IF (mappingdone) RETURN

lload=ceiling(real(n)/activeranks)
sload=floor(real(n)/activeranks)

IF (lload.EQ.sload) THEN
   myload=lload
ELSE
   IF (rank.LT.mod(n,activeranks)) THEN
      myload=lload
   ELSE
      myload=sload
   END IF
END IF

IF (sload.EQ.lload) THEN
   nums=0
   numl=rank
ELSE
   IF (myload.EQ.lload) THEN
      numl=rank
      nums=0
   ELSE
      numl=mod(n,activeranks)
      nums=rank-numl
   END IF
END IF

IF (rank.LT.activeranks) THEN !active ranks
   startblock=numl*lload+nums*sload
   endblock=startblock+myload-1
ELSE !idle ranks
   startblock=-2
   endblock=-3
END IF

! Fortranized indices
startblock=startblock+1
endblock=endblock+1

CALL mpi_allgather(startblock,1,mpi_integer,bcyclicstartblockproc, &
   & 1,mpi_integer,siesta_comm,mpi_err)

CALL mpi_allgather(endblock,1,mpi_integer,bcyclicendblockproc, &
   & 1,mpi_integer,siesta_comm,mpi_err)

startglobrow=bcyclicstartblockproc(rank+1)
endglobrow=bcyclicendblockproc(rank+1)

firstpartition_ge_2=.false.
IF((bcyclicendblockproc(1)-bcyclicstartblockproc(1)+1).GE.2) &
   firstpartition_ge_2=.true.

lastpartition_ge_2=.false.
IF((bcyclicendblockproc(activeranks)-bcyclicstartblockproc(activeranks)+1).GE.2) &
   lastpartition_ge_2=.true.

numblocks=bcyclicendblockproc(rank+1)-bcyclicstartblockproc(rank+1)+1

!Too-small partitions may occur on only SOME processors, so combine the
!check with an allreduce and let every rank see the verdict
lnum = numblocks.GE.2
CALL mpi_allreduce(lnum, lnumg, 1, mpi_logical, mpi_land, siesta_comm, &
   mpi_err)

CALL assert(lnumg,'NS/nranks < 2...use smaller number of processors')

mappingdone=.true.
END SUBROUTINE bcyclicmapping
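! Worked example (illustrative): N=10 block rows on activeranks=4 gives
! lload=CEILING(10/4.)=3, sload=2 and MOD(N,P)=2, so ranks 0,1 carry 3
! rows and ranks 2,3 carry 2; after the +1 Fortran shift the
! [startglobrow,endglobrow] windows are [1,3], [4,6], [7,8], [9,10].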
!------------------------------------------------

!------------------------------------------------
! Compute AllGather vector variant parameters.
! This is to be called after bcyclicMapping.
!------------------------------------------------
SUBROUTINE computeallgatherparameters

INTEGER :: i

CALL assert(mappingdone, 'computeAllGatherParameters')

IF(.NOT.ALLOCATED(rcounts)) ALLOCATE(rcounts(activeranks), rcountsns(activeranks))
IF(.NOT.ALLOCATED(disp)) ALLOCATE(disp(activeranks), dispns(activeranks))

DO i=1,activeranks
   rcountsns(i)=(bcyclicendblockproc(i)-bcyclicstartblockproc(i)+1)
   rcounts(i)  =(bcyclicendblockproc(i)-bcyclicstartblockproc(i)+1)*m
END DO

disp(1)=0; dispns(1)=0
DO i=2,activeranks
   dispns(i)=dispns(i-1)+rcountsns(i-1)
   disp(i)=disp(i-1)+rcounts(i-1)
END DO

END SUBROUTINE computeallgatherparameters
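! Usage sketch (assumed buffer names xlocal/xglobal, not from the
! source): the counts and displacements built above drive a
! variable-length gather of each rank's numBlocks*M entries into the
! full N*M vector:
!    CALL MPI_ALLGATHERV(xlocal, rcounts(rank+1), MPI_REAL8,  &
!                        xglobal, rcounts, disp, MPI_REAL8,   &
!                        SIESTA_COMM, MPI_ERR)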
!------------------------------------------------

!------------------------------------------------
! Locates the rank that owns a queried global
! block row (a linear scan over the partition
! boundaries)
!------------------------------------------------
SUBROUTINE search(query,FOUND,location)

!INTEGER, DIMENSION(:), INTENT(IN) :: qarray
INTEGER, INTENT(IN) :: query
LOGICAL, INTENT(OUT) :: FOUND
INTEGER, INTENT(OUT) :: location
INTEGER :: p

CALL assert(mappingdone, 'search')

! To account for P>N
IF (activeranks.GT.n) activeranks=n

! Simple linear scan over the active ranks
found=.false.
DO p=1,activeranks
   IF ((bcyclicstartblockproc(p).LE.query).AND.(query.LE.bcyclicendblockproc(p))) THEN
      found=.true.
      location=p
      EXIT
   END IF
END DO

END SUBROUTINE search
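! Because bcyclicStartBlockProc/bcyclicEndBlockProc are sorted and
! non-overlapping, an O(log P) binary search would also work; a minimal
! sketch (not used by the solver):
!    lo=1; hi=activeranks
!    DO WHILE (lo.LE.hi)
!       p=(lo+hi)/2
!       IF (query.LT.bcyclicstartblockproc(p)) THEN
!          hi=p-1
!       ELSE IF (query.GT.bcyclicendblockproc(p)) THEN
!          lo=p+1
!       ELSE
!          found=.true.; location=p; EXIT
!       END IF
!    END DO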
!------------------------------------------------

!------------------------------------------------
SUBROUTINE setblocktridatastruct(it, br, ic, coldata)

REAL(dp), DIMENSION(M), INTENT(IN) :: coldata
INTEGER, INTENT(IN) :: br, ic, it

IF (it.EQ.upper) THEN !UPPER DIAGONAL
   CALL setblockrowcol(br,ic,coldata,upper)
ELSE IF (it.EQ.diag) THEN !MAIN DIAGONAL
   CALL setblockrowcol(br,ic,coldata,diag)
ELSE IF (it.EQ.lower) THEN !LOWER DIAGONAL
   CALL setblockrowcol(br,ic,coldata,lower)
ELSE IF (it.EQ.savediag) THEN !SAVED DIAGONAL
   CALL storediagonal(br,ic,coldata)
ELSE
   WRITE(*,*)'Something wrong in ', rank,mpi_status(mpi_tag),br,ic,it
   CALL assert(.false., 'IT wrong SetBlockTriDataStruct:')
END IF
END SUBROUTINE setblocktridatastruct
!------------------------------------------------

!------------------------------------------------
SUBROUTINE send(columnData,blockRowNum,blockRowType,columnNum,procNum)

REAL(dp), DIMENSION(1:M), INTENT(IN) :: columnData
INTEGER, INTENT(IN) :: blockRowNum, blockRowType, columnNum
INTEGER, INTENT(OUT) :: procNum

CHARACTER, DIMENSION(PACKSIZE) :: sendbuf
INTEGER :: positn
LOGICAL :: FOUND

positn=0
CALL mpi_pack(blockrownum,1,mpi_integer,sendbuf,packsize,positn,siesta_comm,mpi_err)
CALL mpi_pack(columnnum,1,mpi_integer,sendbuf,packsize,positn,siesta_comm,mpi_err)
CALL mpi_pack(columndata,m,mpi_real8,sendbuf,packsize,positn,siesta_comm,mpi_err)

found=.false.
CALL search(blockrownum,found,procnum)
IF (found) THEN
   IF(procnum-1.EQ.rank) THEN
      IF (blockrowtype.EQ.upper) THEN
         CALL setblocktridatastruct(upper,blockrownum,columnnum,columndata)
      ELSE IF (blockrowtype.EQ.savediag) THEN
         CALL setblocktridatastruct(savediag,blockrownum,columnnum,columndata)
      ELSE IF (blockrowtype.EQ.diag) THEN
         CALL setblocktridatastruct(diag,blockrownum,columnnum,columndata)
      ELSE IF (blockrowtype.EQ.lower) THEN
         CALL setblocktridatastruct(lower,blockrownum,columnnum,columndata)
      ELSE
         CALL assert(.false.,'send error, blockRowType:')
      END IF
   ELSE
      IF (buffered) THEN
         CALL mpi_bsend(sendbuf,positn,mpi_packed,procnum-1,blockrowtype,siesta_comm,mpi_err)
      ELSE
         CALL mpi_send(sendbuf,positn,mpi_packed,procnum-1,blockrowtype,siesta_comm,mpi_err)
      END IF
   END IF
ELSE
   CALL assert(mappingdone, 'send')
END IF

END SUBROUTINE send
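! Packet layout (matches the mpi_pack calls above): two integers
! [blockRowNum, columnNum] followed by M reals of column data; the MPI
! tag carries blockRowType so receive() can dispatch on it.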
!------------------------------------------------

!------------------------------------------------
SUBROUTINE receive (IPROBEFLAG)

LOGICAL, INTENT(IN) :: IPROBEFLAG
LOGICAL :: FLAG
REAL(dp), DIMENSION(M) :: coldata
CHARACTER, DIMENSION(PACKSIZE) :: recvbuf
INTEGER :: br, ic, it
INTEGER :: positn

flag=.true.
IF(iprobeflag) CALL mpi_iprobe (mpi_any_source,mpi_any_tag,siesta_comm,flag,mpi_status,mpi_err)
IF (flag) THEN
   CALL mpi_recv(recvbuf,packsize,mpi_packed,mpi_any_source, &
      & mpi_any_tag,siesta_comm,mpi_status,mpi_err)
   nrecd=nrecd+1

   it=mpi_status(mpi_tag)
   IF (it.NE.upper .AND. it.NE.diag .AND. it.NE.lower .AND. it.NE.savediag) THEN
      WRITE(tofu,*) 'MPI_TAG:',it; FLUSH(tofu)
      CALL assert(.false.,'receive 1 error')
   END IF
   positn=0
   CALL mpi_unpack(recvbuf,packsize,positn,br,1,mpi_integer,siesta_comm,mpi_err)
   CALL mpi_unpack(recvbuf,packsize,positn,ic,1,mpi_integer,siesta_comm,mpi_err)
   CALL mpi_unpack(recvbuf,packsize,positn,coldata,m,mpi_real8,siesta_comm,mpi_err)
   localbrow=br-bcyclicstartblockproc(rank+1)+1

   IF (localbrow.LT.1 .OR. localbrow.GT.numblocks) THEN
      WRITE(tofu,*) 'localbrow:',localbrow; FLUSH(tofu)
      CALL assert(.false.,'receive 2 error')
   END IF

   IF (it.EQ.upper) THEN !UPPER DIAGONAL
      CALL setblockrowcol(br,ic,coldata,upper)
   ELSE IF (it.EQ.diag) THEN !MAIN DIAGONAL
      CALL setblockrowcol(br,ic,coldata,diag)
   ELSE IF (it.EQ.lower) THEN !LOWER DIAGONAL
      CALL setblockrowcol(br,ic,coldata,lower)
   ELSE IF (it.EQ.savediag) THEN !SAVED DIAGONAL
      CALL storediagonal(br,ic,coldata)
   ELSE
      WRITE(*,*)'Something wrong in ', rank,mpi_status(mpi_tag),br,ic,it
      CALL assert(.false.,'receive 3 error')
   END IF
END IF

END SUBROUTINE receive
!------------------------------------------------

!------------------------------------------------
! A utility routine to gather all the partial
! solution vectors and reconstruct the full
! vector on each processor.
!------------------------------------------------
SUBROUTINE getfullsolution(invec,outvec)

REAL(dp), DIMENSION(1:N*M), INTENT(IN) :: invec
REAL(dp), DIMENSION(N,M), INTENT(OUT) :: outvec
INTEGER :: i, j, p
INTEGER :: numBlocks, cnt, indx, offset

DO p=1, activeranks
   numblocks=bcyclicendblockproc(p)-bcyclicstartblockproc(p)+1
   DO i=1,numblocks
      offset=(bcyclicstartblockproc(p)-1)*m+i
      DO j=1,m
         outvec(bcyclicstartblockproc(p)+i-1,j)=invec(offset+(j-1)*numblocks)
      END DO
   END DO
END DO

END SUBROUTINE getfullsolution
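! Index sketch (illustrative): with M=2 and rank p owning rows 4..6
! (numBlocks=3), its slice of invec is stored column-major, so global
! row 5, column 2 is read from invec((4-1)*2 + 2 + (2-1)*3) = invec(11).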
!------------------------------------------------

!------------------------------------------------
SUBROUTINE setblockrowcol( globrow, colnum, buf, opt)
   INTEGER :: globrow
   REAL(dp), INTENT(IN), DIMENSION(M) :: buf
   INTEGER :: colnum, opt
   IF (opt.EQ.upper) THEN
      CALL setmatrixrowcolu( globrow, buf, colnum )
   ELSE IF (opt.EQ.diag) THEN
      CALL setmatrixrowcold( globrow, buf, colnum )
   ELSE IF (opt.EQ.lower) THEN
      CALL setmatrixrowcoll( globrow, buf, colnum )
   ELSE
      WRITE(*,*) 'Error in diagonal type option'
   END IF
END SUBROUTINE setblockrowcol
!------------------------------------------------

!-------------------------------------------------------------------------------
SUBROUTINE checkpoint(ckpt, infname)

INTEGER, INTENT(IN) :: ckpt
CHARACTER(3), INTENT(IN) :: infname

CALL mpi_barrier(siesta_comm, mpi_err)
WRITE(90000+rank,*) 'CheckPoint: Location ',infname,' @ pt',ckpt,' in rank',rank
CALL flush(90000+rank)

END SUBROUTINE checkpoint
!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------
SUBROUTINE writetime(lfname, ofname, ltagname, tagname, usedtime, spass)

INTEGER, INTENT(IN) :: lfname, ltagname, spass
CHARACTER(len=lfname), INTENT(IN) :: ofname
CHARACTER(len=ltagname), INTENT(IN) :: tagname
DOUBLE PRECISION, INTENT(IN) :: usedtime

IF (spass.EQ.-1) THEN
   IF(sksdbg) WRITE(tofu,*) ofname," : ", tagname," : ", usedtime
   IF(sksdbg) FLUSH(tofu)
ELSE
   IF(sksdbg) WRITE(tofu,*) ofname," : ", tagname," : ", usedtime, spass
   IF(sksdbg) FLUSH(tofu)
END IF

END SUBROUTINE writetime
!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------
SUBROUTINE setuptomnspallgather()
INTEGER :: i

setuptomnspallgatherdone=.false.

IF (.NOT.ALLOCATED(mnspcounts)) ALLOCATE (mnspcounts(nranks))
IF (.NOT.ALLOCATED(mnspdisps)) ALLOCATE (mnspdisps(nranks))

DO i=1,nranks
   mnspcounts(i)=(bcyclicendblockproc(i)-bcyclicstartblockproc(i)+1)*linblksize
END DO

mnspdisps(1)=0
DO i=2,nranks
   mnspdisps(i)=mnspdisps(i-1)+mnspcounts(i-1)
END DO

setuptomnspallgatherdone=.true.

END SUBROUTINE setuptomnspallgather
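! mnspcounts/mnspdisps play the same role as rcounts/disp in
! computeAllGatherParameters, but in mn-space units of linblksize
! rather than full block columns of size M.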
!-------------------------------------------------------------------------------

!-------------------------------------------------------------------------------
SUBROUTINE gettimes
USE timer_mod

! Get maximum
CALL mpi_allreduce(total_time,total_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(construct_hessian_time,construct_hessian_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(asymmetry_check_time,asymmetry_check_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(block_factorization_time,block_factorization_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(hessian_funct_island_time,hessian_funct_island_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_upperv,update_upperv_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(time_init_state,init_state_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_bfield,update_bfield_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_pres,update_pres_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_force,update_force_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(cv_current_time,cv_currents_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(get_force_harmonics_time,get_force_harmonics_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(bhtobf_time,bhtobf_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(tomnsp_time,tomnsp_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(toijsp_time,toijsp_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(to_full_mesh_time,to_full_mesh_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(gmres_time,gmres_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(gmres_wrap_time,gmres_wrap_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(paryax_time,paryax_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)
CALL mpi_allreduce(sendrecv_time,sendrecv_time_max,1,mpi_real8,mpi_max,siesta_comm,mpi_err)

! Get minimum
CALL mpi_allreduce(time_total,total_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(construct_hessian_time,construct_hessian_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(asymmetry_check_time,asymmetry_check_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(block_factorization_time,block_factorization_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(hessian_funct_island_time,hessian_funct_island_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_upperv,update_upperv_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(time_init_state,init_state_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_bfield,update_bfield_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_pres,update_pres_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(time_update_force,update_force_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(cv_current_time,cv_currents_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(get_force_harmonics_time,get_force_harmonics_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(bhtobf_time,bhtobf_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(tomnsp_time,tomnsp_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(toijsp_time,toijsp_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(to_full_mesh_time,to_full_mesh_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(gmres_time,gmres_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(gmres_wrap_time,gmres_wrap_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(paryax_time,paryax_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)
CALL mpi_allreduce(sendrecv_time,sendrecv_time_min,1,mpi_real8,mpi_min,siesta_comm,mpi_err)

WRITE (5000+rank, *)' Total runtime :', time_total
WRITE (5000+rank, *)' *Time spent in init_data :', init_data_time
WRITE (5000+rank, *)' *Time spent in init_metric_elements :', init_metric_elements_time
WRITE (5000+rank, *)' **Time spent in init_timers :', init_timers_time
WRITE (5000+rank, *)' **Time spent in read_wout_file :', read_wout_file_time
WRITE (5000+rank, *)' **Time spent in Spline_Fourier_Modes :', spline_fourier_modes_time
WRITE (5000+rank, *)' **Time spent in Add_Ghost_Points :', add_ghost_points_time
WRITE (5000+rank, *)' **Time spent in Spline_OneD_Array :', spline_oned_array_time
WRITE (5000+rank, *)' **Time spent in LoadRZL_VMEC :', loadrzl_vmec_time

WRITE (5000+rank, *)' *Time spent in test_fourier :', test_fourier_time
WRITE (5000+rank, *)' *Time spent in init_quantities :', init_quantities_time
WRITE (5000+rank, *)' *Time spent in init_evolution :', init_evolution_time
WRITE (5000+rank, *)' *Time spent in converge_diagonal :', converge_diagonal_time
WRITE (5000+rank, *)' *Time spent in comp_diag_elements :', comp_diag_elements_time
WRITE (5000+rank, *)' *Time spent in converge_blocks :', converge_blocks_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in converge_diagonal :', converge_diagonal_time
WRITE (5000+rank, *)' *diag_evolve :', diag_evolve_time
WRITE (5000+rank, *)' *diag_add_pert :', diag_add_pert_time
WRITE (5000+rank, *)' *Time spent in comp_diag_elements :', comp_diag_elements_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in converge_blocks :', converge_blocks_time
WRITE (5000+rank, *)' *block_evolve :', block_evolve_time
WRITE (5000+rank, *)' *block_add_pert :', block_add_pert_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in block_evolve :', block_evolve_time
WRITE (5000+rank, *)' *Time spent in Compute_Hessian_Blocks :', compute_hessian_time
WRITE (5000+rank, *)' *Time spent in evolve_funct_island :', evolve_funct_island_time
WRITE (5000+rank, *)' *Time spent in GMRES :', gmres_time
WRITE (5000+rank, *)' *Time spent in conj_grad :', conj_grad_time
WRITE (5000+rank, *)' *Time spent in evolve_restart_file :', evolve_restart_file_time
WRITE (5000+rank, *)' *Time spent in evolve_add_resistive_E :', evolve_add_resistive_e_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in Compute_Hessian_Blocks'
WRITE (5000+rank, *)' Time spent in Hessian_construction :', construct_hessian_time
WRITE (5000+rank, *)' Time spent in Hessian_asymmetry :', asymmetry_check_time
WRITE (5000+rank, *)' Time spent in block_factorization :', block_factorization_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in hess_funct_island :', hessian_funct_island_time
WRITE (5000+rank, *)' *Time spent in update_upperv :', time_update_upperv
WRITE (5000+rank, *)' *Time spent in init_state :', time_init_state
WRITE (5000+rank, *)' *Time spent in update_bfield :', time_update_bfield
WRITE (5000+rank, *)' *Time spent in update_pres :', time_update_pres
WRITE (5000+rank, *)' *Time spent in update_force :', time_update_force

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in cv_currents :', cv_current_time
WRITE (5000+rank, *)' Time spent in get_force_harmonics :', get_force_harmonics_time
WRITE (5000+rank, *)' Time spent in bhtobf :', bhtobf_time
WRITE (5000+rank, *)' Time spent in toijsp :', toijsp_time
WRITE (5000+rank, *)' Time spent in tomnsp :', tomnsp_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in GMRES :', gmres_time
WRITE (5000+rank, *)' Time spent in gmres_funct_island :', gmres_funct_island_time
WRITE (5000+rank, *)' Time spent in gmres_init_dgmres :', gmres_init_dgmres_time
WRITE (5000+rank, *)' Time spent in gmres_wrap :', gmres_wrap_time
WRITE (5000+rank, *)' Time spent in gmresr :', gmresr_time

WRITE (5000+rank, *)' '
WRITE (5000+rank, *)' Time spent in drive_dgmres :', drive_dgmres_time
WRITE (5000+rank, *)' Time spent in ParyAx :', paryax_time
WRITE (5000+rank, *)' Time spent in dcopy :', dcopy_time
WRITE (5000+rank, *)' Time spent in apply_precond :', time_apply_precon
WRITE (5000+rank, *)' Time spent in dgemv :', dgemv_time
WRITE (5000+rank, *)' Time spent in getnlforce :', getnlforce_time
WRITE (5000+rank, *)' Time spent in allgather :', gmres_wrap_allgather_time
WRITE (5000+rank, *)' Time spent in allreduce :', gmres_wrap_allreduce_time

END SUBROUTINE gettimes
!-------------------------------------------------------------------------------

END MODULE nscalingtools