2 MODULE parallel_vmec_module
9 INTEGER :: grank=0, gnranks=1
10 INTEGER :: vrank=0, vnranks=1
11 INTEGER :: rank=0, nranks=1
12 INTEGER :: last_ns = -1
13 INTEGER :: par_ns, mynsnum, par_nzeta, par_ntheta3
14 INTEGER :: par_ntmax, par_ntor, par_mpol1, par_nznt, par_nuv, par_nuv3
15 INTEGER :: blocksize, ntmaxblocksize
16 INTEGER :: tlglob, trglob, t1lglob, t1rglob
17 INTEGER :: nuvmin, nuvmax, nuv3min, nuv3max
20 INTEGER :: MPI_STAT(MPI_STATUS_SIZE)
22 INTEGER :: RUNVMEC_COMM_WORLD
25 INTEGER :: TWODCOMM, px, py
26 INTEGER :: NS_RESLTN=0
28 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: grid_procs
29 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: grid_size
30 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: grid_time
31 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: f3d_time
32 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: f3d_num
34 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: vgrid_time
36 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: tlglob_arr, trglob_arr
37 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: nuv3min_arr,nuv3max_arr
38 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: trow, brow
39 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: lcol, rcol
41 INTEGER,
ALLOCATABLE,
DIMENSION (:) :: blkrcounts, blkdisp
42 INTEGER,
ALLOCATABLE,
DIMENSION (:) :: ntblkrcounts, ntblkdisp
43 INTEGER,
ALLOCATABLE,
DIMENSION (:) :: nsrcounts, nsdisp
44 INTEGER,
PRIVATE :: mmax, nmax, tmax
46 LOGICAL :: PARVMEC=.false.
47 LOGICAL :: LV3FITCALL=.false.
48 LOGICAL :: LIFFREEB=.false.
49 LOGICAL :: LPRECOND=.false.
51 LOGICAL :: lactive=.false.
52 LOGICAL :: vlactive=.false.
54 CHARACTER*100 :: envvar
55 CHARACTER*100 :: envval
57 REAL(dp) :: bcyclic_comp_time
58 REAL(dp) :: bcyclic_comm_time
59 REAL(dp) :: waitall_time
60 REAL(dp) :: dgemm_time
61 REAL(dp) :: dgetrf_time
62 REAL(dp) :: dgetrs_time
63 REAL(dp) :: dgemv_time
64 REAL(dp) :: ForwardSolveLoop_time
65 REAL(dp),
DIMENSION(12) :: t
67 REAL(dp) :: allgather_time=0
68 REAL(dp) :: allreduce_time=0
69 REAL(dp) :: broadcast_time=0
70 REAL(dp) :: sendrecv_time=0
71 REAL(dp) :: scatter_time=0
77 MODULE PROCEDURE copy4lastntype, copy1lastntype, &
78 copym4lastntype, copym1lastntype
86 SUBROUTINE myenvvariables
90 CALL getenv(envvar,envval)
91 IF (envval.EQ.
'FALSE'.OR.envval.EQ.
'false' &
92 .OR.envval.EQ.
'F'.OR.envval.EQ.
'f')
THEN
98 CALL getenv(envvar,envval)
99 IF (envval.EQ.
'TRUE'.OR.envval.EQ.
'true' &
100 .OR.envval.EQ.
'T'.OR.envval.EQ.
't')
THEN
104 END SUBROUTINE myenvvariables
111 SUBROUTINE initializeparallel
114 CALL mpi_init(mpi_err)
115 CALL mpi_comm_rank(mpi_comm_world,grank,mpi_err)
116 CALL mpi_comm_size(mpi_comm_world,gnranks,mpi_err)
119 END SUBROUTINE initializeparallel
123 SUBROUTINE initrunvmec(INCOMM, LVALUE)
124 INTEGER,
INTENT(IN) :: INCOMM
125 LOGICAL,
INTENT(IN) :: LVALUE
130 CALL mpi_comm_dup(incomm,runvmec_comm_world,mpi_err)
131 CALL mpi_comm_rank(runvmec_comm_world,grank,mpi_err)
132 CALL mpi_comm_size(runvmec_comm_world,gnranks,mpi_err)
134 END SUBROUTINE initrunvmec
138 SUBROUTINE initsurfacecomm(ns, nzeta, ntheta3, ntmax, ntor, mpol1)
140 INTEGER,
INTENT(IN) :: ns, nzeta, ntheta3
141 INTEGER,
INTENT(IN) :: ntmax, ntor, mpol1
143 LOGICAL :: FIRSTPASS = .true.
147 par_ntheta3 = ntheta3
151 par_nznt = par_nzeta*par_ntheta3
152 blocksize = (par_mpol1 + 1)*(par_ntor+1)
153 ntmaxblocksize = 3*ntmax*blocksize
157 ns_resltn = ns_resltn + 1
160 IF (last_ns.NE.par_ns)
THEN
161 CALL setsurfacecommunicator
162 CALL setsurfacepartitions
163 CALL setsurfacepartitionarrays
167 CALL setsurfacecommunicator
168 CALL setsurfacepartitions
169 CALL setsurfacepartitionarrays
175 CALL setoutputfile(rank,nranks,
'parvmecinfo')
176 WRITE(tofu,*)
"============================================================"
177 WRITE(tofu,*)
'PARVMEC = ',parvmec
178 WRITE(tofu,*)
"> available processor count:", gnranks
179 WRITE(tofu,*)
'> global rank:', grank
180 WRITE(tofu,*)
'> nzeta: ', par_nzeta
181 WRITE(tofu,*)
'> ntheta3: ', par_ntheta3
182 WRITE(tofu,*)
'> ntor: ', par_ntor
183 WRITE(tofu,*)
'> mpol1: ', par_mpol1
184 WRITE(tofu,*)
'> ntmax: ', par_ntmax
185 WRITE(tofu,*)
'> blocksize: ',(par_mpol1+1)*(par_ntor+1)
186 WRITE(tofu,*)
"============================================================"
191 WRITE(tofu,*)
">>> grid resolution: ",par_ns
192 WRITE(tofu,*)
">>> active processors: ",nranks
193 WRITE(tofu,*)
">>> xc/gc size: ", par_ns*(par_ntor+1)*(par_mpol1+1)*3*ntmax
194 WRITE(tofu,*)
"------------------------------------------------------------"
201 END SUBROUTINE initsurfacecomm
208 SUBROUTINE setsurfacecommunicator
210 INTEGER :: num_active, color
212 num_active = min(gnranks, par_ns/2)
214 IF (grank.LT.num_active)
THEN
220 CALL mpi_comm_split(runvmec_comm_world, color, grank, ns_comm, &
223 IF (color .eq. 1)
THEN
224 CALL mpi_comm_size(ns_comm,nranks,mpi_err)
225 IF (nranks.NE.num_active)
THEN
226 stop
'num_active != nranks in InitSurfaceCommunicator!'
229 CALL mpi_comm_rank(ns_comm,rank,mpi_err)
236 END SUBROUTINE setsurfacecommunicator
243 SUBROUTINE setsurfacepartitions
245 INTEGER :: mypart, local_err
247 IF (par_ns.LT.nranks)
THEN
248 IF(grank.EQ.0) print *,
"NS is less than NRANKS. Aborting!"
253 IF (rank.LT.mod(par_ns,nranks)) mypart = mypart + 1
254 IF (mod(par_ns,nranks).NE.0)
THEN
255 IF (rank.LT.mod(par_ns,nranks))
THEN
257 ELSE IF (rank .GE. mod(par_ns,nranks))
THEN
258 tlglob = mod(par_ns, nranks)*(mypart + 1) &
259 + (rank - mod(par_ns, nranks))*mypart
266 trglob = tlglob + mypart - 1
268 t1lglob = tlglob - 1;
IF (rank.EQ.0) t1lglob=1
269 t1rglob = trglob + 1;
IF (rank.EQ.nranks-1) t1rglob=par_ns
272 CALL mpi_barrier(ns_comm,mpi_err)
273 WRITE(tofu,*)
'***********************************************************'
274 WRITE(tofu,*)
'* This version is not yet tested for mynsnum <= 2. Aborting!'
275 WRITE(tofu,*)
'***********************************************************'
278 WRITE(*,*)
'***********************************************************'
279 WRITE(*,*)
'* This version is not yet tested for mynsnum <= 2. Aborting!'
280 WRITE(*,*)
'***********************************************************'
283 CALL mpi_abort(ns_comm,local_err,mpi_err)
286 END SUBROUTINE setsurfacepartitions
293 SUBROUTINE setsurfacepartitionarrays
295 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: localpart
296 INTEGER :: i, smallpart, largepart
297 INTEGER,
SAVE :: lastns
299 smallpart = par_ns/nranks; largepart = smallpart
300 IF(mod(par_ns,nranks) .GT. 0) largepart = largepart + 1
302 ALLOCATE (localpart(nranks))
304 localpart(i + 1) = smallpart
306 IF (mod(par_ns,nranks) .GT. 0 .and. &
307 i .LT. mod(par_ns, nranks))
THEN
308 localpart(i + 1) = localpart(i + 1) + 1
312 IF(
ALLOCATED(tlglob_arr))
DEALLOCATE(tlglob_arr)
313 IF(
ALLOCATED(trglob_arr))
DEALLOCATE(trglob_arr)
314 ALLOCATE (tlglob_arr(nranks), trglob_arr(nranks))
318 tlglob_arr(i) = tlglob_arr(i - 1) + localpart(i - 1)
321 trglob_arr(i) = tlglob_arr(i) + localpart(i) - 1
324 DEALLOCATE (localpart)
326 CALL computensallgatherparameters(nranks)
327 CALL computeblockallgatherparameters(nranks)
328 CALL computentmaxblockallgatherparameters(nranks)
330 END SUBROUTINE setsurfacepartitionarrays
337 SUBROUTINE computentmaxblockallgatherparameters(activeranks)
339 INTEGER :: activeranks
341 IF(.NOT.
ALLOCATED(ntblkrcounts))
ALLOCATE(ntblkrcounts(activeranks))
342 IF(.NOT.
ALLOCATED(ntblkdisp))
ALLOCATE(ntblkdisp(activeranks))
343 DO i = 1, activeranks
344 ntblkrcounts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)*ntmaxblocksize
347 DO i = 2, activeranks
348 ntblkdisp(i) = ntblkdisp(i - 1) + ntblkrcounts(i - 1)
350 END SUBROUTINE computentmaxblockallgatherparameters
357 SUBROUTINE computeblockallgatherparameters(activeranks)
359 INTEGER :: activeranks
361 IF(.NOT.
ALLOCATED(blkrcounts))
ALLOCATE(blkrcounts(activeranks))
362 IF(.NOT.
ALLOCATED(blkdisp))
ALLOCATE(blkdisp(activeranks))
363 DO i = 1, activeranks
364 blkrcounts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)*blocksize
367 DO i = 2, activeranks
368 blkdisp(i) = blkdisp(i - 1) + blkrcounts(i - 1)
370 END SUBROUTINE computeblockallgatherparameters
377 SUBROUTINE computensallgatherparameters(activeranks)
379 INTEGER :: activeranks
381 IF(.NOT.
ALLOCATED(nsrcounts))
ALLOCATE(nsrcounts(activeranks))
382 IF(.NOT.
ALLOCATED(nsdisp))
ALLOCATE(nsdisp(activeranks))
383 DO i = 1, activeranks
384 nsrcounts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)
387 DO i = 2, activeranks
388 nsdisp(i) = nsdisp(i - 1) + nsrcounts(i - 1)
390 END SUBROUTINE computensallgatherparameters
394 SUBROUTINE finalizesurfacecomm(INCOMM)
396 INTEGER,
INTENT(INOUT) :: INCOMM
398 CALL mpi_comm_free(incomm, mpi_err)
400 IF (
ALLOCATED(ntblkrcounts))
DEALLOCATE(ntblkrcounts)
401 IF (
ALLOCATED(ntblkdisp))
DEALLOCATE(ntblkdisp)
402 IF (
ALLOCATED(blkrcounts))
DEALLOCATE(blkrcounts)
403 IF (
ALLOCATED(blkdisp))
DEALLOCATE(blkdisp)
404 IF (
ALLOCATED(nsrcounts))
DEALLOCATE(nsrcounts)
405 IF (
ALLOCATED(nsdisp))
DEALLOCATE(nsdisp)
407 END SUBROUTINE finalizesurfacecomm
411 SUBROUTINE finalizerunvmec(INCOMM)
413 INTEGER,
INTENT(INOUT) :: INCOMM
415 CALL mpi_comm_free(incomm, mpi_err)
416 IF(liffreeb)
CALL mpi_comm_free(vac_comm,mpi_err)
420 IF (.not.lv3fitcall) par_ns = 0
430 END SUBROUTINE finalizerunvmec
439 SUBROUTINE setvacuumcommunicator(nuv, nuv3, mgs)
441 INTEGER,
INTENT(IN) :: nuv, nuv3, mgs
442 INTEGER :: num_active, color, mypart
447 num_active = min(gnranks, par_nuv3)
449 IF(grank .LT. num_active)
THEN
454 CALL mpi_comm_split(runvmec_comm_world, color, grank, vac_comm, &
456 IF (color .eq. 1)
THEN
457 CALL mpi_comm_rank(vac_comm,vrank,mpi_err)
458 CALL mpi_comm_size(vac_comm,vnranks,mpi_err)
459 CALL setvacuumpartitions(nuv3,nuv3min, nuv3max)
460 CALL setnuv3partitionarrays
471 END SUBROUTINE setvacuumcommunicator
479 SUBROUTINE setvacuumpartitions(num, left, right)
481 INTEGER,
INTENT(IN) :: num
482 INTEGER,
INTENT(INOUT) :: left, right
485 IF (num.LT.vnranks)
THEN
486 IF(grank.EQ.0) print *,
"NUM is less than VNRANKS. Aborting!"
491 IF (vrank .LT. mod(num,vnranks)) mypart = mypart + 1
492 IF (mod(num,vnranks).NE.0)
THEN
493 IF (vrank.LT.mod(num,vnranks))
THEN
495 ELSE IF (vrank.GE.mod(num,vnranks))
THEN
496 left = mod(num,vnranks)*(mypart + 1) &
497 + (vrank - mod(num,vnranks))*mypart
504 right = left + mypart - 1
506 END SUBROUTINE setvacuumpartitions
513 SUBROUTINE setnuv3partitionarrays
515 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: localpart
516 INTEGER :: i, smallpart, largepart
518 IF (.NOT.
ALLOCATED(nuv3min_arr))
THEN
519 ALLOCATE(nuv3min_arr(vnranks), nuv3max_arr(vnranks))
522 smallpart = par_nuv3/vnranks
523 largepart = smallpart
524 IF (mod(par_nuv3,vnranks).GT.0)
THEN
525 largepart = largepart+1
528 ALLOCATE (localpart(vnranks))
529 DO i = 0, vnranks - 1
531 localpart(i + 1) = smallpart
533 IF (mod(par_nuv3, vnranks) .GT. 0 .AND. &
534 i .LT. mod(par_nuv3, vnranks))
THEN
535 localpart(i + 1) = localpart(i + 1) + 1
541 nuv3min_arr(i) = nuv3min_arr(i - 1) + localpart(i - 1)
544 nuv3max_arr(i) = nuv3min_arr(i) + localpart(i) - 1
547 DEALLOCATE (localpart)
549 END SUBROUTINE setnuv3partitionarrays
553 SUBROUTINE finalizeparallel
558 CALL getenv(envvar, envval)
559 IF (envval .EQ.
'TRUE')
THEN
561 ELSE IF (envval .EQ.
'FALSE')
THEN
567 WRITE(*,
'(1x,a,i4)')
'NO. OF PROCS: ',gnranks
568 WRITE(*,100)
'PARVMEC : ',parvmec
569 WRITE(*,100)
'LPRECOND : ',lprecond
570 WRITE(*,100)
'LV3FITCALL : ',lv3fitcall
575 CALL mpi_finalize(istat)
577 END SUBROUTINE finalizeparallel
583 SUBROUTINE printnsarray(arr, left, right, fileno, ifstop, string)
585 INTEGER,
INTENT(IN) :: fileno, left, right
586 LOGICAL,
INTENT(IN) :: ifstop
587 REAL(dp),
DIMENSION (par_ns + 1),
INTENT(IN) :: arr
588 CHARACTER*(*),
INTENT(IN) :: string
597 WRITE(fileno + rank,50) string, i, arr(i)
598 CALL flush(fileno+rank)
601 CALL flush(fileno+rank)
602 IF(ifstop) stop
'STOPPED CODE'
604 50
FORMAT(a6,1i6,1p,e20.6)
606 END SUBROUTINE printnsarray
610 SUBROUTINE stopmpi(code)
612 INTEGER,
INTENT(IN) :: code
615 CALL mpi_barrier(mpi_comm_world, mpi_err)
617 WRITE(*,*)
'Stopping program with code:', code
619 END SUBROUTINE stopmpi
623 SUBROUTINE parallel2serial4x(inarr,outarr)
625 REAL(dp),
INTENT(IN) :: inarr(par_ns*(par_ntor+1)*(par_mpol1+1)*3*(par_ntmax))
626 REAL(dp),
INTENT(OUT) :: outarr(par_ns*(par_ntor+1)*(par_mpol1+1)*3*(par_ntmax))
627 INTEGER :: i, j, k, l, lk, lks, lkp
635 lkp = j + (par_ntor + 1)*k &
636 + (par_ntor + 1)*(par_mpol1 + 1)*(i - 1) &
637 + (par_ntor + 1)*(par_mpol1 + 1)*par_ns*(l - 1) + 1
638 outarr(lks) = inarr(lkp)
644 END SUBROUTINE parallel2serial4x
648 SUBROUTINE serial2parallel4x (inarr,outarr)
650 REAL(dp),
INTENT(IN) :: inarr(par_ns*(par_ntor+1)*(par_mpol1+1)*3*(par_ntmax))
651 REAL(dp),
INTENT(OUT) :: outarr(par_ns*(par_ntor+1)*(par_mpol1+1)*3*(par_ntmax))
652 INTEGER :: i, j, k, l, lk, lks, lkp
660 lkp = j + (par_ntor + 1)*k &
661 + (par_ntor + 1)*(par_mpol1 + 1)*(i - 1) &
662 + (par_ntor + 1)*(par_mpol1 + 1)*par_ns*(l - 1) + 1
663 outarr(lkp) = inarr(lks)
669 END SUBROUTINE serial2parallel4x
673 SUBROUTINE parallel2serial2x(inarr, outarr)
675 REAL(dp),
DIMENSION(par_nzeta,par_ntheta3,par_ns),
INTENT(IN) :: inarr
676 REAL(dp),
DIMENSION(par_ns*par_nzeta*par_ntheta3+1),
INTENT(OUT) :: outarr
677 INTEGER :: i, j, k, l
680 DO k = 1, par_ntheta3
684 outarr(l) = inarr(j,k,i)
689 END SUBROUTINE parallel2serial2x
693 SUBROUTINE rprintoutlineararray(arr, left, right, flag, fileno)
695 REAL(dp),
DIMENSION(:) :: arr
696 INTEGER,
INTENT(IN) :: fileno, left, right
697 LOGICAL,
INTENT(IN) :: flag
698 INTEGER :: i, j, k, l, lk
700 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: tmp
701 ALLOCATE(tmp(ntmaxblocksize*par_ns))
703 CALL tolastntype(arr,tmp)
705 DO l = 1, 3*par_ntmax
711 lk = j + nmax*(k + mmax*((i - 1) + par_ns*(l - 1))) + 1
713 IF (left .LE. i .AND. i .LE. right)
THEN
714 WRITE(fileno+rank,50) i, j, k, l, tmp(lk)
715 CALL flush(fileno+rank)
723 50
FORMAT(4i6,1p,e24.14)
725 END SUBROUTINE rprintoutlineararray
730 SUBROUTINE printoutlineararray(arr, left, right, flag, fileno)
732 REAL(dp),
DIMENSION(ntmaxblocksize*par_ns) :: arr
733 INTEGER,
INTENT(IN) :: fileno, left, right
734 LOGICAL,
INTENT(IN) :: flag
735 INTEGER :: i, j, k, l, lk
738 DO l = 1, 3*par_ntmax
744 lk = j + nmax*(k + mmax*((i - 1) + par_ns*(l-1)))+1
746 IF (left .LE. i .AND. i .LE. right)
THEN
747 WRITE(fileno+rank,50) i, j, k, l, arr(lk)
748 CALL flush(fileno+rank)
755 50
FORMAT(4i6,1p,e24.14)
757 END SUBROUTINE printoutlineararray
763 SUBROUTINE printparallelijsparray (arr, left, right, fileno, ifstop, string)
765 INTEGER,
INTENT(IN) :: fileno, left, right
766 LOGICAL,
INTENT(IN) :: ifstop
767 REAL(dp),
DIMENSION (par_nzeta,par_ntheta3,par_ns),
INTENT(IN) :: arr
768 CHARACTER*(*),
INTENT(IN) :: string
775 WRITE(fileno+rank,50) string, i, j, k, arr(j,k,i)
776 CALL flush(fileno+rank)
781 CALL flush(fileno+rank)
782 IF(ifstop) stop
'STOPPED PARALLEL CODE'
783 50
FORMAT(a,3i6,1p,e24.14)
785 END SUBROUTINE printparallelijsparray
791 SUBROUTINE printserialijsparray (arr, left, right, fileno, ifstop, string)
793 INTEGER,
INTENT(IN) :: fileno, left, right
794 LOGICAL,
INTENT(IN) :: ifstop
795 REAL(dp),
DIMENSION (par_ns,par_nzeta,par_ntheta3),
INTENT(IN) :: arr
796 CHARACTER*(*),
INTENT(IN) :: string
802 DO k = 1, par_ntheta3
803 WRITE(fileno+rank,50) string, i, j, k, arr(i, j, k)
804 CALL flush(fileno+rank)
809 CALL flush(fileno+rank)
810 IF(ifstop) stop
'STOPPED SERIAL CODE'
811 50
FORMAT(a,3i6,1p,e24.14)
813 END SUBROUTINE printserialijsparray
819 SUBROUTINE printparallelmnsparray (arr, left, right, fileno, ifstop, string)
821 INTEGER,
INTENT(IN) :: fileno, left, right
822 LOGICAL,
INTENT(IN) :: ifstop
823 REAL(dp),
DIMENSION (0:par_ntor,0:par_mpol1,1:par_ns),
INTENT(IN) :: arr
824 CHARACTER*(*),
INTENT(IN) :: string
831 WRITE(fileno+rank,50) string, i, j, k, arr(k,j,i)
832 CALL flush(fileno+rank)
837 CALL flush(fileno+rank)
838 IF(ifstop) stop
'STOPPED PARALLEL CODE'
839 50
FORMAT(a,3i6,1p,e20.12)
841 END SUBROUTINE printparallelmnsparray
847 SUBROUTINE printserialmnsparray (arr, left, right, fileno, ifstop, string)
849 INTEGER,
INTENT(IN) :: fileno, left, right
850 LOGICAL,
INTENT(IN) :: ifstop
851 REAL(dp),
DIMENSION (1:par_ns,0:par_ntor,0:par_mpol1),
INTENT(IN) :: arr
852 CHARACTER*(*),
INTENT(IN) :: string
859 WRITE(fileno+rank,50) string, i, j, k, arr(i, j, k)
860 CALL flush(fileno+rank)
865 CALL flush(fileno+rank)
866 IF(ifstop) stop
'STOPPED SERIAL CODE'
867 50
FORMAT(a,3i6,1p,e20.12)
869 END SUBROUTINE printserialmnsparray
873 SUBROUTINE gather4xarray(arr)
877 REAL(dp),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax),
INTENT(INOUT) :: arr
879 INTEGER :: blksize, numjs
880 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: counts, disps
881 REAL(dp),
ALLOCATABLE,
DIMENSION(:,:,:,:) :: sendbuf
882 REAL(dp),
ALLOCATABLE,
DIMENSION(:) :: recvbuf
883 REAL(dp) :: allgvton, allgvtoff
885 IF (nranks.EQ.1 .OR. .NOT.lactive)
THEN
889 CALL second0(allgvton)
891 blksize = (par_ntor + 1)*(par_mpol1 + 1)*3*par_ntmax
892 numjs = trglob - tlglob + 1
893 ALLOCATE(sendbuf(0:par_ntor,0:par_mpol1,numjs,1:3*par_ntmax))
894 ALLOCATE(recvbuf(par_ns*blksize))
895 ALLOCATE(counts(nranks),disps(nranks))
898 counts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)*blksize
903 disps(i) = disps(i - 1) + counts(i - 1)
906 sendbuf(0:par_ntor,0:par_mpol1,1:numjs,1:3*par_ntmax) = &
907 arr(0:par_ntor,0:par_mpol1,tlglob:trglob,1:3*par_ntmax)
908 CALL mpi_allgatherv(sendbuf, numjs*blksize, mpi_real8, recvbuf, &
909 counts, disps, mpi_real8, ns_comm, mpi_err)
911 numjs = trglob_arr(i) - tlglob_arr(i) + 1
912 arr(0:par_ntor,0:par_mpol1,tlglob_arr(i):trglob_arr(i),1:3*par_ntmax) = &
913 reshape(recvbuf(disps(i)+1:disps(i)+counts(i)), &
914 (/par_ntor+1,par_mpol1+1,numjs,3*par_ntmax/))
917 DEALLOCATE(sendbuf, recvbuf)
918 DEALLOCATE(counts, disps)
919 CALL second0(allgvtoff)
920 allgather_time = allgather_time + (allgvtoff-allgvton)
921 END SUBROUTINE gather4xarray
927 SUBROUTINE gatherreordered4xarray(arr)
931 REAL(dp),
DIMENSION(0:par_ntor,0:par_mpol1,3*par_ntmax,par_ns),
INTENT(INOUT) :: arr
933 INTEGER :: blksize, numjs
934 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: counts, disps
935 REAL(dp) :: allgvton, allgvtoff
937 IF (nranks.EQ.1 .OR. .NOT.lactive)
THEN
941 CALL second0(allgvton)
943 blksize = (par_ntor + 1)*(par_mpol1 + 1)*3*par_ntmax
944 numjs = trglob - tlglob + 1
945 ALLOCATE(counts(nranks),disps(nranks))
948 counts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)*blksize
953 disps(i) = disps(i - 1) + counts(i - 1)
956 CALL mpi_allgatherv(mpi_in_place, numjs*blksize, mpi_real8, arr, &
957 counts, disps, mpi_real8, ns_comm, mpi_err)
959 DEALLOCATE(counts, disps)
960 CALL second0(allgvtoff)
961 allgather_time = allgather_time + (allgvtoff-allgvton)
962 END SUBROUTINE gatherreordered4xarray
967 SUBROUTINE gather2xarray(arr)
969 REAL(dp),
DIMENSION(par_nznt,par_ns),
INTENT(INOUT) :: arr
971 INTEGER :: par_nsmin, par_nsmax, blksize, numjs
972 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: counts, disps
973 REAL(dp) :: allgvton, allgvtoff
975 IF (nranks.EQ.1 .OR. .NOT.lactive)
THEN
979 CALL second0(allgvton)
982 numjs = trglob - tlglob+1
983 ALLOCATE(counts(nranks),disps(nranks))
986 counts(i) =(trglob_arr(i) - tlglob_arr(i) + 1)*blksize
991 disps(i) = disps(i - 1) + counts(i - 1)
995 CALL mpi_allgatherv(mpi_in_place, numjs*blksize, mpi_real8, arr, &
996 counts, disps, mpi_real8, ns_comm, mpi_err)
998 DEALLOCATE(counts, disps)
999 CALL second0(allgvtoff)
1000 allgather_time = allgather_time + (allgvtoff-allgvton)
1001 END SUBROUTINE gather2xarray
1005 SUBROUTINE gather1xarray(arr)
1007 REAL(dp),
DIMENSION(par_ns),
INTENT(INOUT) :: arr
1010 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: counts, disps
1011 REAL(dp) :: allgvton, allgvtoff
1013 IF (nranks.EQ.1 .OR. .NOT.lactive)
THEN
1017 CALL second0(allgvton)
1019 numjs = trglob - tlglob + 1
1020 ALLOCATE(counts(nranks),disps(nranks))
1023 counts(i) = (trglob_arr(i) - tlglob_arr(i) + 1)
1028 disps(i)= disps(i - 1) +counts(i - 1)
1031 #if defined(MPI_OPT)
1032 CALL mpi_allgatherv(mpi_in_place, numjs, mpi_real8, arr, counts, &
1033 disps, mpi_real8, ns_comm, mpi_err)
1035 DEALLOCATE(counts, disps)
1036 CALL second0(allgvtoff)
1037 allgather_time = allgather_time + (allgvtoff - allgvton)
1038 END SUBROUTINE gather1xarray
1047 SUBROUTINE setoutputfile(iam, nprocs, prefix)
1049 INTEGER,
INTENT(IN) :: iam, nprocs
1050 CHARACTER (*),
INTENT(IN) :: prefix
1052 CHARACTER(100) :: fname, cfname
1053 CHARACTER(50) :: ciam, cnprocs
1054 CHARACTER(25) :: cprefix
1057 WRITE(cnprocs,*) nprocs
1058 WRITE(cprefix,*) prefix
1059 ciam = adjustl(ciam)
1060 cnprocs = adjustl(cnprocs)
1061 cprefix = adjustl(cprefix)
1062 tofu = 4*nprocs + iam + 1000
1064 fname=trim(cprefix)//
'.txt'
1065 OPEN(unit=tofu, file=fname, status=
"REPLACE", action=
"WRITE", &
1066 form=
"FORMATTED",position=
"APPEND", iostat=istat)
1068 END SUBROUTINE setoutputfile
1072 SUBROUTINE tolastns(inarr, outarr)
1073 REAL(dp),
INTENT(IN),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: &
1075 INTEGER :: i, j, k, l, cnt
1076 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,par_ns) :: outarr
1078 DO i = t1lglob, t1rglob
1080 DO l = 1, 3*par_ntmax
1084 outarr(cnt,i) = inarr(j,k,i,l)
1090 END SUBROUTINE tolastns
1094 SUBROUTINE tolastntype(inarr, outarr)
1095 REAL(dp),
INTENT(INOUT),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: outarr
1096 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: inarr
1097 INTEGER :: i, j, k, l, cnt
1099 DO i=t1lglob, t1rglob
1105 outarr(j,k,i,l)=inarr(cnt,i)
1110 END SUBROUTINE tolastntype
1116 SUBROUTINE copylastns(a1, a2)
1117 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: a1
1118 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,par_ns) :: a2
1119 a2(:,t1lglob:t1rglob) = a1(:,t1lglob:t1rglob)
1120 END SUBROUTINE copylastns
1124 SUBROUTINE copy1lastntype(a1, a2)
1125 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize*par_ns) :: a1
1126 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize*par_ns) :: a2
1127 CALL copy4lastntype(a1, a2)
1128 END SUBROUTINE copy1lastntype
1132 SUBROUTINE copy4lastntype(a1, a2)
1133 REAL(dp),
INTENT(IN),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: a1
1134 REAL(dp),
INTENT(INOUT),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: a2
1135 a2(:,:,t1lglob:t1rglob,:) = a1(:,:,t1lglob:t1rglob,:)
1136 END SUBROUTINE copy4lastntype
1140 SUBROUTINE copym1lastntype(a1, a2, d1)
1141 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize*par_ns) :: a1
1142 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize*par_ns) :: a2
1143 REAL(dp),
INTENT(IN) :: d1
1144 CALL copym4lastntype(a1, a2, d1)
1145 END SUBROUTINE copym1lastntype
1149 SUBROUTINE copym4lastntype(a1, a2, d1)
1150 REAL(dp),
INTENT(IN),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: a1
1151 REAL(dp),
INTENT(INOUT),
DIMENSION(0:par_ntor,0:par_mpol1,par_ns,3*par_ntmax) :: a2
1152 REAL(dp),
INTENT(IN) :: d1
1153 a2(:,:,t1lglob:t1rglob,:) = d1*a1(:,:,t1lglob:t1rglob,:)
1154 END SUBROUTINE copym4lastntype
1158 SUBROUTINE saxlastns(a, x, vec)
1159 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: a, x
1160 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,par_ns) :: vec
1161 vec(:,tlglob:trglob) = a(:,tlglob:trglob)*x(:,tlglob:trglob)
1162 END SUBROUTINE saxlastns
1166 SUBROUTINE saxlastns1(a, x, vec)
1167 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,tlglob:trglob) :: a
1168 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: x
1169 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,tlglob:trglob) :: vec
1170 vec(:,tlglob:trglob) = a(:,tlglob:trglob)*x(:,tlglob:trglob)
1171 END SUBROUTINE saxlastns1
1174 SUBROUTINE saxlastntype(a, x, vec)
1175 REAL(dp),
INTENT(IN),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: a, x
1176 REAL(dp),
INTENT(INOUT),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: vec
1177 vec(:,t1lglob:t1rglob,:) = a(:,t1lglob:t1rglob,:)*x(:,t1lglob:t1rglob,:)
1178 END SUBROUTINE saxlastntype
1182 SUBROUTINE saxpbylastntype(a, x, b, y, vec)
1183 REAL(dp),
INTENT(IN),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: x, y
1184 REAL(dp),
INTENT(INOUT),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: vec
1185 REAL(dp),
INTENT(IN) :: a, b
1186 vec(:,t1lglob:t1rglob,:) = a*x(:,t1lglob:t1rglob,:) + b*y(:,t1lglob:t1rglob,:)
1187 END SUBROUTINE saxpbylastntype
1191 SUBROUTINE saxpbylastns(a, x, b, y, vec)
1192 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,tlglob:trglob) :: x
1193 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: y
1194 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,par_ns) :: vec
1195 REAL(dp),
INTENT(IN) :: a, b
1196 vec(:,tlglob:trglob) = a*x(:,tlglob:trglob) + b*y(:,tlglob:trglob)
1197 END SUBROUTINE saxpbylastns
1201 SUBROUTINE saxpby1lastns(a, x, b, y, vec)
1202 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: x, y
1203 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,par_ns) :: vec
1204 REAL(dp),
INTENT(IN) :: a, b
1205 vec(:,tlglob:trglob) = a*x(:,tlglob:trglob) + b*y(:,tlglob:trglob)
1206 END SUBROUTINE saxpby1lastns
1210 SUBROUTINE getderivlastns(x1, x2, d1, vec)
1211 REAL(dp),
INTENT(IN),
DIMENSION(ntmaxblocksize,par_ns) :: x1, x2
1212 REAL(dp),
INTENT(INOUT),
DIMENSION(ntmaxblocksize,tlglob:trglob) :: vec
1213 REAL(dp),
INTENT(IN) :: d1
1214 IF (d1 .eq. 0)
RETURN
1215 vec = (x1(:,tlglob:trglob) - x2(:,tlglob:trglob))/d1
1216 END SUBROUTINE getderivlastns
1220 SUBROUTINE zerolastntype(a1)
1221 REAL(dp),
INTENT(INOUT),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: a1
1222 a1(:,t1lglob:t1rglob,:) = 0
1223 END SUBROUTINE zerolastntype
1227 SUBROUTINE copyparallellinearsubarray(fromarr,toarr, first, last)
1228 INTEGER,
INTENT(IN) :: first, last
1229 REAL(dp),
INTENT(IN),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: fromarr
1230 REAL(dp),
INTENT(INOUT),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: toarr
1231 toarr(:,first:last,:) = fromarr(:,first:last,:)
1232 END SUBROUTINE copyparallellinearsubarray
1236 SUBROUTINE padsides(arr)
1237 REAL(dp),
INTENT(INOUT),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: arr
1238 INTEGER :: left, right, tag1=1
1239 REAL(dp) :: ton, toff
1243 #if defined(MPI_OPT)
1244 left = rank - 1;
IF(rank.EQ.0) left = mpi_proc_null
1245 right = rank + 1;
IF(rank.EQ.nranks-1) right = mpi_proc_null
1246 CALL mpi_sendrecv(arr(:,tlglob,:), ntmaxblocksize, mpi_real8, &
1247 left, tag1, arr(:,t1rglob,:), ntmaxblocksize, mpi_real8, &
1248 right, tag1, ns_comm, mpi_stat,mpi_err)
1249 CALL mpi_sendrecv(arr(:,trglob,:),ntmaxblocksize,mpi_real8, &
1250 right, tag1, arr(:,t1lglob,:), ntmaxblocksize,mpi_real8, &
1251 left, tag1, ns_comm, mpi_stat,mpi_err)
1255 sendrecv_time = sendrecv_time + (toff - ton)
1257 END SUBROUTINE padsides
1261 SUBROUTINE padsides1x(arr)
1262 REAL(dp),
INTENT(INOUT),
DIMENSION(par_ns) :: arr
1263 INTEGER :: left, right, tag1
1264 REAL(dp) :: ton, toff
1270 #if defined(MPI_OPT)
1271 left=rank-1;
IF(rank.EQ.0) left = mpi_proc_null
1272 right=rank+1;
IF(rank.EQ.nranks - 1) right = mpi_proc_null
1274 IF (grank .LT. nranks)
THEN
1275 CALL mpi_sendrecv(arr(tlglob), 1, mpi_real8, left, tag1, &
1276 arr(t1rglob), 1, mpi_real8, right, tag1, ns_comm, &
1282 sendrecv_time = sendrecv_time + (toff - ton)
1284 END SUBROUTINE padsides1x
1288 SUBROUTINE compareedgevalues(pxc, pxsave)
1289 REAL(dp),
INTENT(IN),
DIMENSION(blocksize,par_ns,3*par_ntmax) :: pxc, pxsave
1291 IF (rank .EQ. nranks - 1 .AND. &
1292 any(pxc(:,par_ns,:) .NE. pxsave(:,par_ns,:)))
THEN
1293 print *,
' xsave != xc at edge returning from GMRES'
1296 END SUBROUTINE compareedgevalues
1299 END MODULE parallel_vmec_module