V3FIT
parallel_include_module.f90
1  MODULE parallel_include_module
2 
3  USE stel_kinds
4  USE vmec_input, ONLY:lfreeb
5  USE mpi_inc
6 
7  USE parallel_vmec_module, ONLY: parvmec
8  USE parallel_vmec_module, ONLY: lv3fitcall
9  USE parallel_vmec_module, ONLY: lprecond
10  USE parallel_vmec_module, ONLY: tofu
11 
12  USE parallel_vmec_module, ONLY: num_grids
13  USE parallel_vmec_module, ONLY: grid_procs
14  USE parallel_vmec_module, ONLY: grid_size
15  USE parallel_vmec_module, ONLY: grid_time
16  USE parallel_vmec_module, ONLY: vgrid_time
17  USE parallel_vmec_module, ONLY: f3d_time
18  USE parallel_vmec_module, ONLY: f3d_num
19  USE parallel_vmec_module, ONLY: runvmec_comm_world
20  USE parallel_vmec_module, ONLY: ns_comm
21  USE parallel_vmec_module, ONLY: vac_comm
22  USE parallel_vmec_module, ONLY: twodcomm
23  USE parallel_vmec_module, ONLY: px, py
24  USE parallel_vmec_module, ONLY: ns_resltn
25  USE parallel_vmec_module, ONLY: mpi_err
26  USE parallel_vmec_module, ONLY: rank, nranks
27  USE parallel_vmec_module, ONLY: lactive, vlactive
28  USE parallel_vmec_module, ONLY: grank, gnranks
29  USE parallel_vmec_module, ONLY: vrank, vnranks
30  USE parallel_vmec_module, ONLY: nuvmin, nuvmax
31  USE parallel_vmec_module, ONLY: nuv3min, nuv3max
32  USE parallel_vmec_module, ONLY: tlglob, trglob
33  USE parallel_vmec_module, ONLY: t1lglob, t1rglob
34  USE parallel_vmec_module, ONLY: tlglob_arr, trglob_arr
35  USE parallel_vmec_module, ONLY: nuv3min_arr,nuv3max_arr
36  USE parallel_vmec_module, ONLY: trow, brow
37  USE parallel_vmec_module, ONLY: lcol, rcol
38  USE parallel_vmec_module, ONLY: par_ntmax
39  USE parallel_vmec_module, ONLY: par_mpol1
40  USE parallel_vmec_module, ONLY: par_ntor
41  USE parallel_vmec_module, ONLY: par_ns
42  USE parallel_vmec_module, ONLY: par_nuv
43  USE parallel_vmec_module, ONLY: blkrcounts
44  USE parallel_vmec_module, ONLY: blkdisp
45  USE parallel_vmec_module, ONLY: ntblkrcounts
46  USE parallel_vmec_module, ONLY: ntblkdisp
47  USE parallel_vmec_module, ONLY: nsrcounts
48  USE parallel_vmec_module, ONLY: nsdisp
49  USE parallel_vmec_module, ONLY: blocksize
50  USE parallel_vmec_module, ONLY: ntmaxblocksize
51 
52  USE parallel_vmec_module, ONLY: stopmpi
53 
54  USE parallel_vmec_module, ONLY: tolastntype
55  USE parallel_vmec_module, ONLY: copylastntype
56 
57  USE parallel_vmec_module, ONLY: tolastns
58  USE parallel_vmec_module, ONLY: copylastns
59 
60  USE parallel_vmec_module, ONLY: copyparallellinearsubarray
61 
62  USE parallel_vmec_module, ONLY: setvacuumpartitions
63  USE parallel_vmec_module, ONLY: setvacuumcommunicator
64 
65  USE parallel_vmec_module, ONLY: computeblockallgatherparameters
66  USE parallel_vmec_module, ONLY: computensallgatherparameters
67 
68  USE parallel_vmec_module, ONLY: padsides
69 
70  USE parallel_vmec_module, ONLY: parallel2serial2x
71  USE parallel_vmec_module, ONLY: parallel2serial4x
72 
73  USE parallel_vmec_module, ONLY: serial2parallel4x
74 
75  USE parallel_vmec_module, ONLY: gather1xarray
76  USE parallel_vmec_module, ONLY: gather2xarray
77  USE parallel_vmec_module, ONLY: gather4xarray
78  USE parallel_vmec_module, ONLY: gatherreordered4xarray
79 
80  USE parallel_vmec_module, ONLY: printparallelijsparray
81  USE parallel_vmec_module, ONLY: printserialijsparray
82  USE parallel_vmec_module, ONLY: printparallelmnsparray
83  USE parallel_vmec_module, ONLY: printserialmnsparray
84  USE parallel_vmec_module, ONLY: printnsarray
85  USE parallel_vmec_module, ONLY: printoutlineararray
86  USE parallel_vmec_module, ONLY: rprintoutlineararray
87 
88  USE parallel_vmec_module, ONLY: bcyclic_comp_time
89  USE parallel_vmec_module, ONLY: bcyclic_comm_time
90  USE parallel_vmec_module, ONLY: waitall_time
91  USE parallel_vmec_module, ONLY: dgemm_time
92  USE parallel_vmec_module, ONLY: dgemv_time
93  USE parallel_vmec_module, ONLY: dgetrf_time
94  USE parallel_vmec_module, ONLY: dgetrs_time
95  USE parallel_vmec_module, ONLY: forwardsolveloop_time
96  USE parallel_vmec_module, ONLY: t
97 
98  USE parallel_vmec_module, ONLY: allgather_time
99  USE parallel_vmec_module, ONLY: allreduce_time
100  USE parallel_vmec_module, ONLY: broadcast_time
101  USE parallel_vmec_module, ONLY: sendrecv_time
102  USE parallel_vmec_module, ONLY: scatter_time
103 
104  LOGICAL :: LRESIDUECALL=.false.
105  LOGICAL :: LGMRESCALL=.false.
106  LOGICAL :: INFILEOUT=.false.
107  LOGICAL :: RVCTRIGGER=.false.
108  INTEGER :: RVCCALLNUM
109 
110  !Common timers
111  REAL(dp) :: mgrid_file_read_time=0
112  REAL(dp) :: total_time=0
113  REAL(dp) :: evolve_time=0
114  REAL(dp) :: restart_time=0
115  REAL(dp) :: eqsolve_time=0
116  REAL(dp) :: fileout_time=0
117  REAL(dp) :: runvmec_time=0
118  REAL(dp) :: init_parallel_time=0
119  REAL(dp) :: init_radial_time=0
120  REAL(dp) :: allocate_ns_time=0
121  REAL(dp) :: profile1d_time=0
122  REAL(dp) :: profile3d_time=0
123  REAL(dp) :: before_main_loop_time=0
124  REAL(dp) :: reset_params_time=0
125  REAL(dp) :: vsetup_time=0
126  REAL(dp) :: readin_time=0
127  REAL(dp) :: fixarray_time=0
128 
129  REAL(dp) :: myenvvar_time=0
130  REAL(dp) :: init_MPI_time=0
131  REAL(dp) :: read_namelist_time=0
132  REAL(dp) :: get_args_time=0
133  REAL(dp) :: safe_open_time=0
134 
135  REAL(dp) :: jacob1=0, jacob2=0
136 
137  INTEGER :: nfunct3d=0
138  REAL(dp) :: old_vacuum_time=0
139 
140  REAL(dp) :: fo_funct3d_time=0
141  REAL(dp) :: fo_eqfor_time=0
142  REAL(dp) :: fo_wrout_time=0
143  REAL(dp) :: fo_prepare_time=0
144  REAL(dp) :: fo_par_call_time=0
145 
146  !Vacuum, Scalpot variables
147  INTEGER :: blksize_scp, numjs_vac
148  INTEGER, ALLOCATABLE, DIMENSION(:) :: counts_vac, disps_vac, lindx_scp
149 
150  !Parallel timers
151  REAL(dp) :: totzsps_time=0
152  REAL(dp) :: totzspa_time=0
153  REAL(dp) :: jacobian_time=0
154  REAL(dp) :: bcovar_time=0
155  REAL(dp) :: alias_time=0
156  REAL(dp) :: forces_time=0
157  REAL(dp) :: tomnsps_time=0
158  REAL(dp) :: tomnspa_time=0
159  REAL(dp) :: symrzl_time=0
160  REAL(dp) :: symforces_time=0
161  REAL(dp) :: residue_time=0
162  REAL(dp) :: tridslv_time=0
163  REAL(dp) :: scalfor_time=0
164  REAL(dp) :: funct3d_time=0
165  REAL(dp) :: gmres_time=0
166  REAL(dp) :: guess_axis_time=0
167  REAL(dp) :: vacuum_time=0
168  REAL(dp) :: precal_time=0
169  REAL(dp) :: surface_time=0
170  REAL(dp) :: bextern_time=0
171  REAL(dp) :: scalpot_time=0
172  REAL(dp) :: solver_time=0
173  REAL(dp) :: analyt_time=0
174  REAL(dp) :: tolicu_time=0
175  REAL(dp) :: belicu_time=0
176  REAL(dp) :: becoil_time=0
177  REAL(dp) :: greenf_time=0
178  REAL(dp) :: fourp_time=0
179  REAL(dp) :: fouri_time=0
180 
181  REAL(dp) :: init_time=0
182  REAL(dp) :: setup_time=0
183  REAL(dp) :: forwardsolve_time=0
184  REAL(dp) :: backwardsolve_time=0
185  REAL(dp) :: finalize_time=0
186 
187  REAL(dp) :: fill_blocks_time=0
188  REAL(dp) :: compute_blocks_time=0
189  REAL(dp) :: bcyclic_forwardsolve_time=0
190  REAL(dp) :: bcyclic_backwardsolve_time=0
191 
192  !Serial timers
193  REAL(dp) :: s_totzsps_time=0
194  REAL(dp) :: s_totzspa_time=0
195  REAL(dp) :: s_jacobian_time=0
196  REAL(dp) :: s_bcovar_time=0
197  REAL(dp) :: s_alias_time=0
198  REAL(dp) :: s_forces_time=0
199  REAL(dp) :: s_tomnsps_time=0
200  REAL(dp) :: s_tomnspa_time=0
201  REAL(dp) :: s_symrzl_time=0
202  REAL(dp) :: s_symforces_time=0
203  REAL(dp) :: s_residue_time=0
204  REAL(dp) :: s_tridslv_time=0
205  REAL(dp) :: s_scalfor_time=0
206  REAL(dp) :: s_gmres_time=0
207  REAL(dp) :: s_guess_axis_time=0
208  REAL(dp) :: s_vacuum_time=0
209  REAL(dp) :: s_precal_time=0
210  REAL(dp) :: s_surface_time=0
211  REAL(dp) :: s_bextern_time=0
212  REAL(dp) :: s_scalpot_time=0
213  REAL(dp) :: s_solver_time=0
214  REAL(dp) :: s_analyt_time=0
215  REAL(dp) :: s_tolicu_time=0
216  REAL(dp) :: s_belicu_time=0
217  REAL(dp) :: s_becoil_time=0
218  REAL(dp) :: s_greenf_time=0
219  REAL(dp) :: s_fourp_time=0
220  REAL(dp) :: s_fouri_time=0
221 
222  REAL(rprec) :: maxvalue, minvalue
223  INTEGER :: maxrank, minrank
224 
225  !V3FIT
226  INTEGER :: RUNVMEC_PASS=0
227 
228 ! INTEGER :: v3fgcomm, v3freccomm, v3feqcomm
229 ! INTEGER :: v3fgnranks, v3frecnranks, v3feqnranks
230 ! INTEGER :: v3fgrank, v3frecrank, v3feqrank
231 ! LOGICAL :: lreconactive=.FALSE.
232 
233 contains
234 
235  !------------------------------------------------
236  ! Print out parallel timing information
237  !------------------------------------------------
238  SUBROUTINE printtimes
239  IMPLICIT NONE
240  DOUBLE PRECISION :: ind(2,1), outd(2,1)
241  DOUBLE PRECISION :: mt(nranks)
242  LOGICAL :: LMINMAXFILE=.false.
243 
244  IF (parvmec) THEN
245 #if defined(MPI_OPT)
246  ind(1,1) = total_time; ind(2,1) = dble(rank)
247  CALL mpi_reduce(ind,outd,1,mpi_2double_precision,mpi_minloc,0,ns_comm,mpi_err)
248  IF(grank.EQ.0) THEN
249  minvalue = outd(1,1); minrank = outd(2,1)
250  END IF
251  CALL mpi_reduce(ind,outd,1,mpi_2double_precision,mpi_maxloc,0,ns_comm,mpi_err)
252  IF(grank.EQ.0) THEN
253  maxvalue = outd(1,1); maxrank = outd(2,1)
254  END IF
255  CALL mpi_bcast(minvalue,1,mpi_double_precision,0,ns_comm,mpi_err)
256  CALL mpi_bcast(maxvalue,1,mpi_double_precision,0,ns_comm,mpi_err)
257  CALL mpi_bcast(minrank,1,mpi_integer,0,ns_comm,mpi_err)
258  CALL mpi_bcast(maxrank,1,mpi_integer,0,ns_comm,mpi_err)
259 
260  IF (.false..AND.abs(maxvalue-minvalue).GE.0.0) THEN
261  lminmaxfile=.true.
262  IF (grank.EQ.0) THEN
263  WRITE(*,*)
264  WRITE(*,*)'---------------------------------------------------'
265  WRITE(*,*) 'Min and max runtimes exceed tolerance....'
266  WRITE(*,'(A12,F15.6,A12,I6)')'Max. time: ',maxvalue,'Rank :',maxrank
267  WRITE(*,'(A12,F15.6,A12,I6)')'Min. time: ',minvalue,'Rank :',minrank
268  WRITE(*,*)'---------------------------------------------------'
269  WRITE(*,*)
270  END IF
271  END IF
272 #endif
273  ELSE
274  totzsps_time = s_totzsps_time
275  totzspa_time = s_totzspa_time
276  jacobian_time = s_jacobian_time
277  bcovar_time = s_bcovar_time
278  alias_time = s_alias_time
279  forces_time = s_forces_time
280  tomnsps_time = s_tomnsps_time
281  tomnspa_time = s_tomnspa_time
282  symrzl_time = s_symrzl_time
283  symforces_time = s_symforces_time
284  residue_time = s_residue_time
285  tridslv_time = s_tridslv_time
286  scalfor_time = s_scalfor_time
287  gmres_time = s_gmres_time
288  guess_axis_time = s_guess_axis_time
289  vacuum_time = s_vacuum_time
290  precal_time = s_precal_time
291  surface_time = s_surface_time
292  bextern_time = s_bextern_time
293  scalpot_time = s_scalpot_time
294  solver_time = s_solver_time
295  analyt_time = s_analyt_time
296  tolicu_time = s_tolicu_time
297  belicu_time = s_belicu_time
298  becoil_time = s_becoil_time
299  greenf_time = s_greenf_time
300  fourp_time = s_fourp_time
301  fouri_time = s_fouri_time
302  maxvalue=total_time; minvalue=total_time
303  END IF
304 
305  IF (parvmec.AND.lminmaxfile) THEN
306  IF(grank.EQ.minrank) THEN
307  CALL writetimes('min-timings.txt')
308  END IF
309  IF(grank.EQ.maxrank) THEN
310  CALL writetimes('max-timings.txt')
311  END IF
312  END IF
313 
314  IF(grank.EQ.0) THEN
315  CALL writetimes('timings.txt')
316  END IF
317 
318  END SUBROUTINE printtimes
319 !------------------------------------------------
320 
321 !------------------------------------------------
322  SUBROUTINE writetimes(fname)
323  IMPLICIT NONE
324  CHARACTER(*), INTENT(IN) :: fname
325  CHARACTER(100) :: cfname
326  CHARACTER(50) :: ciam, cnprocs
327  INTEGER :: TFILE, i, istat, igrd
328 
329  WRITE(ciam,*) rank; WRITE(cnprocs,*) nranks
330  ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
331  tfile = 9*nranks+grank+1000
332  OPEN(unit=tfile, file=fname, status="REPLACE", action="WRITE",&
333  &form="FORMATTED",position="APPEND", iostat=istat)
334 
335  IF (parvmec) THEN
336  WRITE(tfile,*) '====================== PARALLEL TIMINGS ===================='
337  ELSE
338  WRITE(tfile,*) '======================= SERIAL TIMINGS ====================='
339  END IF
340 
341  WRITE(tfile,'(A20,A4,F15.6)') 'total', ': ', total_time
342  WRITE(tfile,'(A20,A4,I15)') 'rank', ': ', rank
343  WRITE(tfile,'(A20,A4,F15.6)') 'mgrid file read time', ': ', mgrid_file_read_time
344  WRITE(tfile,'(A20,A4,I15)') 'No. of procs', ': ', gnranks
345  WRITE(tfile,*)
346 
347  IF (lfreeb) THEN
348  DO igrd=1, num_grids
349  WRITE(tfile,'(A20,A4,3I15,F15.6)') '--- non-vacuum', ': ',&
350  f3d_num(igrd), grid_size(igrd), grid_procs(igrd), f3d_time(igrd)-vgrid_time(igrd)
351  END DO
352  WRITE(tfile,*)
353 
354  WRITE(tfile,'(A20,A4,I15)') 'VNRANKS ',': ', vnranks
355  DO igrd=1, num_grids
356  WRITE(tfile,'(A20,A4,I15,F15.6)') '--- vacuum ', ': ',&
357  grid_size(igrd),vgrid_time(igrd)
358  END DO
359  WRITE(tfile,*)
360  ELSE
361  DO igrd=1, num_grids
362  WRITE(tfile,'(A20,A4,3I15,F15.6)') '--- non-vacuum', ': ',&
363  f3d_num(igrd), grid_size(igrd), grid_procs(igrd), f3d_time(igrd)
364  END DO
365  WRITE(tfile,*)
366  END IF
367 
368  WRITE(tfile,'(A20,A4,F15.6)') 'runvmec', ': ', runvmec_time
369  WRITE(tfile,*)
370 
371  WRITE(tfile,'(A20,A4,F15.6)') 'init radial', ': ',init_radial_time
372  WRITE(tfile,'(A20,A4,F15.6)') 'eqsolve', ': ', eqsolve_time
373  WRITE(tfile,'(A20,A4,F15.6)') 'fileout', ': ', fileout_time
374  WRITE(tfile,*)
375  WRITE(tfile,'(A20,A4,F15.6)') 'evolve', ': ', evolve_time
376  WRITE(tfile,'(A20,A4,F15.6)') 'funct3d', ': ', funct3d_time
377  WRITE(tfile,'(A20,A4,I15)') 'nfunct3d', ': ', nfunct3d
378  WRITE(tfile,*)
379  WRITE(tfile,'(A20,A4,F15.6)') 'totzsps', ': ', totzsps_time
380  WRITE(tfile,'(A20,A4,F15.6)') 'totzspa', ': ', totzspa_time
381  WRITE(tfile,'(A20,A4,F15.6)') 'symrzl', ': ', symrzl_time
382  WRITE(tfile,'(A20,A4,F15.6)') 'jacobian', ': ', jacobian_time
383  WRITE(tfile,'(A20,A4,F15.6)') 'bcovar', ': ', bcovar_time
384  WRITE(tfile,'(A20,A4,F15.6)') 'vacuum', ': ', vacuum_time
385  WRITE(tfile,*)
386  WRITE(tfile,'(A20,A4,F15.6)') '- precal', ': ', precal_time
387  WRITE(tfile,'(A20,A4,F15.6)') '- surface', ': ', surface_time
388  WRITE(tfile,*)
389  WRITE(tfile,'(A20,A4,F15.6)') '- bextern', ': ', bextern_time
390  WRITE(tfile,*)
391  WRITE(tfile,'(A20,A4,F15.6)') '-- becoil', ': ', becoil_time
392  WRITE(tfile,'(A20,A4,F15.6)') '-- tolicu', ': ', tolicu_time
393  WRITE(tfile,'(A20,A4,F15.6)') '-- belicu', ': ', belicu_time
394  WRITE(tfile,*)
395  WRITE(tfile,'(A20,A4,F15.6)') '- scalpot', ': ', scalpot_time
396  WRITE(tfile,*)
397  WRITE(tfile,'(A20,A4,F15.6)') '-- analyt', ': ', analyt_time
398  WRITE(tfile,'(A20,A4,F15.6)') '-- greenf', ': ', greenf_time
399  WRITE(tfile,'(A20,A4,F15.6)') '-- fourp', ': ', fourp_time
400  WRITE(tfile,'(A20,A4,F15.6)') '-- fouri', ': ', fouri_time
401  WRITE(tfile,*)
402  WRITE(tfile,'(A20,A4,F15.6)') '- solver', ': ', solver_time
403  WRITE(tfile,*)
404  WRITE(tfile,'(A20,A4,F15.6)') 'alias', ': ', alias_time
405  WRITE(tfile,'(A20,A4,F15.6)') 'forces', ': ', forces_time
406  WRITE(tfile,'(A20,A4,F15.6)') 'symforces', ': ', symforces_time
407  WRITE(tfile,'(A20,A4,F15.6)') 'tomnsps', ': ', tomnsps_time
408  WRITE(tfile,'(A20,A4,F15.6)') 'tomnspa', ': ', tomnspa_time
409  WRITE(tfile,'(A20,A4,F15.6)') 'residue', ': ', residue_time
410  WRITE(tfile,'(A20,A4,F15.6)') '-- tridslv', ': ', tridslv_time
411  WRITE(tfile,*)
412  WRITE(tfile,*) '============================================================'
413  IF (parvmec) THEN
414  WRITE(tfile,*)
415  WRITE(tfile,'(A20,A4,F15.6)') 'allgather', ': ', allgather_time
416  WRITE(tfile,'(A20,A4,F15.6)') 'allreduce', ': ', allreduce_time
417  WRITE(tfile,'(A20,A4,F15.6)') 'broadcast', ': ', broadcast_time
418  WRITE(tfile,'(A20,A4,F15.6)') 'sendrecv ', ': ', sendrecv_time
419  WRITE(tfile,*)
420  WRITE(tfile,'(A20,A4,F15.6)') 'Fill_blocks ', ': ', fill_blocks_time
421  WRITE(tfile,'(A20,A4,F15.6)') 'Compute blocks ', ': ', compute_blocks_time
422  WRITE(tfile,'(A20,A4,F15.6)') 'Forward solve ', ': ', bcyclic_forwardsolve_time
423  WRITE(tfile,'(A20,A4,F15.6)') 'Backward solve ', ': ', bcyclic_backwardsolve_time
424  WRITE(tfile,*) '============================================================'
425  END IF
426  CALL flush(tfile)
427  CLOSE(tfile)
428 
429  !WRITE(*,*) 'totzsps : ', totzsps_time
430  END SUBROUTINE writetimes
431 !------------------------------------------------
432 
433  END MODULE parallel_include_module
434 !------------------------------------------------
435 
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11
parallel_vmec_module::copylastntype
Definition: parallel_vmec_module.f90:76