1 MODULE parallel_include_module
4 USE vmec_input,
ONLY:lfreeb
7 USE parallel_vmec_module,
ONLY: parvmec
8 USE parallel_vmec_module,
ONLY: lv3fitcall
9 USE parallel_vmec_module,
ONLY: lprecond
10 USE parallel_vmec_module,
ONLY: tofu
12 USE parallel_vmec_module,
ONLY: num_grids
13 USE parallel_vmec_module,
ONLY: grid_procs
14 USE parallel_vmec_module,
ONLY: grid_size
15 USE parallel_vmec_module,
ONLY: grid_time
16 USE parallel_vmec_module,
ONLY: vgrid_time
17 USE parallel_vmec_module,
ONLY: f3d_time
18 USE parallel_vmec_module,
ONLY: f3d_num
19 USE parallel_vmec_module,
ONLY: runvmec_comm_world
20 USE parallel_vmec_module,
ONLY: ns_comm
21 USE parallel_vmec_module,
ONLY: vac_comm
22 USE parallel_vmec_module,
ONLY: twodcomm
23 USE parallel_vmec_module,
ONLY: px, py
24 USE parallel_vmec_module,
ONLY: ns_resltn
25 USE parallel_vmec_module,
ONLY: mpi_err
26 USE parallel_vmec_module,
ONLY: rank, nranks
27 USE parallel_vmec_module,
ONLY: lactive, vlactive
28 USE parallel_vmec_module,
ONLY: grank, gnranks
29 USE parallel_vmec_module,
ONLY: vrank, vnranks
30 USE parallel_vmec_module,
ONLY: nuvmin, nuvmax
31 USE parallel_vmec_module,
ONLY: nuv3min, nuv3max
32 USE parallel_vmec_module,
ONLY: tlglob, trglob
33 USE parallel_vmec_module,
ONLY: t1lglob, t1rglob
34 USE parallel_vmec_module,
ONLY: tlglob_arr, trglob_arr
35 USE parallel_vmec_module,
ONLY: nuv3min_arr,nuv3max_arr
36 USE parallel_vmec_module,
ONLY: trow, brow
37 USE parallel_vmec_module,
ONLY: lcol, rcol
38 USE parallel_vmec_module,
ONLY: par_ntmax
39 USE parallel_vmec_module,
ONLY: par_mpol1
40 USE parallel_vmec_module,
ONLY: par_ntor
41 USE parallel_vmec_module,
ONLY: par_ns
42 USE parallel_vmec_module,
ONLY: par_nuv
43 USE parallel_vmec_module,
ONLY: blkrcounts
44 USE parallel_vmec_module,
ONLY: blkdisp
45 USE parallel_vmec_module,
ONLY: ntblkrcounts
46 USE parallel_vmec_module,
ONLY: ntblkdisp
47 USE parallel_vmec_module,
ONLY: nsrcounts
48 USE parallel_vmec_module,
ONLY: nsdisp
49 USE parallel_vmec_module,
ONLY: blocksize
50 USE parallel_vmec_module,
ONLY: ntmaxblocksize
52 USE parallel_vmec_module,
ONLY: stopmpi
54 USE parallel_vmec_module,
ONLY: tolastntype
57 USE parallel_vmec_module,
ONLY: tolastns
58 USE parallel_vmec_module,
ONLY: copylastns
60 USE parallel_vmec_module,
ONLY: copyparallellinearsubarray
62 USE parallel_vmec_module,
ONLY: setvacuumpartitions
63 USE parallel_vmec_module,
ONLY: setvacuumcommunicator
65 USE parallel_vmec_module,
ONLY: computeblockallgatherparameters
66 USE parallel_vmec_module,
ONLY: computensallgatherparameters
68 USE parallel_vmec_module,
ONLY: padsides
70 USE parallel_vmec_module,
ONLY: parallel2serial2x
71 USE parallel_vmec_module,
ONLY: parallel2serial4x
73 USE parallel_vmec_module,
ONLY: serial2parallel4x
75 USE parallel_vmec_module,
ONLY: gather1xarray
76 USE parallel_vmec_module,
ONLY: gather2xarray
77 USE parallel_vmec_module,
ONLY: gather4xarray
78 USE parallel_vmec_module,
ONLY: gatherreordered4xarray
80 USE parallel_vmec_module,
ONLY: printparallelijsparray
81 USE parallel_vmec_module,
ONLY: printserialijsparray
82 USE parallel_vmec_module,
ONLY: printparallelmnsparray
83 USE parallel_vmec_module,
ONLY: printserialmnsparray
84 USE parallel_vmec_module,
ONLY: printnsarray
85 USE parallel_vmec_module,
ONLY: printoutlineararray
86 USE parallel_vmec_module,
ONLY: rprintoutlineararray
88 USE parallel_vmec_module,
ONLY: bcyclic_comp_time
89 USE parallel_vmec_module,
ONLY: bcyclic_comm_time
90 USE parallel_vmec_module,
ONLY: waitall_time
91 USE parallel_vmec_module,
ONLY: dgemm_time
92 USE parallel_vmec_module,
ONLY: dgemv_time
93 USE parallel_vmec_module,
ONLY: dgetrf_time
94 USE parallel_vmec_module,
ONLY: dgetrs_time
95 USE parallel_vmec_module,
ONLY: forwardsolveloop_time
96 USE parallel_vmec_module,
ONLY: t
98 USE parallel_vmec_module,
ONLY: allgather_time
99 USE parallel_vmec_module,
ONLY: allreduce_time
100 USE parallel_vmec_module,
ONLY: broadcast_time
101 USE parallel_vmec_module,
ONLY: sendrecv_time
102 USE parallel_vmec_module,
ONLY: scatter_time
104 LOGICAL :: LRESIDUECALL=.false.
105 LOGICAL :: LGMRESCALL=.false.
106 LOGICAL :: INFILEOUT=.false.
107 LOGICAL :: RVCTRIGGER=.false.
108 INTEGER :: RVCCALLNUM
111 REAL(dp) :: mgrid_file_read_time=0
112 REAL(dp) :: total_time=0
113 REAL(dp) :: evolve_time=0
114 REAL(dp) :: restart_time=0
115 REAL(dp) :: eqsolve_time=0
116 REAL(dp) :: fileout_time=0
117 REAL(dp) :: runvmec_time=0
118 REAL(dp) :: init_parallel_time=0
119 REAL(dp) :: init_radial_time=0
120 REAL(dp) :: allocate_ns_time=0
121 REAL(dp) :: profile1d_time=0
122 REAL(dp) :: profile3d_time=0
123 REAL(dp) :: before_main_loop_time=0
124 REAL(dp) :: reset_params_time=0
125 REAL(dp) :: vsetup_time=0
126 REAL(dp) :: readin_time=0
127 REAL(dp) :: fixarray_time=0
129 REAL(dp) :: myenvvar_time=0
130 REAL(dp) :: init_MPI_time=0
131 REAL(dp) :: read_namelist_time=0
132 REAL(dp) :: get_args_time=0
133 REAL(dp) :: safe_open_time=0
135 REAL(dp) :: jacob1=0, jacob2=0
137 INTEGER :: nfunct3d=0
138 REAL(dp) :: old_vacuum_time=0
140 REAL(dp) :: fo_funct3d_time=0
141 REAL(dp) :: fo_eqfor_time=0
142 REAL(dp) :: fo_wrout_time=0
143 REAL(dp) :: fo_prepare_time=0
144 REAL(dp) :: fo_par_call_time=0
147 INTEGER :: blksize_scp, numjs_vac
148 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: counts_vac, disps_vac, lindx_scp
151 REAL(dp) :: totzsps_time=0
152 REAL(dp) :: totzspa_time=0
153 REAL(dp) :: jacobian_time=0
154 REAL(dp) :: bcovar_time=0
155 REAL(dp) :: alias_time=0
156 REAL(dp) :: forces_time=0
157 REAL(dp) :: tomnsps_time=0
158 REAL(dp) :: tomnspa_time=0
159 REAL(dp) :: symrzl_time=0
160 REAL(dp) :: symforces_time=0
161 REAL(dp) :: residue_time=0
162 REAL(dp) :: tridslv_time=0
163 REAL(dp) :: scalfor_time=0
164 REAL(dp) :: funct3d_time=0
165 REAL(dp) :: gmres_time=0
166 REAL(dp) :: guess_axis_time=0
167 REAL(dp) :: vacuum_time=0
168 REAL(dp) :: precal_time=0
169 REAL(dp) :: surface_time=0
170 REAL(dp) :: bextern_time=0
171 REAL(dp) :: scalpot_time=0
172 REAL(dp) :: solver_time=0
173 REAL(dp) :: analyt_time=0
174 REAL(dp) :: tolicu_time=0
175 REAL(dp) :: belicu_time=0
176 REAL(dp) :: becoil_time=0
177 REAL(dp) :: greenf_time=0
178 REAL(dp) :: fourp_time=0
179 REAL(dp) :: fouri_time=0
181 REAL(dp) :: init_time=0
182 REAL(dp) :: setup_time=0
183 REAL(dp) :: forwardsolve_time=0
184 REAL(dp) :: backwardsolve_time=0
185 REAL(dp) :: finalize_time=0
187 REAL(dp) :: fill_blocks_time=0
188 REAL(dp) :: compute_blocks_time=0
189 REAL(dp) :: bcyclic_forwardsolve_time=0
190 REAL(dp) :: bcyclic_backwardsolve_time=0
193 REAL(dp) :: s_totzsps_time=0
194 REAL(dp) :: s_totzspa_time=0
195 REAL(dp) :: s_jacobian_time=0
196 REAL(dp) :: s_bcovar_time=0
197 REAL(dp) :: s_alias_time=0
198 REAL(dp) :: s_forces_time=0
199 REAL(dp) :: s_tomnsps_time=0
200 REAL(dp) :: s_tomnspa_time=0
201 REAL(dp) :: s_symrzl_time=0
202 REAL(dp) :: s_symforces_time=0
203 REAL(dp) :: s_residue_time=0
204 REAL(dp) :: s_tridslv_time=0
205 REAL(dp) :: s_scalfor_time=0
206 REAL(dp) :: s_gmres_time=0
207 REAL(dp) :: s_guess_axis_time=0
208 REAL(dp) :: s_vacuum_time=0
209 REAL(dp) :: s_precal_time=0
210 REAL(dp) :: s_surface_time=0
211 REAL(dp) :: s_bextern_time=0
212 REAL(dp) :: s_scalpot_time=0
213 REAL(dp) :: s_solver_time=0
214 REAL(dp) :: s_analyt_time=0
215 REAL(dp) :: s_tolicu_time=0
216 REAL(dp) :: s_belicu_time=0
217 REAL(dp) :: s_becoil_time=0
218 REAL(dp) :: s_greenf_time=0
219 REAL(dp) :: s_fourp_time=0
220 REAL(dp) :: s_fouri_time=0
222 REAL(rprec) :: maxvalue, minvalue
223 INTEGER :: maxrank, minrank
226 INTEGER :: RUNVMEC_PASS=0
238 SUBROUTINE printtimes
240 DOUBLE PRECISION :: ind(2,1), outd(2,1)
241 DOUBLE PRECISION :: mt(nranks)
242 LOGICAL :: LMINMAXFILE=.false.
246 ind(1,1) = total_time; ind(2,1) = dble(rank)
247 CALL mpi_reduce(ind,outd,1,mpi_2double_precision,mpi_minloc,
249 minvalue = outd(1,1); minrank = outd(2,1)
251 CALL mpi_reduce(ind,outd,1,mpi_2double_precision,mpi_maxloc,
253 maxvalue = outd(1,1); maxrank = outd(2,1)
255 CALL mpi_bcast(minvalue,1,mpi_double_precision,0,ns_comm,mpi_err
256 CALL mpi_bcast(maxvalue,1,mpi_double_precision,0,ns_comm,mpi_err
257 CALL mpi_bcast(minrank,1,mpi_integer,0,ns_comm,mpi_err)
258 CALL mpi_bcast(maxrank,1,mpi_integer,0,ns_comm,mpi_err)
260 IF (.false..AND.abs(maxvalue-minvalue).GE.0.0)
THEN
264 WRITE(*,*)
'---------------------------------------------------'
265 WRITE(*,*)
'Min and max runtimes exceed tolerance....'
266 WRITE(*,
'(A12,F15.6,A12,I6)')
'Max. time: ',maxvalue,
'Rank :'
267 WRITE(*,
'(A12,F15.6,A12,I6)')
'Min. time: ',minvalue,
'Rank :'
268 WRITE(*,*)
'---------------------------------------------------'
274 totzsps_time = s_totzsps_time
275 totzspa_time = s_totzspa_time
276 jacobian_time = s_jacobian_time
277 bcovar_time = s_bcovar_time
278 alias_time = s_alias_time
279 forces_time = s_forces_time
280 tomnsps_time = s_tomnsps_time
281 tomnspa_time = s_tomnspa_time
282 symrzl_time = s_symrzl_time
283 symforces_time = s_symforces_time
284 residue_time = s_residue_time
285 tridslv_time = s_tridslv_time
286 scalfor_time = s_scalfor_time
287 gmres_time = s_gmres_time
288 guess_axis_time = s_guess_axis_time
289 vacuum_time = s_vacuum_time
290 precal_time = s_precal_time
291 surface_time = s_surface_time
292 bextern_time = s_bextern_time
293 scalpot_time = s_scalpot_time
294 solver_time = s_solver_time
295 analyt_time = s_analyt_time
296 tolicu_time = s_tolicu_time
297 belicu_time = s_belicu_time
298 becoil_time = s_becoil_time
299 greenf_time = s_greenf_time
300 fourp_time = s_fourp_time
301 fouri_time = s_fouri_time
302 maxvalue=total_time; minvalue=total_time
305 IF (parvmec.AND.lminmaxfile)
THEN
306 IF(grank.EQ.minrank)
THEN
307 CALL writetimes(
'min-timings.txt')
309 IF(grank.EQ.maxrank)
THEN
310 CALL writetimes(
'max-timings.txt')
315 CALL writetimes(
'timings.txt')
318 END SUBROUTINE printtimes
322 SUBROUTINE writetimes(fname)
324 CHARACTER(*),
INTENT(IN) :: fname
325 CHARACTER(100) :: cfname
326 CHARACTER(50) :: ciam, cnprocs
327 INTEGER :: TFILE, i, istat, igrd
329 WRITE(ciam,*) rank;
WRITE(cnprocs,*) nranks
330 ciam=adjustl(ciam); cnprocs=adjustl(cnprocs)
331 tfile = 9*nranks+grank+1000
332 OPEN(unit=tfile, file=fname, status=
"REPLACE", action=
"WRITE",
333 &form=
"FORMATTED",position=
"APPEND", iostat=istat)
336 WRITE(tfile,*)
'====================== PARALLEL TIMINGS ===================='
338 WRITE(tfile,*)
'======================= SERIAL TIMINGS ====================='
341 WRITE(tfile,
'(A20,A4,F15.6)')
'total',
': ', total_time
342 WRITE(tfile,
'(A20,A4,I15)')
'rank',
': ', rank
343 WRITE(tfile,
'(A20,A4,F15.6)')
'mgrid file read time',
': ', mgrid_file_read_time
344 WRITE(tfile,
'(A20,A4,I15)')
'No. of procs',
': ', gnranks
349 WRITE(tfile,
'(A20,A4,3I15,F15.6)')
'--- non-vacuum',
': ',
350 f3d_num(igrd), grid_size(igrd), grid_procs(igrd), f3d_time
354 WRITE(tfile,
'(A20,A4,I15)')
'VNRANKS ',
': ', vnranks
356 WRITE(tfile,
'(A20,A4,I15,F15.6)')
'--- vacuum ',
': ',&
357 grid_size(igrd),vgrid_time(igrd)
362 WRITE(tfile,
'(A20,A4,3I15,F15.6)')
'--- non-vacuum',
': ',
363 f3d_num(igrd), grid_size(igrd), grid_procs(igrd), f3d_time
368 WRITE(tfile,
'(A20,A4,F15.6)')
'runvmec',
': ', runvmec_time
371 WRITE(tfile,
'(A20,A4,F15.6)')
'init radial',
': ',init_radial_time
372 WRITE(tfile,
'(A20,A4,F15.6)')
'eqsolve',
': ', eqsolve_time
373 WRITE(tfile,
'(A20,A4,F15.6)')
'fileout',
': ', fileout_time
375 WRITE(tfile,
'(A20,A4,F15.6)')
'evolve',
': ', evolve_time
376 WRITE(tfile,
'(A20,A4,F15.6)')
'funct3d',
': ', funct3d_time
377 WRITE(tfile,
'(A20,A4,I15)')
'nfunct3d',
': ', nfunct3d
379 WRITE(tfile,
'(A20,A4,F15.6)')
'totzsps',
': ', totzsps_time
380 WRITE(tfile,
'(A20,A4,F15.6)')
'totzspa',
': ', totzspa_time
381 WRITE(tfile,
'(A20,A4,F15.6)')
'symrzl',
': ', symrzl_time
382 WRITE(tfile,
'(A20,A4,F15.6)')
'jacobian',
': ', jacobian_time
383 WRITE(tfile,
'(A20,A4,F15.6)')
'bcovar',
': ', bcovar_time
384 WRITE(tfile,
'(A20,A4,F15.6)')
'vacuum',
': ', vacuum_time
386 WRITE(tfile,
'(A20,A4,F15.6)')
'- precal',
': ', precal_time
387 WRITE(tfile,
'(A20,A4,F15.6)')
'- surface',
': ', surface_time
389 WRITE(tfile,
'(A20,A4,F15.6)')
'- bextern',
': ', bextern_time
391 WRITE(tfile,
'(A20,A4,F15.6)')
'-- becoil',
': ', becoil_time
392 WRITE(tfile,
'(A20,A4,F15.6)')
'-- tolicu',
': ', tolicu_time
393 WRITE(tfile,
'(A20,A4,F15.6)')
'-- belicu',
': ', belicu_time
395 WRITE(tfile,
'(A20,A4,F15.6)')
'- scalpot',
': ', scalpot_time
397 WRITE(tfile,
'(A20,A4,F15.6)')
'-- analyt',
': ', analyt_time
398 WRITE(tfile,
'(A20,A4,F15.6)')
'-- greenf',
': ', greenf_time
399 WRITE(tfile,
'(A20,A4,F15.6)')
'-- fourp',
': ', fourp_time
400 WRITE(tfile,
'(A20,A4,F15.6)')
'-- fouri',
': ', fouri_time
402 WRITE(tfile,
'(A20,A4,F15.6)')
'- solver',
': ', solver_time
404 WRITE(tfile,
'(A20,A4,F15.6)')
'alias',
': ', alias_time
405 WRITE(tfile,
'(A20,A4,F15.6)')
'forces',
': ', forces_time
406 WRITE(tfile,
'(A20,A4,F15.6)')
'symforces',
': ', symforces_time
407 WRITE(tfile,
'(A20,A4,F15.6)')
'tomnsps',
': ', tomnsps_time
408 WRITE(tfile,
'(A20,A4,F15.6)')
'tomnspa',
': ', tomnspa_time
409 WRITE(tfile,
'(A20,A4,F15.6)')
'residue',
': ', residue_time
410 WRITE(tfile,
'(A20,A4,F15.6)')
'-- tridslv',
': ', tridslv_time
412 WRITE(tfile,*)
'============================================================'
415 WRITE(tfile,
'(A20,A4,F15.6)')
'allgather',
': ', allgather_time
416 WRITE(tfile,
'(A20,A4,F15.6)')
'allreduce',
': ', allreduce_time
417 WRITE(tfile,
'(A20,A4,F15.6)')
'broadcast',
': ', broadcast_time
418 WRITE(tfile,
'(A20,A4,F15.6)')
'sendrecv ',
': ', sendrecv_time
420 WRITE(tfile,
'(A20,A4,F15.6)')
'Fill_blocks ',
': ', fill_blocks_time
421 WRITE(tfile,
'(A20,A4,F15.6)')
'Compute blocks ',
': ', compute_blocks_time
422 WRITE(tfile,
'(A20,A4,F15.6)')
'Forward solve ',
': ', bcyclic_forwardsolve_time
423 WRITE(tfile,
'(A20,A4,F15.6)')
'Backward solve ',
': ', bcyclic_backwardsolve_time
424 WRITE(tfile,*)
'============================================================'
430 END SUBROUTINE writetimes
433 END MODULE parallel_include_module