V3FIT
hessian.f90
1  MODULE hessian
2  USE island_params, mpol=>mpol_i, ntor=>ntor_i, ns=>ns_i, &
3  nfp=>nfp_i
4  USE timer_mod
5  USE stel_constants, ONLY: pi
6  USE descriptor_mod
10 #if defined(MPI_OPT)
11  USE prof_mod
12  USE ptrd_mod
13 #endif
14  USE nscalingtools, ONLY: parsolver, mpi_err, parfunctisl, &
15  rcounts, disp, startglobrow, endglobrow, nranks, rank, &
16  sksdbg, tofu, upper, lower, diag, savediag, nrecd, &
17  symmetrycheck, totrecvs, packsize, writetime, send, receive, &
18  setblocktridatastruct, getfullsolution
19  USE blocktridiagonalsolver_s, ONLY: applyparallelscaling, &
20  forwardsolve, setmatrixrhs, backwardsolve, getsolutionvector, &
21  checksymmetry, dmin_tri, maxeigen_tri, findminmax_tri, &
22  checkconditionnumber
23  USE mpi_inc
24  USE v3_utilities, ONLY: assert, assert_eq
25 
26  IMPLICIT NONE
27 
28 !-----------------------------------------------
29 !SPH 090116: EVENTUALLY MOVE TO nscalingtools? ASK SKS
30  PUBLIC :: gather_array
31  INTERFACE gather_array
32  MODULE PROCEDURE gather_array1, gather_array2, gather_array3
33  END INTERFACE
34 !-----------------------------------------------
35 ! L o c a l V a r i a b l e s
36 !-----------------------------------------------
37  PRIVATE
38  INTEGER, PARAMETER :: jstart(3) = (/1,2,3/)
39  INTEGER, ALLOCATABLE :: ipiv_blk(:,:)
40 
41  REAL(dp), ALLOCATABLE, DIMENSION(:,:,:) :: ublk, dblk, lblk
42  REAL(dp), ALLOCATABLE, DIMENSION(:,:,:) :: ublk_s, dblk_s, lblk_s
43  LOGICAL, PUBLIC :: l_backslv = .false.
44 
45  REAL(dp), ALLOCATABLE, DIMENSION(:) :: DataItem
46 
47  INTEGER :: myStartBlock, myEndBlock, mynumBlockRows
48  INTEGER :: mystart(3), myend(3)
49 
50  REAL(dp) :: eps, ton, toff, rcond, anorm
51 
52  REAL(dp), DIMENSION(:,:), ALLOCATABLE :: colsum
53  REAL(dp), PARAMETER, PUBLIC :: eps_factor = 1._dp
54  REAL(dp), ALLOCATABLE, PUBLIC :: mupar_norm(:)
55  REAL(dp), PUBLIC :: levmarq_param, levmarq_param0, asym_index, &
56  mupar, mupar0, maxeigen, mindiag
57  LOGICAL, PUBLIC :: l_Compute_Hessian, l_Diagonal_Only
58  LOGICAL, PUBLIC, PARAMETER :: l_Hess_sym=.false.
59  INTEGER, PUBLIC :: HESSPASS=0
60  PUBLIC :: apply_precond, dealloc_hessian, inithess, &
61  compute_hessian_blocks, apply_colscale
62 
63  CONTAINS
64 
65 !-------------------------------------------------------------------------------
69 !-------------------------------------------------------------------------------
70  SUBROUTINE gather_array1(buffer)
71 
72  IMPLICIT NONE
73 
74 ! Declare Arguments
75  REAL(dp), DIMENSION(:), INTENT(inout) :: buffer
76 
77 ! Start of executable code
78  IF (parsolver) THEN
79  CALL mpi_allgatherv(mpi_in_place, rcounts(iam + 1), mpi_real8, &
80  buffer, rcounts, disp, mpi_real8, siesta_comm, &
81  mpi_err)
82  END IF
83 
84  END SUBROUTINE
85 
86 !-------------------------------------------------------------------------------
90 !-------------------------------------------------------------------------------
91  SUBROUTINE gather_array2(buffer)
92 
93  IMPLICIT NONE
94 
95 ! Declare Arguments
96  REAL(dp), DIMENSION(:,:), INTENT(inout) :: buffer
97 
98 ! Start of executable code
99  IF (parsolver) THEN
100  CALL mpi_allgatherv(mpi_in_place, rcounts(iam + 1), mpi_real8, &
101  buffer, rcounts, disp, mpi_real8, siesta_comm, &
102  mpi_err)
103  END IF
104 
105  END SUBROUTINE
106 
107 !-------------------------------------------------------------------------------
111 !-------------------------------------------------------------------------------
112  SUBROUTINE gather_array3(buffer)
113  USE nscalingtools, ONLY: mnspcounts, mnspdisps
114 
115  IMPLICIT NONE
116 
117 ! Declare Arguments
118  REAL (dp), DIMENSION(:,:,:), INTENT(inout) :: buffer
119 
120 ! Start of executable code
121  IF (parsolver) THEN
122  CALL mpi_allgatherv(mpi_in_place, mnspcounts(iam + 1), mpi_real8, &
123  buffer, mnspcounts, mnspdisps, mpi_real8, &
124  siesta_comm, mpi_err)
125  END IF
126 
127  END SUBROUTINE
128 
129 !-------------------------------------------------------------------------------
134 !-------------------------------------------------------------------------------
135  SUBROUTINE gathercols(colgath, coltmp)
136 
137  IMPLICIT NONE
138 
139 ! Declare Arguments
140  REAL(dp), DIMENSION(mblk_size), INTENT(out) :: colgath
141  REAL(dp), DIMENSION(mblk_size2), INTENT(in) :: coltmp
142 
143 #if defined(MPI_OPT)
144 ! local variables
145  INTEGER :: icol
146  INTEGER :: icol_mpi
147  INTEGER :: iorder
148  INTEGER :: np
149  INTEGER :: mbloc
150  INTEGER :: imax
151  REAL(dp), DIMENSION(:), ALLOCATABLE :: unordered
152 
153 ! Start of executable code
154  ALLOCATE (unordered(mblk_size))
155 
156 ! Gather to unordered array.
157  CALL mpi_allgatherv(coltmp, mblk_size2, mpi_real8, &
158  unordered, scalcounts, scaldisps, &
159  mpi_real8, siesta_comm, mpi_err)
160  CALL assert(mpi_err.EQ.0, 'MPI ERR IN GatherCols')
161 
162 ! Reorder cyclic permutations and save to colgath
163  iorder = 0
164  imax = 0
165  DO np = 1, nprocs
166  icol_mpi = np - nprocs
167  mbloc = scalcounts(np)
168  DO icol = 1, mbloc
169  icol_mpi = icol_mpi + nprocs
170  imax = max(imax, icol_mpi)
171  iorder = iorder + 1
172  colgath(icol_mpi) = unordered(iorder)
173  END DO
174  END DO
175 
176  CALL assert_eq(iorder, mblk_size, 'iorder != mblk_size in GatherCols')
177  CALL assert_eq(imax, mblk_size, 'imax != mblk_size in GatherCols')
178 
179  DEALLOCATE(unordered)
180 #endif
181  END SUBROUTINE
182 
183  SUBROUTINE inithess
184  INTEGER :: icol, mesh, nsmin, nsmax
185 !-----------------------------------------------
186  IF (.NOT.parsolver) THEN
187  startglobrow = 1; endglobrow = ns
188  END IF
189  mystartblock=startglobrow
190  myendblock=endglobrow
191  mynumblockrows=myendblock-mystartblock+1
192 
193  nsmin = max(1, startglobrow-1); nsmax = min(ns, endglobrow+1)
194  myend = nsmax
195  DO mesh = 1, 3
196  icol = mod(jstart(mesh)-nsmin, 3)
197  IF (icol .LT. 0) icol = icol+3
198  mystart(mesh) = nsmin+icol
199  END DO
200 
201  END SUBROUTINE inithess
202 
203 !-------------------------------------------------------------------------------
210 !-------------------------------------------------------------------------------
211  SUBROUTINE apply_colscale(gc, colscale, nsmin, nsmax)
212 
213  IMPLICIT NONE
214 
215 ! Declare Arguments
216  REAL (dp), DIMENSION(mblk_size,ns), INTENT(inout) :: gc
217  REAL (dp), DIMENSION(mblk_size,ns), INTENT(in) :: colscale
218  INTEGER, INTENT(in) :: nsmin
219  INTEGER, INTENT(in) :: nsmax
220 
221 ! Start of executable code
222  gc(:,nsmin:nsmax) = gc(:,nsmin:nsmax)*colscale(:,nsmin:nsmax)
223 
224  END SUBROUTINE
225 
226  SUBROUTINE compute_hessian_blocks (func, ldiagonal)
227  USE shared_data, ONLY: xc, gc, l_linearize
228 !-----------------------------------------------
229 ! D u m m y A r g u m e n t s
230 !-----------------------------------------------
231  LOGICAL, INTENT(IN) :: ldiagonal
232  EXTERNAL :: func
233 !-----------------------------------------------
234 ! L o c a l V a r i a b l e s
235 !----------------------------------------------
236  INTEGER :: iunit
237  CHARACTER(LEN=3) :: label
238  REAL(dp) :: bsize
239  LOGICAL :: lprint
240 !-----------------------------------------------
241  l_compute_hessian = .true.
242  l_diagonal_only = ldiagonal
243  l_linearize = .true.
244  IF (lcolscale) col_scale = 1
245 
246  IF (iam .EQ. 0) THEN
247  DO iunit = 6, unit_out, unit_out-6
248  IF (.NOT.lverbose .AND. iunit.EQ.6) cycle
249  IF (l_diagonal_only) THEN
250  WRITE (iunit, 90) levmarq_param, mupar
251  ELSE
252  WRITE (iunit, 100) levmarq_param, mupar, asym_index
253  ENDIF
254  END DO
255  CALL flush(unit_out)
256  ENDIF
257 
258  90 FORMAT (/,' Computing diagonal preconditioner - ', &
259  ' LM parameter:',1pe9.2,' mu||:',1pe9.2)
260  100 FORMAT (/,' Computing block preconditioner - ', &
261  ' LM parameter:',1pe9.2,' mu||:',1pe9.2, &
262  ' Asym index:',1pe9.2)
263 
264  lprint = (iam.EQ.0 .AND. hesspass.EQ.0 .AND. lverbose)
265 
266  IF (parsolver) THEN
267  IF (parfunctisl) THEN
268  IF(sksdbg) WRITE(tofu,*) 'Executing Compute_Hessian_Blocks_With_No_Col_Redist'; IF(sksdbg) CALL flush(tofu)
269  IF (lprint) print *,'HESSIAN AND INVERSE CALCULATION' // &
270  ' USING BCYCLIC WITH NO COLUMN REDISTRIBUTION'
271  CALL compute_hessian_blocks_with_no_col_redist (xc, gc, func)
272  ELSE
273  IF (sksdbg) WRITE(tofu,*) 'Executing Compute_Hessian_Blocks_With_Col_Redist'; IF(sksdbg) CALL flush(tofu)
274  IF (lprint) print *,'HESSIAN AND INVERSE CALCULATION' // &
275  ' USING BCYCLIC WITH COLUMN REDISTRIBUTION'
276  CALL compute_hessian_blocks_with_col_redist (xc, gc, func)
277  END IF
278  ELSE
279  IF (sksdbg) WRITE(tofu,*) 'Executing Compute_Hessian_Blocks_Thomas'; CALL flush(tofu)
280  IF (lprint) print *,'HESSIAN AND INVERSE CALCULATION' // &
281  ' USING SERIAL THOMAS ALGORITHM'
282  CALL compute_hessian_blocks_thomas (xc, gc, func)
283  END IF
284 
285  IF (hesspass.EQ.0) THEN
286  bsize = 3*ns*kind(dblk) !3 blocks per row
287  bsize = bsize*real(mblk_size*mblk_size,dp)
288  IF (bsize .LT. 1.e6_dp) THEN
289  bsize = bsize/1.e3
290  label = " Kb"
291  ELSE IF (bsize .lt. 1.e9_dp) THEN
292  bsize = bsize/1.e6_dp
293  label = " Mb"
294  ELSE
295  bsize = bsize/1.e9_dp
296  label = " Gb"
297  END IF
298 
299  IF (iam .EQ. 0) THEN
300  DO iunit = 6, unit_out, unit_out-6
301  IF (.NOT.lverbose .AND. iunit.EQ.6) cycle
302  WRITE (iunit, '(1x,a,i4,a,f6.2,a)') 'Block dim: ', &
303  mblk_size, '^2 Preconditioner size: ', bsize, &
304  trim(label)
305  END DO
306  CALL flush(unit_out)
307  ENDIF
308  END IF
309 
310  l_compute_hessian = .false.
311  l_linearize = .false.
312  hesspass = hesspass+1
313 
314  END SUBROUTINE compute_hessian_blocks
315 
316  SUBROUTINE compute_hessian_blocks_with_no_col_redist (xc, gc, &
317  func)
318 !-----------------------------------------------
319 ! D u m m y A r g u m e n t s
320 !-----------------------------------------------
321  REAL(dp), DIMENSION(mblk_size,ns) :: xc, gc
322 !-----------------------------------------------
323 ! L o c a l V a r i a b l e s
324 !----------------------------------------------
325  REAL(dp), PARAMETER :: zero=0, one=1
326  INTEGER :: n, m, js, mesh, ntype, istat, iunit, icol
327  EXTERNAL :: func
328  INTEGER :: js1
329 
330  REAL(dp) :: starttime, endtime, usedtime
331  REAL(dp) :: colstarttime, colendtime, colusedtime
332  INTEGER :: nsmin, nsmax
333  REAL(dp) :: skston, skstoff, temp
334 !----------------------------------------------
335 
336  starttime=0; endtime=0; usedtime=0;
337  colstarttime=0; colendtime=0; colusedtime=0
338 
339  !------------------------------------------------------------
340  ! COMPUTE (U)pper, (D)iagonal, and (L)ower Block matrices
341  !------------------------------------------------------------
342 
343  CALL second0(ton)
344  eps = sqrt(epsilon(eps))*abs(eps_factor)
345 ! eps = 1.E-3_dp
346 
347  nsmin = max(1, startglobrow-1); nsmax = min(ns, endglobrow+1)
348 
349  ALLOCATE (dataitem(mblk_size), stat=istat)
350  CALL assert(istat.EQ.0,'DataItem allocation failed:')
351  dataitem = 0
352 
353  xc(:,nsmin:nsmax) = 0
354 
355  !----------------------------
356  ! Column generation begins
357  !----------------------------
358  CALL second0(colstarttime)
359 
360  icol=0
361  var_type_lg: DO ntype = 1, ndims
362  var_n_lg: DO n = -ntor, ntor
363  var_m_lg: DO m = 0, mpol
364 
365  icol=icol+1
366 
367  mesh_3pt_lg: DO mesh = 1, 3
368 
369  IF (.NOT.(m.EQ.0 .AND. n.LT.0)) THEN
370  DO js = mystart(mesh), myend(mesh), 3
371  xc(icol,js) = eps
372  END DO
373  END IF
374 
375  inhessian=.true.
376  CALL second0(skston)
377 
378  CALL func
379  CALL second0(skstoff)
380  hessian_funct_island_time=hessian_funct_island_time+(skstoff-skston)
381  inhessian=.false.
382 
383 
384  skip3_mesh_lg: DO js = mystart(mesh), myend(mesh), 3
385 
386  xc(icol, js) = 0
387 
388  !ublk(js-1)
389  js1=js-1
390  IF (startglobrow.LE.js1 .AND. js1.LE.endglobrow) THEN
391  dataitem = gc(:,js1)/eps
392  IF (l_diagonal_only) THEN
393  temp=dataitem(icol)
394  dataitem=0
395  dataitem(icol)=temp
396  END IF
397  CALL setblocktridatastruct(upper,js1,icol,dataitem)
398  END IF
399 
400  !dblk(js)
401  IF (startglobrow.LE.js .AND. js.LE.endglobrow) THEN
402  dataitem = gc(:,js)/eps
403  IF (all(dataitem .EQ. zero)) dataitem(icol) = one
404 
405  IF (l_diagonal_only) THEN
406  temp=dataitem(icol)
407  dataitem=0
408  dataitem(icol)=temp
409  END IF
410 
411  CALL setblocktridatastruct(savediag,js,icol,dataitem)
412  CALL setblocktridatastruct(diag,js,icol,dataitem)
413  END IF
414 
415  !lblk(js+1)
416  js1 = js+1
417  IF (startglobrow.LE.js1 .AND. js1.LE.endglobrow) THEN
418  dataitem = gc(:,js1)/eps
419  IF (l_diagonal_only) THEN
420  temp=dataitem(icol)
421  dataitem=0
422  dataitem(icol)=temp
423  END IF
424  CALL setblocktridatastruct(lower,js1,icol,dataitem)
425  END IF
426  END DO skip3_mesh_lg
427  END DO mesh_3pt_lg
428  END DO var_m_lg
429  END DO var_n_lg
430  END DO var_type_lg
431 
432  CALL second0(colendtime)
433  construct_hessian_time=construct_hessian_time+(colendtime-colstarttime)
434 
435 
436  DEALLOCATE (dataitem, stat=istat)
437  CALL assert(istat.EQ.0,'DataItem deallocation error:')
438 !----------------------------
439 ! Column generation ends
440 !----------------------------
441 
442  IF (lcolscale) THEN
443  CALL applyparallelscaling(levmarq_param, col_scale)
444  ELSE
445  CALL findminmax_tri (levmarq_param)
446  END IF
447 
448  mindiag = dmin_tri
449  maxeigen = maxeigen_tri
450 !
451 ! CHECK CONDITION NUMBER
452 !
453  IF (nprocs .EQ. 1) THEN
454  CALL second0(ton)
455  CALL checkconditionnumber(ns,mblk_size,anorm,rcond,info)
456  CALL second0(toff)
457  IF (info .EQ. 0) THEN
458  print '(1x,3(a,1p,e12.3))','RCOND = ', rcond, &
459  ' ||A|| = ', anorm,' TIME: ', toff-ton
460  END IF
461  END IF
462 
463 !
464 ! CHECK SYMMETRY OF BLOCKS
465 !
466  IF (symmetrycheck) THEN
467  CALL second0(skston)
468  CALL checksymmetry(asym_index)
469  CALL second0(skstoff)
470  asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
471  ENDIF
472 
473  CALL second0(toff)
474  IF (l_diagonal_only) THEN
475  time_diag_prec = time_diag_prec+(toff-ton)
476  ELSE
477  time_block_prec = time_block_prec+(toff-ton)
478  END IF
479 
480 ! IF (l_Diagonal_Only) RETURN !ForwardSolve will be called first time but safer this way!
481 
482  ton = toff
483  skston = ton
484 !
485 !FACTORIZE (Invert) HESSIAN
486 !
487  IF (ALLOCATED(ipiv_blk)) DEALLOCATE(ipiv_blk,stat=istat)
488  CALL forwardsolve
489  CALL second0(toff)
490  skstoff=toff
491  block_factorization_time=block_factorization_time+(skstoff-skston)
492  time_factor_blocks = time_factor_blocks + (toff-ton)
493 
494  IF (iam.EQ.0 .AND. lverbose) &
495  print '(a,1p,e12.3)',' BLOCK FACTORIZATION TIME: ', toff-ton
496 
497  END SUBROUTINE compute_hessian_blocks_with_no_col_redist
498 
499 
500  SUBROUTINE compute_hessian_blocks_with_col_redist (xc, gc, func)
501 !-----------------------------------------------
502 ! D u m m y A r g u m e n t s
503 !-----------------------------------------------
504  REAL(dp), DIMENSION(mblk_size,ns) :: xc, gc
505 !-----------------------------------------------
506 ! L o c a l V a r i a b l e s
507 !----------------------------------------------
508  REAL(dp), PARAMETER :: zero=0, one=1
509  INTEGER :: n, m, js, js1, mesh, ntype, istat, iunit, icol, icolmpi
510  EXTERNAL :: func
511  REAL(dp), ALLOCATABLE, DIMENSION(:) :: SavedDiag
512  INTEGER, ALLOCATABLE, DIMENSION (:) :: sendCount, recvCount
513  REAL(dp), ALLOCATABLE, DIMENSION (:) :: recvBuf
514  INTEGER :: procID
515  REAL(dp) :: starttime, endtime, usedtime
516  REAL(dp) :: colstarttime, colendtime, colusedtime
517 
518  LOGICAL :: PROBEFLAG
519  REAL(dp) :: skston, skstoff, temp
520 
521  INTEGER :: it
522 !----------------------------------------------
523  starttime=zero
524  endtime=zero
525  usedtime=zero
526 !------------------------------------------------------------
527 ! COMPUTE (U)pper, (D)iagonal, and (L)ower Block matrices
528 !------------------------------------------------------------
529 
530  CALL second0(ton)
531 
532  CALL assert(mblk_size.EQ.SIZE(gc,1),'MBLK_SIZE IS WRONG IN HESSIAN')
533 
534  eps = sqrt(epsilon(eps))*abs(eps_factor)
535 
536  ALLOCATE (dataitem(mblk_size), stat=istat)
537  CALL assert(istat.EQ.0,'DataItem allocation failed!')
538  dataitem = 0
539  ALLOCATE (saveddiag(mblk_size), stat=istat)
540  CALL assert(istat.EQ.0,'SavedDiag allocation failed!')
541  saveddiag = zero
542 
543  icolmpi = 0
544  icol=0
545  xc = 0
546 
547  IF(.NOT.ALLOCATED(sendcount)) ALLOCATE(sendcount(nprocs))
548  IF(.NOT.ALLOCATED(recvcount)) ALLOCATE(recvcount(nprocs))
549  IF(.NOT.ALLOCATED(recvbuf)) ALLOCATE(recvbuf(packsize))
550  sendcount=0
551  recvcount=0
552  nrecd=0
553  totrecvs=0
554  probeflag=.true.
555 
556  CALL second0(colstarttime)
557  var_type: DO ntype = 1, ndims
558  var_n: DO n = -ntor, ntor
559  var_m: DO m = 0, mpol
560 
561  icol=icol+1
562  IF(mod(icol-1,nprocs)==iam) THEN
563  icolmpi = icolmpi+1
564 
565  mesh_3pt: DO mesh = 1, 3
566 
567  IF (.NOT.(m.EQ.0 .AND. n.LT.0)) THEN
568  DO js = jstart(mesh), ns, 3
569  xc(icol,js) = eps
570  END DO
571  END IF
572 
573  inhessian=.true.
574  CALL second0(skston)
575  CALL func
576  CALL second0(skstoff)
577  hessian_funct_island_time=hessian_funct_island_time+(skstoff-skston)
578  inhessian=.false.
579 
580  skip3_mesh: DO js = jstart(mesh), ns, 3
581 
582  xc(icol,js) = 0
583 
584  !ublk(js-1)
585  js1 = js-1
586  IF (js1 .gt. 0) THEN
587  dataitem = gc(:,js1)/eps
588  !m=0 constraint for n<0
589  IF (m.eq.0 .and. n.lt.0) dataitem = 0
590  IF (l_diagonal_only) THEN
591  temp=dataitem(icol)
592  dataitem=0
593  dataitem(icol)=temp
594  END IF
595  istat = 1
596  CALL receive(probeflag)
597  CALL send(dataitem,js1,istat,icol,procid)
598  IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid)+1
599  CALL receive(probeflag)
600  END IF !js>1
601 
602  !dblk(js)
603  dataitem = gc(:,js)/eps
604  IF (m.eq.0 .and. n.lt.0) dataitem = 0
605  IF (all(dataitem .EQ. zero)) dataitem(icol) = one
606 
607 
608  IF (l_diagonal_only) THEN
609  temp=dataitem(icol)
610  dataitem=0
611  dataitem(icol)=temp
612  END IF
613  saveddiag=dataitem; istat=4
614  CALL receive(probeflag)
615  CALL send(saveddiag,js,istat,icol,procid)
616  IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid)+1
617  CALL receive(probeflag)
618 ! Boundary condition at js=1 and ns (CATCH ALL OF THEM HERE)
619 ! and m=0,n<0: NEED THIS to avoid (near) singular Hessian
620 ! ASSUMES DIAGONALS ARE ALL NEGATIVE
621  istat = 2; js1 = js
622  CALL receive(probeflag)
623  CALL send(dataitem,js1,istat,icol,procid)
624  IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid)+1
625  CALL receive(probeflag)
626 
627  !lblk(js+1)
628  js1 =js+1
629  IF (js .lt. ns) THEN
630  dataitem = gc(:,js1)/eps
631  !m=0 constraint for n<0
632  IF (m.eq.0 .and. n.lt.0) dataitem=0
633  IF (l_diagonal_only) THEN
634  temp=dataitem(icol)
635  dataitem=0
636  dataitem(icol)=temp
637  END IF
638  istat = 3
639  CALL receive(probeflag)
640  CALL send(dataitem,js1,istat,icol,procid)
641  IF(procid-1.NE.iam) sendcount(procid) = sendcount(procid)+1
642  CALL receive(probeflag)
643  END IF !JS < NS
644 
645  END DO skip3_mesh
646 
647  END DO mesh_3pt
648  ENDIF
649  END DO var_m
650  END DO var_n
651  END DO var_type
652  CALL second0(colendtime)
653  colusedtime=colendtime-colstarttime
654 
655  construct_hessian_time=construct_hessian_time+(colendtime-colstarttime)
656 
657  CALL second0(skston)
658  IF (parsolver) THEN
659 
660  probeflag=.NOT.probeflag
661 
662  CALL mpi_alltoall(sendcount,1,mpi_integer,recvcount,1, &
663  mpi_integer,siesta_comm,mpi_err)
664  totrecvs=0
665  DO js=1,nprocs,1
666  totrecvs=totrecvs+recvcount(js)
667  END DO
668 
669  DO WHILE (nrecd.LT.totrecvs)
670  CALL receive(probeflag)
671  END DO
672  END IF
673 
674  DEALLOCATE (dataitem, saveddiag, stat=istat)
675  CALL assert(istat.EQ.0,'DataItem deallocation error!')
676 
677  CALL second0(skstoff)
678  construct_hessian_time=construct_hessian_time+(skstoff-skston)
679  CALL mpi_barrier(siesta_comm, mpi_err)
680 
681 !
682 ! CHECK SYMMETRY OF BLOCKS
683 !
684  IF (symmetrycheck) THEN
685  IF (parsolver) THEN
686  CALL second0(skston)
687  CALL checksymmetry(asym_index)
688  CALL second0(skstoff)
689  asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
690  ENDIF
691  ENDIF
692 
693 !
694 ! FACTORIZE (Invert) HESSIAN
695 !
696  CALL second0(toff)
697  IF (l_diagonal_only) THEN
698  time_diag_prec = time_diag_prec+(toff-ton)
699  ELSE
700  time_block_prec = time_block_prec+(toff-ton)
701  END IF
702 
703  ton=toff
704  starttime=ton
705  skston=ton
706  CALL forwardsolve
707  CALL second0(toff)
708  endtime=toff
709  skstoff=toff
710 
711  block_factorization_time=block_factorization_time+(skstoff-skston)
712  time_factor_blocks = time_factor_blocks + (toff-ton)
713  usedtime=endtime-starttime
714 
715  IF (iam.EQ.0 .AND. lverbose) &
716  print '(a,1p,e12.3)',' BLOCK FACTORIZATION TIME: ', toff-ton
717 
718  END SUBROUTINE compute_hessian_blocks_with_col_redist
719 
720 
721  SUBROUTINE compute_hessian_blocks_thomas (xc, gc, func)
722 !-----------------------------------------------
723 ! D u m m y A r g u m e n t s
724 !-----------------------------------------------
725  REAL(dp), DIMENSION(mblk_size,ns), INTENT(INOUT) :: xc, gc
726 !-----------------------------------------------
727 ! L o c a l V a r i a b l e s
728 !----------------------------------------------
729  REAL(dp), PARAMETER :: zero=0, one=1
730  INTEGER :: n, m, js, js1, mesh, ntype, istat, iunit, icol, icolmpi
731  REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: gc1
732  REAL(dp) :: skston, skstoff, temp
733  EXTERNAL :: func
734 #if defined(MPI_OPT)
735  INTEGER :: neqtotal, numroc
736  REAL(dp) :: starttime, endtime, usedtime
737  REAL(dp) :: colstarttime, colendtime, colusedtime
738  EXTERNAL :: numroc
739 !------------------------------------------------------------
740 
741  starttime=0; endtime=0; usedtime=0
742  colstarttime=0; colendtime=0; colusedtime=0
743 #endif
744 
745 !------------------------------------------------------------
746 ! COMPUTE (U)pper, (D)iagonal, and (L)ower Block matrices
747 !------------------------------------------------------------
748  CALL second0(ton)
749 
750  eps = sqrt(epsilon(eps))*abs(eps_factor)
751 
752  IF (ALLOCATED(ublk)) DEALLOCATE(ublk, dblk, lblk, stat=istat)
753  IF (lscalapack) THEN
754 #if defined(MPI_OPT)
755  CALL blacs_gridinfo(icontxt_1xp,nprow,npcol,myrow,mycol)
756  mb = mblk_size
757  nb = 1
758 
759  rsrc = 0
760  csrc = 0
761  locq = numroc( mblk_size, nb, mycol, csrc, npcol )
762  locp = numroc( mblk_size, mb, myrow, rsrc, nprow )
763  mblk_size2=max(1,locq)
764  lld = max(1,locp)
765  call descinit(desca_1xp,mblk_size,mblk_size,mb,nb,rsrc,csrc, &
766  icontxt_1xp,lld,info)
767  if (info.NE.0) then
768  write(*,*) 'myrow,mycol,nprow,npcol,desc(LLD_),lld ', &
769  myrow,mycol,nprow,npcol,desca_1xp(lld_),lld
770  write(*,*) 'Locp,m,mb ', locp,mblk_size,mb
771  endif
772 
773  call assert(info.eq.0,'descinit descA_1xp')
774  ineed = max(1,lld*locq)
775  mm = mblk_size*ns
776  mb=mm
777  nb=nrhs1
778  locqr = numroc( nrhs1, nb, mycol,csrc,npcol)
779  locpr = numroc( mm, mb, myrow,rsrc,nprow)
780  lld = max(1,locpr)
781  call descinit(descr_all,mm,nrhs1, mb,nb,rsrc,csrc,icontxt,lld,info)
782  call assert(info.eq.0,'test_pdtrd:descinit return info != 0')
783 
784  call blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol)
785 
786  mb = 50
787  nb = mb
788 
789  rsrc = 0
790  csrc = 0
791  locq = numroc( mblk_size, nb, mycol, csrc, npcol )
792  locp = numroc( mblk_size, mb, myrow, rsrc, nprow )
793 
794  lld = max(1,locp)
795  call descinit(desca,mblk_size,mblk_size,mb,nb,rsrc,csrc,icontxt,lld,info)
796  call assert(info.eq.0,'descinit descA')
797  ineed = max(1,lld*locq)
798  IF (ALLOCATED(ublkp)) DEALLOCATE(ublkp, dblkp, lblkp, stat=istat)
799  IF (.NOT. ALLOCATED(ublkp)) THEN
800  ALLOCATE(ublkp(ineed,ns-1), &
801  dblkp(ineed,ns), &
802  lblkp(ineed,ns-1), &
803  stat=istat)
804  CALL assert(istat.EQ.0,'Not enough memory to store a single block!')
805  END IF
806 
807  ALLOCATE(ublk(mblk_size,mblk_size2,ns), &
808  dblk(mblk_size,mblk_size2,ns), &
809  lblk(mblk_size,mblk_size2,ns), &
810  stat=istat)
811  CALL assert(istat.EQ.0,'Not enough memory to store a single block!')
812 #else
813  CALL assert(.false.,'LSCALAPACK=T BUT NOT MPI!')
814 #endif
815  ELSE !.NOT.LSCALAPACK
816 
817  mblk_size2 = mblk_size
818  ALLOCATE(ublk(mblk_size,mblk_size,ns), &
819  dblk(mblk_size,mblk_size,ns), &
820  lblk(mblk_size,mblk_size,ns), &
821  stat=istat)
822 
823  END IF !END LSCALAPACK
824 
825  IF (ALLOCATED(ublk)) THEN
826  ublk = 0; dblk = 0; lblk = 0
827  END IF
828 
829  ALLOCATE (dataitem(mblk_size), stat=istat)
830  CALL assert(istat.EQ.0,'DataItem allocation failed!')
831  dataitem = 0
832  xc = 0
833 
834 #if defined(MPI_OPT)
835  icolmpi = 0
836 #endif
837  icol=0
838 
839  CALL second0(skston)
840  CALL func
841  CALL second0(skstoff)
842  hessian_funct_island_time=hessian_funct_island_time+(skstoff-skston)
843 
844 #if defined(MPI_OPT)
845  CALL second0(colstarttime) !Added by SKS for timing comparisons
846 #endif
847  var_type: DO ntype = 1, ndims
848  var_n: DO n = -ntor, ntor
849  var_m: DO m = 0, mpol
850 
851  icol=icol+1
852 #if defined(MPI_OPT)
853  IF(mod(icol-1,nprocs)==iam .OR. .NOT.lscalapack) THEN
854  IF (lscalapack) THEN
855  icolmpi = icolmpi+1
856  ELSE
857  icolmpi = icol
858  END IF
859 #else
860  icolmpi = icol
861 #endif
862  mesh_3pt: DO mesh = 1, 3
863 
864  IF (.NOT.(m.EQ.0 .AND. n.LT.0)) THEN
865  DO js = jstart(mesh), ns, 3
866  xc(icol,js) = eps
867  END DO
868  END IF
869 
870  inhessian=.true.
871  CALL second0(skston)
872  CALL func
873  CALL second0(skstoff)
874  hessian_funct_island_time=hessian_funct_island_time+(skstoff-skston)
875 
876  inhessian=.false.
877 #if defined(MPI_OPT)
878 #endif
879 
880 !OFF FOR l_linearize=T gc = gc-gc1
881 
882 ! COMPUTE PRECONDITIONER (HESSIAN) ELEMENTS. LINEARIZED EQUATIONS
883 ! OF TRI-DIAGONAL (IN S) FORM
884 !
885 ! F(j-1) = a(j-1)x(j-2) + d(j-1)x(j-1) + b(j-1)x(j)
886 ! F(j) = a(j)x(j-1) + d(j) x(j) + b(j) x(j+1)
887 ! F(j+1) = a(j+1)x(j) + d(j+1)x(j+1) + b(j+1)x(j+2)
888 !
889 ! HESSIAN IS H(k,j) == dF(k)/dx(j); aj == lblk; dj == dblk; bj = ublk
890 
891  skip3_mesh: DO js = jstart(mesh), ns, 3
892 
893  xc(icol,js) = 0
894 
895 !FORCE RESPONSE AT mp,np,nptype TO m,n,js,ntype VELOCITY PERTURBATION
896 
897  !ublk(js-1)
898  js1 = js-1
899  IF (js1 .GT. 0) THEN
900  dataitem = gc(:,js1)/eps
901 
902  IF (l_diagonal_only) THEN
903  temp=dataitem(icol)
904  dataitem=0
905  dataitem(icol)=temp
906  END IF
907  ublk(:,icolmpi,js1) = dataitem
908  END IF
909 
910  !dblk(js)
911  dataitem = gc(:,js)/eps
912  IF (all(dataitem .EQ. zero)) dataitem(icol) = one
913 
914  IF (l_diagonal_only) THEN
915  temp=dataitem(icol)
916  dataitem=0
917  dataitem(icol)=temp
918  END IF
919 
920  dblk(:,icolmpi,js) = dataitem
921 
922  !lblk(js+1)
923  js1=js+1
924  IF (js .LT. ns) THEN
925  dataitem = gc(:,js1)/eps
926  IF (l_diagonal_only) THEN
927  temp=dataitem(icol)
928  dataitem=0
929  dataitem(icol)=temp
930  ENDIF
931  lblk(:,icolmpi,js1) = dataitem
932  END IF
933 
934  END DO skip3_mesh
935  END DO mesh_3pt
936 #if defined(MPI_OPT)
937  ENDIF
938 #endif
939  END DO var_m
940  END DO var_n
941  END DO var_type
942 
943  CALL second0(toff)
944 
945  IF (l_diagonal_only) THEN
946  time_diag_prec = time_diag_prec+(toff-ton)
947  ELSE
948  time_block_prec = time_block_prec+(toff-ton)
949  END IF
950 
951 !SPH 071416 ADDED COLUMN SCALING OPTION
952  IF (lcolscale) THEN
953  CALL serialscaling
954  ELSE
955  CALL findminmax_tri_serial
956  END IF
957 
958  CAll second0(ton)
959  colendtime = ton
960  colusedtime=colendtime-colstarttime
961  time_generate_blocks=time_generate_blocks+colusedtime
962  construct_hessian_time=construct_hessian_time+(colendtime-colstarttime)
963 
964  DEALLOCATE (dataitem, stat=istat)
965  CALL assert(istat.EQ.0,'DataItem deallocation error!')
966 
967  IF (nprocs .EQ. 1) THEN
968 
969 !
970 !SYMMETRIZE BLOCKS: REWRITE FOR nprocs>1 USING SCALAPACK (only works now for mblk_size2=mblk_size)
971 !
972  IF (.NOT.l_hess_sym .OR. nprocs.GT.1) GOTO 1000
973  ALLOCATE(gc1(mblk_size,ns), dblk_s(mblk_size, mblk_size2,ns))
974 
975  DO icol = 1, mblk_size2
976  gc1(:,1:nsh) = (ublk(icol,:,1:nsh) + lblk(:,icol,2:ns))/2
977  ublk(icol,:,1:nsh) = gc1(:,1:nsh)
978  lblk(:,icol,2:ns) = gc1(:,1:nsh)
979  dblk_s(:,icol,:) = (dblk(icol,:,:) + dblk(:,icol,:))/2
980  END DO
981 
982  dblk = dblk_s
983 
984  DEALLOCATE (gc1, dblk_s)
985  1000 CONTINUE
986 
987 !
988 !CHECK CONDITION NUMBER
989 !
990  CALL second0(ton)
991  CALL checkeigenvalues_serial(ns, mblk_size)
992  CALL checkconditionnumber_serial(ns,mblk_size,anorm,rcond,info)
993  CALL second0(toff)
994  IF (info .EQ. 0) THEN
995  print '(1x,3(a,1p,e12.3))','RCOND = ', rcond, &
996  ' ||A|| = ', anorm,' TIME: ', toff-ton
997  END IF
998  END IF
999 
1000 !
1001 !CHECK SYMMETRY OF BLOCKS
1002 !
1003  CALL second0(skston)
1004  IF (lscalapack) THEN
1005  CALL check3d_symmetry(dblk,lblk,ublk,dblkp,lblkp,ublkp, &
1006  mblk_size,mblk_size2,ns,asym_index)
1007  ELSE
1008  CALL check3d_symmetry_serial(dblk,lblk,ublk,mpol,ntor, &
1009  mblk_size,ns,asym_index)
1010  END IF
1011  CALL second0(skstoff)
1012  asymmetry_check_time=asymmetry_check_time+(skstoff-skston)
1013 
1014  900 CONTINUE
1015 
1016 ! IF (l_Diagonal_Only) RETURN
1017 
1018 
1019 ! l_backslv = (nprocs.EQ.1 .AND. nprecon.GE.2) !Controls when dump is made
1020  IF (l_backslv) THEN
1021  istat = 0
1022  IF (.NOT.ALLOCATED(dblk_s)) &
1023  ALLOCATE(ublk_s(mblk_size,mblk_size,ns), &
1024  dblk_s(mblk_size,mblk_size,ns), &
1025  lblk_s(mblk_size,mblk_size,ns), &
1026  stat=istat)
1027  CALL assert(istat.EQ.0,'Allocation error2 in Compute_Hessian_Blocks')
1028  ublk_s = ublk; dblk_s = dblk; lblk_s = lblk
1029  END IF
1030 !
1031 ! FACTORIZE (Invert) HESSIAN
1032 !
1033  IF (ALLOCATED(ipiv_blk)) DEALLOCATE(ipiv_blk,stat=istat)
1034  CALL second0(skston)
1035  IF (lscalapack) THEN
1036 #if defined(MPI_OPT)
1037  ineed = numroc(desca(m_),desca(mb_),0,0,nprow) + desca(mb_)
1038  ALLOCATE( ipiv_blk(ineed, ns), stat=istat )
1039  CALL assert(istat.EQ.0,'ipiv_blk allocation error2 in Compute_Hessian_Blocks')
1040  CALL blk3d_parallel(dblk,lblk,ublk,dblkp,lblkp,ublkp,mblk_size,mblk_size2,ns)
1041  CALL blacs_gridinfo(icontxt,nprow,npcol,myrow,mycol)
1042  mm = mblk_size*ns
1043  locqr = numroc( nrhs1, nb, mycol,csrc,npcol)
1044  locpr = numroc( mm, mb, myrow,rsrc,nprow)
1045  lld = max(1,locpr)
1046  CALL descinit(descx,mm,nrhs1, mb,nb,rsrc,csrc,icontxt,lld,info)
1047  CALL assert(info.eq.0,'test_pdtrd:descinit return info!=0')
1048  descr(:) = descx
1049 
1050  ineedr = max(1,lld*locqr)
1051 
1052  CALL blacs_barrier(icontxt,'All')
1053  CALL profstart('factor call:123')
1054  CALL pdtrdf(mblk_size,ns,dblkp,lblkp,ublkp,ipiv_blk,desca)
1055  CALL blacs_barrier(icontxt,'All')
1056  CALL profend('factor call:123')
1057 #else
1058  CALL assert(.false.,'LSCALAPACK=T BUT NOT IN MPI!')
1059 #endif
1060  ELSE !.NOT. LSCALAPACK
1061  ALLOCATE (ipiv_blk(mblk_size,ns), stat=istat)
1062  CALL assert(istat.EQ.0, 'ipiv_blk allocation error2 in Compute_Hessian_Blocks')
1063  CALL blk3d_factor(dblk, lblk, ublk, ipiv_blk, mblk_size, ns)
1064  END IF ! LSCALAPACK
1065 
1066  CALL second0(skstoff)
1067  block_factorization_time=block_factorization_time+(skstoff-skston)
1068  CALL second0(toff)
1069  time_factor_blocks = time_factor_blocks + (toff-ton)
1070 
1071  IF (iam.EQ.0 .AND. lverbose) &
1072  print '(a,1p,e12.3)',' BLOCK FACTORIZATION TIME: ', toff-ton
1073 
1074  END SUBROUTINE compute_hessian_blocks_thomas
1075 
1076 
1077  SUBROUTINE serialscaling
1078  USE shared_data, ONLY: lequi1, niter
1079 !-----------------------------------------------
1080 ! L o c a l V a r i a b l e s
1081 !----------------------------------------------
1082  REAL(dp), PARAMETER :: zero=0, one=1
1083  INTEGER :: js, icol, icount
1084  REAL(dp) :: eps, minScale, ton, toff, colcnd
1085  REAL(dp), ALLOCATABLE :: col2(:), coltmp(:), col0(:,:)
1086 !----------------------------------------------
1087  CALL assert(all(col_scale.EQ.1), 'COL_SCALE != 1 INITIALLY IN SERIAL SCALING')
1088  icount = 0
1089 
1090  CALL second0(ton)
1091 
1092  IF (lequi1) THEN
1093  minscale = 1.e-5_dp
1094  ELSE
1095  minscale = 1.e-8_dp
1096  END IF
1097 
1098  ALLOCATE(colsum(mblk_size, ns), col2(mblk_size), &
1099  coltmp(mblk_size2), col0(mblk_size2, ns))
1100 
1101  111 CONTINUE
1102 
1103  block_row1: DO js = 1, ns
1104  cols1: DO icol = 1, mblk_size2
1105  col2 = abs(dblk(:,icol,js))
1106  IF (lequi1) THEN
1107  coltmp(icol) = sum(col2)
1108  ELSE
1109  coltmp(icol) = maxval(col2)
1110  END IF
1111  IF (js .GT. 1) THEN
1112  col2 = abs(ublk(:,icol,js-1))
1113  IF (lequi1) THEN
1114  coltmp(icol) = coltmp(icol) + sum(col2)
1115  ELSE
1116  coltmp(icol) = max(coltmp(icol),maxval(col2))
1117  END IF
1118  END IF
1119  IF (js .LT. ns) THEN
1120  col2 = abs(lblk(:,icol,js+1))
1121  IF (lequi1) THEN
1122  coltmp(icol) = coltmp(icol) + sum(col2)
1123  ELSE
1124  coltmp(icol) = max(coltmp(icol),maxval(col2))
1125  END IF
1126  END IF
1127  END DO cols1
1128  eps = minscale*maxval(coltmp) !need ALL(DataItem.eq.zero) check
1129  CALL assert(eps.GT.zero,' coltmp == 0 in SerialScaling')
1130  coltmp = max(coltmp, eps)
1131  coltmp = one/sqrt(coltmp)
1132  col0(:,js) = coltmp !save original ordering for col factors
1133 
1134 !gather colsum onto mblk_size from nprocs (each with mblk_size2 cols)
1135 !and reorder ROW indices
1136  IF (lscalapack) THEN
1137  CALL gathercols(colsum(:,js), coltmp)
1138  ELSE
1139  colsum(:,js) = coltmp
1140  END IF
1141 
1142  ENDDO block_row1
1143 
1144  CALL vector_copy_ser (colsum, col_scale)
1145 
1146 !SCALE BLOCKS
1147  block_row2: DO js = 1, ns
1148  cols2: DO icol = 1, mblk_size2
1149  dblk(:,icol,js) = colsum(:,js)*dblk(:,icol,js)*col0(icol,js)
1150  IF (js .GT. 1) THEN
1151  lblk(:,icol,js) = colsum(:,js)*lblk(:,icol,js)*col0(icol,js-1)
1152  END IF
1153  IF (js .LT. ns) THEN
1154  ublk(:,icol,js) = colsum(:,js)*ublk(:,icol,js)*col0(icol,js+1)
1155  END IF
1156  END DO cols2
1157  END DO block_row2
1158 
1159 !MAXIMUM COLUMN VARIATIONS (like an inverse "Condition number")
1160  colcnd = maxval(colsum)
1161  IF (colcnd .NE. zero) THEN
1162  colcnd = abs(minval(colsum)/colcnd)
1163  ELSE
1164  colcnd = 1
1165  END IF
1166 
1167  icount = icount + 1
1168 
1169 ! IF (icount.LE.6 .AND. colcnd.LT.0.01_dp) GO TO 111 !dgeequ logic
1170 
1171  DEALLOCATE(colsum, col0, col2, coltmp)
1172 
1173  CALL findminmax_tri_serial
1174  CALL second0(toff)
1175  IF (iam.EQ.0 .AND. lverbose) print '(1x,3(a,1p,e10.3))', &
1176  'COLUMN-SCALING TIME: ', toff-ton, ' COLCND: ', colcnd, &
1177  ' DMINL: ', dmin_tri*abs(levmarq_param)
1178 
1179 
1180  END SUBROUTINE serialscaling
1181 
1183  SUBROUTINE findminmax_tri_serial
1184 !-----------------------------------------------
1185 ! L o c a l V a r i a b l e s
1186 !----------------------------------------------
1187  REAL(dp), PARAMETER :: minThreshold=1.e-6, zero=0
1188  INTEGER :: js, icol, jrow
1189  REAL(dp) :: eps, DminL, temp(2), sign_diag=-1
1190 !----------------------------------------------
1191  maxeigen = 0
1192 
1193  dmin_tri = huge(dmin_tri)
1194 
1195  block_row3: DO js = 1, ns
1196  mindiag = 0
1197  jrow = 1+iam
1198  cols1: DO icol = 1, mblk_size2
1199  eps = dblk(jrow,icol,js)
1200  IF (abs(eps) .GT. mindiag) THEN
1201  mindiag = abs(eps)
1202  sign_diag = eps/mindiag
1203  END IF
1204  maxeigen = max(maxeigen, abs(lblk(jrow,icol,js)) &
1205  + abs(dblk(jrow,icol,js)) + abs(ublk(jrow,icol,js)))
1206 
1207  jrow = jrow + nprocs
1208  END DO cols1
1209 
1210 #if defined(MPI_OPT)
1211  IF (lscalapack) THEN
1212  temp(1) = mindiag; temp(2) = maxeigen
1213  CALL mpi_allreduce(mpi_in_place,temp,1,mpi_real8, mpi_max, &
1214  siesta_comm, mpi_err)
1215  mindiag = temp(1); maxeigen = temp(2)
1216  END IF
1217 #endif
1218  mindiag = minthreshold*mindiag
1219  IF (mindiag .EQ. zero) mindiag = minthreshold
1220 
1221  jrow = 1+iam
1222  cols2: DO icol = 1, mblk_size2
1223  eps = dblk(jrow,icol,js)
1224  IF (abs(eps) .LE. mindiag) THEN
1225  dblk(jrow,icol,js) = sign_diag*mindiag
1226  ELSE IF (js .LT. ns) THEN
1227  dmin_tri = min(max(1.e-12_dp,abs(eps)), dmin_tri)
1228  END IF
1229  jrow = jrow + nprocs
1230  END DO cols2
1231  END DO block_row3
1232 
1233 #if defined(MPI_OPT)
1234  IF (lscalapack) THEN
1235  dmin_tri = abs(dmin_tri)
1236  CALL mpi_allreduce(mpi_in_place,dmin_tri,1,mpi_real8, mpi_min, &
1237  siesta_comm, mpi_err)
1238  END IF
1239 #endif
1240  mindiag = sign_diag*dmin_tri
1241  dminl = abs(levmarq_param)*mindiag
1242 
1243 !SPH061016: RESTORE LM PARAMETER
1244  DO js = 1, ns
1245  jrow = 1+iam
1246  cols3: DO icol = 1, mblk_size2
1247  eps = dblk(jrow,icol,js)
1248  CALL assert(sign_diag*eps.GT.zero, &
1249  ' EPS*SIGN_DIAG < 0 IN FindMinMax_TRI_Serial')
1250  IF (lcolscale) THEN
1251  dblk(jrow,icol,js) = eps + sign(dminl,eps)
1252  ELSE
1253  dblk(jrow,icol,js) = eps*(1+abs(levmarq_param))
1254  END IF
1255  jrow = jrow + nprocs
1256  END DO cols3
1257  END DO
1258 
1259  CALL second0(toff)
1260 
1261  END SUBROUTINE findminmax_tri_serial
1262 
1263 
1264  SUBROUTINE vector_copy_ser(colsum, colscale)
1265 !-----------------------------------------------
1266 ! D u m m y A r g u m e n t s
1267 !-----------------------------------------------
1268  REAL(dp), INTENT(IN) :: COLSUM(mblk_size,ns)
1269  REAL(dp), INTENT(OUT) :: COLSCALE(mblk_size,ns)
1270 !-----------------------------------------------
1271 
1272  colscale = colsum * colscale
1273 
1274  END SUBROUTINE vector_copy_ser
1275 
1276 
1277  SUBROUTINE checkeigenvalues_serial(nblock, bsize)
1278 !-----------------------------------------------
1279 ! D u m m y A r g u m e n t s
1280 !-----------------------------------------------
1281  INTEGER, INTENT(IN) :: nblock, bsize
1282 !-----------------------------------------------
1283  INTEGER, SAVE :: nCount=0
1284  CHARACTER*1, PARAMETER :: JOBZ='N', uplo='U'
1285  INTEGER :: KD, N, LWORK, LDZ, OFFK, OFFS, OFFSU, &
1286  rowmin, rowmax, i, j, icol, js, ldab
1287  REAL(dp), ALLOCATABLE, DIMENSION(:) :: W, ACOL, WORK
1288  REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: AB, Z, BAND
1289  REAL(dp) :: ton, toff
1290 !-----------------------------------------------
1291  ncount = ncount+1
1292 
1293  IF (ncount .NE. 17) RETURN
1294  CALL second0(ton)
1295 
1296  kd = 2*bsize-1
1297  n = nblock*bsize; ldab = kd+1; ldz = 1
1298  lwork = max(1,3*n-2)
1299  offk = kd+1
1300  offs = 0
1301  offsu = bsize
1302 
1303  ALLOCATE (w(n), acol(n), work(lwork), ab(ldab,n), stat=i)
1304  CALL assert(i.EQ.0,'Allocation error in CheckEigenvalues_Serial')
1305  IF (jobz .NE. 'N') ALLOCATE(z(ldz,n))
1306  ab = 0 !stores (upper) bands (A assumed symmetric)
1307 
1308 !compute each col (labelled "j") for all rows ACOL(1:n1)
1309  DO js = 1, nblock
1310  DO icol = 1, bsize
1311  j = offs + icol
1312  rowmin = offs-bsize+1; rowmax = offsu+bsize
1313  IF (js .GT. 1) &
1314  acol(rowmin:offs) = ublk(:,icol,js-1)
1315  IF (js .LT. nblock) &
1316  acol(offsu+1:rowmax) = lblk(:,icol,js+1)
1317  acol(offs+1:offsu) = dblk(:,icol,js)
1318 
1319  DO i = max(1,j-kd,rowmin), min(j,n,rowmax)
1320  ab(offk+i-j,j) = acol(i)
1321  END DO
1322 
1323  END DO
1324 
1325  offs = offsu
1326  offsu = offsu + bsize
1327 
1328  END DO
1329 
1330 !real SYMMETRIC band matrix
1331  CALL dsbev( jobz, uplo, n, kd, ab, ldab, w, z, ldz, work, info )
1332 
1333  DEALLOCATE (ab, acol)
1334 
1335  DO j = 1, n
1336  WRITE (4000+ncount, '(i5,1p,2e12.4)') j, w(j), w(j)/w(1)
1337  END DO
1338  CALL flush(4000+ncount)
1339 
1340  DEALLOCATE (w)
1341  IF (jobz .NE. 'N') DEALLOCATE(z)
1342 
1343  CALL second0(toff)
1344  print 100,' Eigenvalue TIME: ', (toff-ton), ' INFO: ', info, &
1345  ' Eigenvalues written to FORT.',4000+ncount
1346  100 FORMAT(a,1p,e10.2,2(a,i4))
1347 
1348  END SUBROUTINE checkeigenvalues_serial
1349 
1350 
1351  SUBROUTINE checkconditionnumber_serial(nblock,bsize,anorm,rcond,info)
1352 !-----------------------------------------------
1353 ! D u m m y A r g u m e n t s
1354 !-----------------------------------------------
1355  INTEGER, INTENT(IN) :: nblock, bsize
1356  INTEGER, INTENT(OUT) :: info
1357  REAL(dp), INTENT(OUT) :: anorm, rcond
1358 !-----------------------------------------------
1359  REAL(dp), PARAMETER :: zero=0, one=1
1360  CHARACTER(LEN=1),PARAMETER :: CHNORM='1' !1 (1-norm) or 'I' for infinity norm
1361  INTEGER :: N1, JS, ICOL, ISTAT, OFFS, OFFSU, I, J, KU, KL, &
1362  ldab, offk, rowmin, rowmax
1363  INTEGER, ALLOCATABLE :: IWORK(:), IPIV(:)
1364  REAL(dp), ALLOCATABLE :: WORK(:), ACOL(:), AB(:,:)
1365  REAL(dp) :: ROWCND, COLCND, AMAX
1366 !-----------------------------------------------
1367  n1 = bsize*nblock
1368 
1369  ALLOCATE (acol(n1), stat=istat)
1370  CALL assert(istat.EQ.0,'COND # ALLOCATION1 FAILED IN HESSIAN')
1371 
1372  ku = 2*bsize-1
1373  kl = 2*bsize-1
1374  ldab = 2*kl+ku+1
1375  offk = kl+ku+1
1376 
1377  ALLOCATE(ab(ldab, n1), ipiv(n1), iwork(n1), work(3*n1), &
1378  stat=istat)
1379  CALL assert(istat.EQ.0,'COND # ALLOCATION2 FAILED IN HESSIAN')
1380 
1381  offs = 0
1382  offsu = bsize
1383  anorm = 0
1384  ab = 0 !stores bands
1385 
1386 !compute each col (labelled "j") for all rows ACOL(1:n1)
1387  DO js = 1, nblock
1388  DO icol = 1, bsize
1389  j = offs + icol
1390  rowmin = offs-bsize+1; rowmax = offsu+bsize
1391  IF (js .GT. 1) &
1392  acol(rowmin:offs) = ublk(:,icol,js-1)
1393  IF (js .LT. nblock) &
1394  acol(offsu+1:rowmax) = lblk(:,icol,js+1)
1395  acol(offs+1:offsu) = dblk(:,icol,js)
1396 
1397  IF (js.EQ.1) THEN
1398  rowmin = offs+1
1399  ELSE IF (js.EQ.nblock) THEN
1400  rowmax = offsu
1401  END IF
1402  anorm = max(anorm, sum(abs(acol(rowmin:rowmax))))
1403 
1404  DO i = max(1,j-ku,rowmin), min(n1,j+kl,rowmax)
1405  ab(offk+i-j,j) = acol(i)
1406  END DO
1407 
1408  END DO
1409 
1410  offs = offsu
1411  offsu = offsu + bsize
1412 
1413  END DO
1414 
1415  DEALLOCATE (acol)
1416 
1417  CALL dgbtrf( n1, n1, kl, ku, ab, ldab, ipiv, info )
1418  IF (info.EQ.0) &
1419  CALL dgbcon( '1', n1, kl, ku, ab, ldab, ipiv, anorm, rcond, &
1420  work, iwork, info )
1421 
1422  DEALLOCATE(ab, work, iwork, ipiv)
1423 
1424  IF (info.EQ.0 .AND. rcond.NE.zero) THEN
1425  rcond = one/rcond
1426  ELSE
1427  rcond = -one
1428  END IF
1429 
1430  END SUBROUTINE checkconditionnumber_serial
1431 
1432 !#define SVD_ON
1433 !#undef SVD_ON
1434 
1435  SUBROUTINE blk3d_factor(a, bm1, bp1, ipiv, mblk, nblocks)
1436  USE stel_constants, ONLY: one, zero
1437 !-----------------------------------------------
1438 ! D u m m y A r g u m e n t s
1439 !-----------------------------------------------
1440  INTEGER, INTENT(in) :: nblocks, mblk
1441  INTEGER, TARGET, INTENT(OUT) :: ipiv(mblk,nblocks)
1442  REAL(dp), TARGET, DIMENSION(mblk,mblk,nblocks), &
1443  INTENT(INOUT) :: a, bm1, bp1
1444 !-----------------------------------------------
1445 ! L o c a l V a r i a b l e s
1446 !-----------------------------------------------
1447 ! INTEGER :: ibuph, incnow, irecl, incbu, iunit=102, ndisk
1448  INTEGER :: k, k1, ier
1449  INTEGER, POINTER :: ipivot(:)
1450  REAL(dp), POINTER :: amat(:,:), bmat(:,:), cmat(:,:)
1451  REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: temp
1452 #if defined(SVD_ON)
1453  REAL(dp), ALLOCATABLE, DIMENSION(:) :: w, work
1454  REAL(dp), ALLOCATABLE, DIMENSION(:,:) :: vt, u
1455  REAL(dp), PARAMETER :: small = 1.e-300_dp
1456  INTEGER :: nw
1457  INTEGER :: lwork
1458  CHARACTER(LEN=1), PARAMETER :: jobu='A', jobvt='A'
1459 #endif
1460 !-----------------------------------------------
1461 ! modified (June, 2003, ORNL): S. P. Hirshman
1462 !-----------------------------------------------------------------------
1463 !
1464 ! this subroutine solves for the Q factors of a block-tridiagonal system of equations.
1465 !
1466 !-----------------------------------------------------------------------
1467 ! INPUT
1468 ! mblk : block size
1469 ! nblocks : number of blocks
1470 ! a : diagonal blocks
1471 ! bp1, bm1 : lower, upper blocks (see equation below)
1472 !
1473 ! OUTPUT
1474 ! ipiv : pivot elements for kth block
1475 ! a : a-1 LU factor blocks
1476 ! bm1 : q = a-1 * bm1 matrix
1477 !
1478 ! LOCAL VARIABLES
1479 ! iunit : unit number for block-tridiagonal solution disk file.
1480 !
1481 ! solutions are indexed in m-n fourier-space, legendre-space. the tri-diagonal
1482 ! equation is:
1483 !
1484 ! bm1 * f(l-1) + a * f(l) + bp1 * f(l+1) = source(l)
1485 !
1486 ! GENERAL SOLUTION SCHEME APPLIED TO EACH BLOCK ROW (INDEX L)
1487 !
1488 ! 1. Start from row N and solve for x(N) in terms of x(N-1):
1489 !
1490 ! x(N) = -q(N)*x(N-1) + r(N)
1491 !
1492 ! q(N) = a(N)[-1] * bm1; r(N) = a(N)[-1] * s(N)
1493 !
1494 ! where a(N)[-1] is the inverse of a(N)
1495 !
1496 ! 2. Substitute for lth row to get recursion equation for q(l) and r(l):
1497 !
1498 ! x(l) = -q(l)*x(l-1) + r(l), in general, where:
1499 !
1500 ! q(l) = (a(l) - bp1(l)*q(l+1))[-1] * bm1(l)
1501 !
1502 ! qblk(l) == (a(l) - bp1(l) * q(l+1))[-1] on return
1503 !
1504 ! r(l) = (a(l) - bp1(l)*q(l+1))[-1] * (s(l) - bp1(l)*r(l+1))
1505 !
1506 ! 3. At row l = 1, bm1(1) = 0 and get an equation for x(1) corresponding to q(1) = 0:
1507 !
1508 ! x(1) = r(1)
1509 !
1510 ! 4. Finally, can back-solve for x(N) in terms of x(N-1) from eqn.(1) above
1511 !
1512 !
1513 ! NUMERICAL IMPLEMENTATION (USING LAPACK ROUTINES)
1514 !
1515 ! IF USING SVD:
1516 !
1517 ! 1. CALL dgesvd: Perform SVD decomposition
1518 ! 2. CALL svbksb: Solve Ax = bi, for i=1,mblk
1519 !
1520 ! OTHERWISE
1521 !
1522 ! 1. CALL dgetrf: Perform LU factorization of diagonal block (A) - faster than sgefa
1523 ! 2. CALL dgetrs: With multiple (mblk) right-hand sides, to do block inversion
1524 ! operation, Ax = B (stores result in B; here B is a matrix)
1525 !
1526 ! ndisk = mblk
1527 
1528 ! main loop. load and process (backwards) block-rows nblocks to 1.
1529 
1530 #if defined(SVD_ON)
1531  lwork = 10*mblk
1532  ALLOCATE(vt(mblk,mblk), w(mblk), &
1533  u(mblk,mblk), work(lwork), stat=ier)
1534  CALL assert(ier.EQ.0,'Allocation error in blk3d_factor!')
1535 #endif
1536  ipiv = 0
1537  ALLOCATE (temp(mblk,mblk), stat=ier)
1538  CALL assert(ier.EQ.0, 'Allocation error in blk3d_factor!')
1539 
1540  blocks: DO k = nblocks, 1, -1
1541 !
1542 ! Compute (and save) qblk(nblocks) = ablk(nblocks)[-1] * bml
1543 !
1544  amat => a(:,:,k)
1545 
1546 #if defined(SVD_ON)
1547 !NOTE: v = vt coming out here
1548  CALL dgesvd (jobu, jobvt, mblk, mblk, amat, mblk, w, u, mblk, &
1549  vt, mblk, work, lwork, ier)
1550 !Set SVD weights w to 0 for weights below the allowed threshold, so backsolver
1551 !will compute pseudo-inverse
1552  WRITE (35, '(a,i4,a,1pe12.4)') 'Block: ',k, ' Condition #: ', sqrt(w(1)/w(mblk))
1553  print '(a,i4,a,1pe12.4)',' BLOCK: ',k,' SVD COND #: ', sqrt(w(1)/w(mblk))
1554  DO nw = 2, mblk
1555  IF (w(nw) .LE. small*w(1)) w(nw) = 0
1556  END DO
1557  CALL assert(ier.EQ.0,'SVD ERROR')
1558 !
1559 ! STORE svd pseudo-inverse IN AMAT since u,vt,w will be deallocated at end
1560  CALL svdinv2 (amat, u, vt, w, mblk)
1561 #else
1562  ipivot => ipiv(:,k)
1563  CALL dgetrf (mblk, mblk, amat, mblk, ipivot, ier)
1564  CALL assert(ier.EQ.0,'DGETRF ERROR')
1565 #endif
1566  IF (k .eq. 1) EXIT
1567 
1568  bmat => bm1(:,:,k)
1569 
1570 #if defined(SVD_ON)
1571 !
1572 ! Use (pseudo) inverse stored in AMAT = V*1/w*Ut
1573 !
1574  temp = bmat
1575  bmat = matmul(amat, temp)
1576 #else
1577  CALL dgetrs('n', mblk, mblk, amat, mblk, ipivot, &
1578  bmat, mblk, ier)
1579  CALL assert(ier.EQ.0,'dgetrs INFO != 0')
1580 #endif
1581 ! CALL wrdisk(iunit, ql, ndisk, incnow, ibuph, incbu, ier)
1582 ! IF (ier .NE. 0) GOTO 302
1583 
1584 !
1585 ! Update effective diagonal "a" matrix. Use dgemm: faster AND doesn't overflow normal stack
1586 !
1587  k1 = k-1
1588  amat => bp1(:,:,k1)
1589  cmat => a(:,:,k1)
1590 ! cmat = cmat - MATMUL(amat, bmat)
1591  CALL dgemm('N','N',mblk,mblk,mblk,-one,amat,mblk, &
1592  bmat, mblk, one, cmat, mblk)
1593 
1594  END DO blocks
1595 
1596 #if defined(SVD_ON)
1597  DEALLOCATE(vt, w, u, work)
1598 #endif
1599 !
1600 ! COMPUTE TRANSPOSES HERE, SINCE REPEATEDLY CALLING MATMUL OPERATION
1601 ! X*At IS FASTER THAN A*X DUE TO UNIT STRIDE
1602 !
1603 
1604  DO k = 1, nblocks
1605  IF (k .NE. nblocks) THEN
1606  temp = transpose(bp1(:,:,k))
1607  bp1(:,:,k) = temp
1608  END IF
1609  IF (k .NE. 1) THEN
1610  temp = transpose(bm1(:,:,k))
1611  bm1(:,:,k) = temp
1612  END IF
1613  END DO
1614 
1615  DEALLOCATE (temp)
1616 
1617  END SUBROUTINE blk3d_factor
1618 
1619 
1620  SUBROUTINE block_precond(gc)
1621  USE timer_mod
1622 !-----------------------------------------------
1623 ! D u m m y A r g u m e n t s
1624 !-----------------------------------------------
1625  REAL(dp), DIMENSION(mblk_size,ns), INTENT(INOUT) :: gc
1626 !-----------------------------------------------
1627 ! L o c a l V a r i a b l e s
1628 !-----------------------------------------------
1629  REAL(dp), PARAMETER :: zero = 0
1630  INTEGER :: m, n, js, ntype, istat, irow
1631  REAL(dp) :: t1, error, error_sum, b_sum
1632  REAL(dp), ALLOCATABLE :: gc_s(:,:)
1633 #if defined(MPI_OPT)
1634  REAL(dp) :: alpha0,beta0
1635  INTEGER :: i,j,k,nn,kk,ia,ja,ix,jx,ic,jc,ib,jb
1636 #endif
1637  REAL(dp) :: starttime, endtime, usedtime
1638  INTEGER :: PRECONDPASS=0
1639  INTEGER :: globrow
1640 
1641 !-----------------------------------------------
1642  starttime = 0
1643  endtime = 0
1644 
1645 !-----------------------------------------------
1646 !
1647 ! Applies 3D block-preconditioner to forces vector (gc)
1648 !
1649  IF (parsolver) THEN
1650  DO globrow = mystartblock, myendblock
1651  CALL setmatrixrhs(globrow, gc(1:mblk_size,globrow))
1652  END DO
1653 
1654  CALL backwardsolve
1655 
1656  DO globrow = mystartblock, myendblock
1657  CALL getsolutionvector(globrow, gc(:,globrow))
1658  END DO
1659 
1660  CALL gather_array(gc)
1661  ELSE
1662 
1663  CALL second0(starttime)
1664 
1665  IF (l_backslv) THEN
1666  istat = 0
1667  IF (.NOT.ALLOCATED(gc_s)) ALLOCATE (gc_s(mblk_size,ns), stat=istat)
1668  CALL assert(istat.EQ.0,'Allocation error0 in block_precond')
1669  gc_s = gc
1670  END IF
1671 
1672 ! Solve Hx = gc, using Hessian (H) LU factors stored in block_... matrices
1673 ! Store solution "x" in gc on output
1674  IF (lscalapack) THEN
1675 #if defined(MPI_OPT)
1676  ALLOCATE (tempp(ineedr), stat=istat)
1677  ia = 1
1678  ja = 1
1679  ib = 1
1680  jb = 1
1681  beta0 = 0.0d0
1682  alpha0 = 1.0d0
1683  mm0 = descr(3)
1684  nrhs0 = descr(4)
1685  call profstart('pdgeadd call:123')
1686  call pdgeadd('N', mm0, nrhs0, alpha0, gc, ia, ja, descr_all, &
1687  beta0, tempp, ib, jb, descr)
1688  call profend('pdgeadd call:123')
1689  ir = 1
1690  jr = 1
1691  call blacs_barrier(icontxt,'All')
1692  call profstart('solver call:123')
1693  call pdtrds(mblk_size,ns,dblkp,lblkp,ublkp,ipiv_blk,desca, &
1694  nrhs1,tempp,ir,jr,descr)
1695  call blacs_barrier(icontxt,'All')
1696  call profend('solver call:123')
1697  gc = 0
1698  ia = 1
1699  ja = 1
1700  ib = 1
1701  jb = 1
1702  beta0 = 0.0d0
1703  alpha0 = 1.0d0
1704  mm0 = descr(3)
1705  nrhs0 = descr(4)
1706 ! descR_all(7)=-1
1707 ! descR_all(8)=-1
1708  call profstart('pdgeadd2 call:123')
1709  call pdgeadd('N', mm0,nrhs0, alpha0, tempp,ia,ja,descr, &
1710  beta0,gc,ib,jb,descr_all)
1711  call dgsum2d( icontxt,'A', ' ', mm,1,gc,mm,-1,-1)
1712  call profend('pdgeadd2 call:123')
1713 
1714  DEALLOCATE (tempp, stat=istat)
1715 #else
1716  CALL assert(.false.,'MPI_OPT must be true for LSCALAPACK!')
1717 #endif
1718  ELSE !NOT LSCALAPACK
1719 
1720  CALL second0(endtime)
1721  usedtime=endtime-starttime
1722  IF (sksdbg) WRITE(tofu,*) &
1723  'HessianTag-2 : Time to BackwardSolve:',usedtime, &
1724  'PRECONDPASS',precondpass,'in native run'
1725  IF (sksdbg) CALL flush(tofu)
1726 
1727 ! Serial solver
1728  CALL blk3d_slv(dblk, lblk, ublk, gc, ipiv_blk, mblk_size, ns)
1729  END IF !END LSCALAPACK
1730  END IF !END OF IF(PARSOLVER) CONDITIONAL
1731  precondpass = precondpass + 1
1732 
1733  IF (l_backslv .AND. ALLOCATED(dblk_s) .AND. .NOT.l_linearize) THEN
1734  error_sum = 0; b_sum = sqrt(sum(gc_s*gc_s)/neqs)
1735  CALL assert(ALLOCATED(gc_s),'gc_s is not allocated')
1736 
1737  WRITE (34, '(2(/,a))') ' BLK3D FACTORIZATION CHECK: Ax = b', &
1738  ' IROW M N NTYPE FORCE DELTA-FORCE'
1739 
1740  DO js = startglobrow, endglobrow
1741  irow = 0
1742  WRITE (34,*) ' JS: ', js
1743  DO ntype = 1, ndims
1744  DO n = -ntor, ntor
1745  DO m = 0, mpol
1746  irow = irow+1
1747  t1 = sum(dblk_s(irow,:,js)*gc(:,js))
1748  IF (js .LT. ns) THEN
1749  t1 = t1 + sum(ublk_s(irow,:,js)*gc(:,js+1))
1750  END IF
1751  IF (js .GT. 1) THEN
1752  t1 = t1 + sum(lblk_s(irow,:,js)*gc(:,js-1))
1753  END IF
1754 
1755  error = t1 - gc_s(irow,js)
1756  error_sum = error_sum + error**2
1757  WRITE(34,110) irow, m, n, ntype, gc_s(irow,js)/b_sum, &
1758  error/b_sum
1759  END DO
1760  END DO
1761  END DO
1762  END DO
1763 
1764  DEALLOCATE(dblk_s, lblk_s, ublk_s, gc_s, stat=istat)
1765  IF (lverbose) THEN
1766  print '(3(a,1pe12.4))',' |b|: ', b_sum, &
1767  ' |x| = ', sqrt(sum(gc*gc)/neqs), &
1768  ' Hessian Error: ', sqrt(error_sum/neqs)/b_sum
1769  END IF
1770 
1771  END IF
1772  110 FORMAT(4i6,1p,2e16.4)
1773 
1774  100 FORMAT(i6,1p,5e14.4)
1775  101 FORMAT(i6,1p,5e14.4,' *')
1776 
1777  END SUBROUTINE block_precond
1778 
1779 
1780  SUBROUTINE apply_precond(gc)
1781 !-----------------------------------------------
1782 ! D u m m y A r g u m e n t s
1783 !-----------------------------------------------
1784  REAL(dp), DIMENSION(mblk_size,ns), INTENT(INOUT) :: gc
1785 !-----------------------------------------------
1786 ! L o c a l V a r i a b l e s
1787 !-----------------------------------------------
1788  REAL(dp) :: ton, toff
1789 !-----------------------------------------------
1790  CALL second0(ton)
1791 ! IF (ltype) THEN
1792 ! CALL diag_precond(gc)
1793 ! ELSE
1794  CALL block_precond(gc)
1795 ! END IF
1796  CALL second0(toff)
1797  time_apply_precon = time_apply_precon+(toff-ton)
1798 
1799  END SUBROUTINE apply_precond
1800 
1801 
1802  SUBROUTINE blk3d_slv(ablk, qblk, bp1, source, ipiv, mblk, nblocks)
1803  USE stel_kinds
1804  USE stel_constants, ONLY: one
1805 !-----------------------------------------------
1806 ! D u m m y A r g u m e n t s
1807 !-----------------------------------------------
1808  INTEGER, INTENT(in) :: nblocks, mblk
1809  INTEGER, TARGET, INTENT(in) :: ipiv(mblk,nblocks)
1810  REAL(dp), TARGET, DIMENSION(mblk,mblk,nblocks), INTENT(IN) :: &
1811  ablk, qblk, bp1
1812  REAL(dp), DIMENSION(mblk,nblocks), TARGET, INTENT(INOUT) :: source
1813 !-----------------------------------------------
1814 ! L o c a l V a r i a b l e s
1815 !-----------------------------------------------
1816  INTEGER, POINTER :: ipivot(:)
1817 ! INTEGER :: ibuph, incnow, irecl, incbu, iunit=102, ndisk
1818  INTEGER :: k, ier
1819  REAL(dp), POINTER :: amat(:,:), x1(:), source_ptr(:)
1820 !-----------------------------------------------
1821 ! modified (June, 2003, ORNL): S. P. Hirshman
1822 !-----------------------------------------------------------------------
1823 !
1824 ! this subroutine solves a block-tridiagonal system of equations, using
1825 ! the ABLK, QBLK factors from blk3d_factor,
1826 !
1827 !-----------------------------------------------------------------------
1828 ! INPUT
1829 ! mblk : block size
1830 ! nblocks : number of blocks
1831 ! bp1 : upper blocks (see equation below)
1832 ! ipiv : pivot elements for kth block
1833 ! ablk : a-1 blocks
1834 ! qblk : q = a-1 * bm1
1835 ! source : input right side
1836 !
1837 ! OUTPUT
1838 ! source : Solution x of A x = source
1839 !
1840 ! LOCAL VARIABLES
1841 ! iunit : unit number for block-tridiagonal solution disk file.
1842 !
1843 ! the tri-diagonal equation is:
1844 !
1845 ! bm1 * f(l-1) + a * f(l) + bp1 * f(l+1) = source(l)
1846 !
1847 ! GENERAL SOLUTION SCHEME APPLIED TO EACH BLOCK ROW (INDEX L)
1848 !
1849 ! 1. Start from row N and solve for x(N) in terms of x(N-1):
1850 !
1851 ! x(N) = -q(N)*x(N-1) + r(N)
1852 !
1853 ! q(N) = a(N)[-1] * bm1; r(N) = a(N)[-1] * s(N)
1854 !
1855 ! where a(N)[-1] is the inverse of a(N)
1856 !
1857 ! 2. Substitute for lth row to get recursion equation fo q(l) and r(l):
1858 !
1859 ! x(l) = -q(l)*x(l-1) + r(l), in general, where:
1860 !
1861 ! q(l) = (a(l) - bp1(l)*q(l+1))[-1] * bm1(l)
1862 !
1863 ! qblk(l) == (a(l) - bp1(l) * q(l+1))[-1] on return
1864 !
1865 ! r(l) = (a(l) - bp1(l)*q(l+1))[-1] * (s(l) - bp1(l)*r(l+1))
1866 !
1867 ! 3. At row l = 1, bm1(1) = 0 and get an equation for x(1) corresponding to q(1) = 0:
1868 !
1869 ! x(1) = r(1)
1870 !
1871 ! 4. Finally, can back-solve for x(N) in terms of x(N-1) from eqn.(1) above
1872 !
1873 !
1874 ! NUMERICAL IMPLEMENTATION (USING LAPACK ROUTINES)
1875 !
1876 ! 1. CALL dgetrs: With single right hand side (source) to solve A x = b (b a vector)
1877 ! Faster than dgesl
1878 ! ndisk = mblk
1879 
1880 ! main loop. load and process (backwards) block-rows nblocks to 1.
1881 ! note: about equal time is spent in calling dgetrs and in performing
1882 ! the two loop sums: on ibm-pc, 2 s (trs) vs 3 s (sums); on linux (logjam),
1883 ! 2.4 s (trs) vs 3 s (sums).
1884 
1885 !
1886 ! Back-solve for modified sources first
1887 !
1888  blocks: DO k = nblocks, 1, -1
1889 
1890  source_ptr => source(:,k)
1891  amat => ablk(:,:,k)
1892 #if defined(SVD_ON)
1893  source_ptr = matmul(amat, source_ptr)
1894 #else
1895  ipivot => ipiv(:,k);
1896  CALL dgetrs('n', mblk, 1, amat, mblk, ipivot, source_ptr, mblk, ier)
1897  CALL assert(ier.EQ.0,'DGETRS INFO != 0')
1898 #endif
1899  IF (k .eq. 1) EXIT
1900 !
1901 ! NOTE: IN BLK3D_FACTOR, BP1 AND BM1 WERE TRANSPOSED (AND STORED)
1902 ! TO MAKE FIRST INDEX FASTEST VARYING IN THE FOLLOWING MATMUL OPS
1903 !
1904  amat => bp1(:,:,k-1)
1905  x1 => source(:,k); source_ptr => source(:,k-1)
1906  CALL dgemv('T',mblk,mblk,-one,amat,mblk,x1,1,one,source_ptr,1)
1907 
1908  END DO blocks
1909 !
1910 ! forward (back-substitution) solution sweep for block-rows k = 2 to nblocks
1911 ! now, source contains the solution vector
1912 !
1913  DO k = 2, nblocks
1914 ! CALL rddisk (iunit, ql, ndisk, incnow, ibuph, ier)
1915 ! IF (ier .NE. 0) GOTO 303
1916 ! ibuph = ibuph - incbu
1917 
1918  amat => qblk(:,:,k)
1919  x1 => source(:,k-1); source_ptr => source(:,k)
1920 ! source_ptr = source_ptr - MATMUL(x1,amat) !USE THIS FORM IF TRANSPOSED qblk
1921 ! source_ptr = source_ptr - MATMUL(amat,x1) !UNTRANSPOSED FORM
1922  CALL dgemv('T',mblk,mblk,-one,amat,mblk,x1,1,one,source_ptr,1)
1923 
1924  END DO
1925 
1926  END SUBROUTINE blk3d_slv
1927 
1928 
1929  SUBROUTINE dealloc_hessian
1930  INTEGER :: istat
1931 
1932  IF (ALLOCATED(ublk)) DEALLOCATE(ublk, dblk, lblk, stat=istat)
1933  IF (ALLOCATED(ublkp))DEALLOCATE(dblkp,lblkp,ublkp,stat=istat)
1934  IF (ALLOCATED(mupar_norm)) DEALLOCATE(mupar_norm, stat=istat)
1935  IF (ALLOCATED(ipiv_blk)) DEALLOCATE (ipiv_blk)
1936 
1937  END SUBROUTINE dealloc_hessian
1938 
1939 #if defined(MPI_OPT)
1940  SUBROUTINE blk3d_parallel(a,b,c,ap,bp,cp,mblk,mblkc,nblocks)
1941 !-----------------------------------------------
1942 ! L o c a l P a r a m e t e r s
1943 !-----------------------------------------------
1944  INTEGER :: ipart,jpart,k
1945  INTEGER :: ia,ja,inc1,ib,jb,inc2,nsm
1946 !-----------------------------------------------
1947 ! D u m m y A r g u m e n t s
1948 !-----------------------------------------------
1949  INTEGER, INTENT(IN) :: nblocks, mblk, mblkc
1950  REAL(dp) :: aij2,aij
1951  REAL(dp), TARGET, DIMENSION(mblk,mblkc,nblocks), &
1952  INTENT(INOUT) :: a, b, c
1953  REAL(dp), TARGET, DIMENSION(Locq,Locp,nblocks), &
1954  INTENT(INOUT) :: ap
1955  REAL(dp), TARGET, DIMENSION(Locq,Locp,nblocks-1), &
1956  INTENT(INOUT) :: bp, cp
1957  REAL(dp), POINTER :: amat(:,:), bmat(:,:), cmat(:,:)
1958  REAL(dp), POINTER :: amatp(:,:), bmatp(:,:), cmatp(:,:)
1959 
1960  do k=1,nblocks
1961  call profstart('change context:123')
1962  call blacs_barrier(desca(ctxt_),'All')
1963  amat => a(:,:,k)
1964  amatp => ap(:,:,k)
1965  call pdgemr2d(mblk,mblk,amat,1,1,desca_1xp, &
1966  amatp,1,1,desca, icontxt_global)
1967  if(k<nblocks)then
1968  bmat => b(:,:,k+1)
1969  bmatp => bp(:,:,k)
1970  call pdgemr2d(mblk,mblk,bmat,1,1,desca_1xp, &
1971  bmatp,1,1,desca, icontxt_global)
1972  cmat => c(:,:,k)
1973  cmatp => cp(:,:,k)
1974  call pdgemr2d(mblk,mblk,cmat,1,1,desca_1xp, &
1975  cmatp,1,1,desca, icontxt_global)
1976  endif
1977  call blacs_barrier(desca(ctxt_),'All')
1978  call profend('change context:123')
1979 ! do ja=1,mblk
1980 ! call pdelget( 'A',' ',aij, amatp,ja,ja,descA )
1981 ! call pdelget( 'A',' ',aij2, amat,ja,ja,descA_1xp )
1982 ! write(*,*) 'ja,ja,aij ',ja,ja,aij,aij2
1983 ! enddo
1984  end do
1985  END SUBROUTINE blk3d_parallel
1986 #endif
1987 
1988  END MODULE hessian
shared_data::nprecon
integer nprecon
Preconditioner flag.
Definition: shared_data.f90:68
shared_data::lverbose
logical lverbose
Use verbose screen output.
Definition: shared_data.f90:234
shared_data::l_getfsq
logical l_getfsq
Compute |F|^2.
Definition: shared_data.f90:216
shared_data::xc
real(dp), dimension(:), allocatable xc
1D array of Fourier mode displacement components.
Definition: shared_data.f90:97
hessian::gather_array
Definition: hessian.f90:31
shared_data::l_applyprecon
logical l_applyprecon
Apply preconditioner.
Definition: shared_data.f90:218
v3_utilities::assert_eq
Definition: v3_utilities.f:62
v3_utilities::assert
Definition: v3_utilities.f:55
shared_data::lequi1
logical lequi1
Equilibrate matrix with col 1-norm.
Definition: shared_data.f90:236
shared_data::col_scale
real(dp), dimension(:,:,:,:), allocatable col_scale
Column scaling factors.
Definition: shared_data.f90:110
shared_data::niter
integer niter
Total number of iteration to run.
Definition: shared_data.f90:60
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11
shared_data::lcolscale
logical lcolscale
Apply column scaling to hessian.
Definition: shared_data.f90:228
island_params
This file contains fix parameters related to the computational grids.
Definition: island_params.f90:10
shared_data::l_linearize
logical l_linearize
Use linearized forces.
Definition: shared_data.f90:210
shared_data::unit_out
integer, parameter unit_out
File output io unit.
Definition: shared_data.f90:39
blocktridiagonalsolver_s
Module contains subroutines for solver for block tri-diagonal matrices.
Definition: blocktridiagonalsolver_s.f90:7
shared_data::gc
real(dp), dimension(:), allocatable, target gc
1D Array of Fourier mode MHD force components
Definition: shared_data.f90:99
shared_data::ndims
integer ndims
Number of independent variables.
Definition: shared_data.f90:58
shared_data::mblk_size
integer mblk_size
Block size. (mpol + 1)*(2*ntor + 1)*ndims.
Definition: shared_data.f90:62
shared_data
This file contains variables and parameters used by many modules in SIESTA.
Definition: shared_data.f90:10
shared_data::neqs
integer neqs
Number of elements in the xc array.
Definition: shared_data.f90:54