V3FIT
lmdif.f90
1  SUBROUTINE lmdif(fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, &
2  epsfcn, diag, mode, factor, nprint, info, nfev, fjac, &
3  ldfjac, ipvt, qtf, wa1, wa2, wa3, wa4)
4  USE stel_kinds
5  USE lmpar_mod, fjac_mod=>fjac, ldfjac_mod=>ldfjac, &
6  ipvt_mod=>ipvt, qtf_mod=>qtf, diag_mod=>diag
7 #if defined(MPI_OPT)
8  USE fdjac_mod, ONLY: flip,flag_singletask,flag_cleanup,fdjac2_mp
9 #else
10  USE fdjac_mod, ONLY: max_processors, flip, flag_singletask, &
11  flag_cleanup, fdjac2
12 #endif
13  USE mpi_params
14  USE mpi_inc
15  IMPLICIT NONE
16 !-----------------------------------------------
17 ! D u m m y A r g u m e n t s
18 !-----------------------------------------------
19  INTEGER :: m, n, maxfev, mode, nprint, info, nfev, ldfjac
20  REAL(rprec), INTENT(in) :: ftol, xtol, gtol, epsfcn, factor
21  REAL(rprec), DIMENSION(n) :: x, wa1, wa2, wa3
22  REAL(rprec), DIMENSION(m) :: fvec, wa4
23  INTEGER, DIMENSION(n), TARGET :: ipvt
24  REAL(rprec), DIMENSION(n), TARGET :: diag, qtf
25  REAL(rprec), DIMENSION(ldfjac,n), TARGET :: fjac
26 !-----------------------------------------------
27 ! L o c a l P a r a m e t e r s
28 !-----------------------------------------------
29  REAL(rprec), PARAMETER :: zero = 0, one = 1, &
30  p1=0.1_dp, p5=0.5_dp, p25=0.25_dp, p75=0.75_dp, p0001=1.e-4_dp
31  CHARACTER(LEN=130), DIMENSION(0:11) :: info_array
32 !-----------------------------------------------
33 ! L o c a l V a r i a b l e s
34 !-----------------------------------------------
35  INTEGER :: iflag, iter, j, l, istat, ikey
36  INTEGER :: cycle_count, subcycle
37  REAL(rprec) :: actred, dirder, epsmch, fnorm, &
38  gnorm, prered, ratio, sum0, temp, &
39  temp1, temp2, xnorm
40  REAL(rprec), DIMENSION(:,:), ALLOCATABLE :: fjac_save
41 #if !defined(MPI_OPT)
42  REAL(rprec) :: wall_time, wall_time_lev
43 #endif
44  REAL(rprec) :: fnorm_min
45  REAL(rprec), ALLOCATABLE, DIMENSION(:) :: x_min, fvec_min
46 !-----------------------------------------------
47 ! E x t e r n a l F u n c t i o n s
48 !-----------------------------------------------
49  EXTERNAL fcn
50  REAL(rprec), EXTERNAL :: dpmpar, enorm
51 !-----------------------------------------------
52 !
53 ! SUBROUTINE lmdif
54 !
55 ! the purpose of lmdif is to minimize the sum of the squares of
56 ! m nonlinear functions in n variables by a modification of
57 ! the levenberg-marquardt algorithm. the user must provide a
58 ! subroutine which calculates the functions. the jacobian is
59 ! then calculated by a forward-difference approximation.
60 !
61 ! the subroutine statement is
62 !
63 ! subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
64 ! diag,mode,factor,nprint,info,nfev,fjac,
65 ! ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
66 !
67 ! where
68 !
69 ! fcn is the name of the user-supplied subroutine which
70 ! calculates the functions. fcn must be declared
71 ! in an external statement in the user calling. see lmdif1 for
72 ! documentation and should be written as follows.
73 !
74 ! subroutine fcn(m, n, x, fvec, iflag, ncnt)
75 ! integer m,n,iflag
76 ! real(rprec) x(n),fvec(m)
77 ! ----------
78 ! calculate the functions at x and
79 ! return this vector in fvec.
80 ! ----------
81 ! return
82 ! end
83 !
84 !
85 ! ftol is a nonnegative input variable. termination
86 ! occurs when both the actual and predicted relative
87 ! reductions in the sum of squares are at most ftol.
88 ! therefore, ftol measures the relative error desired
89 ! in the sum of squares.
90 !
91 ! xtol is a nonnegative input variable. termination
92 ! occurs when the relative error between two consecutive
93 ! iterates is at most xtol. therefore, xtol measures the
94 ! relative error desired in the approximate solution.
95 !
96 ! gtol is a nonnegative input variable. termination
97 ! occurs when the cosine of the angle between fvec and
98 ! any column of the jacobian is at most gtol in absolute
99 ! value. therefore, gtol measures the orthogonality
100 ! desired between the function vector and the columns
101 ! of the jacobian.
102 !
103 ! maxfev is a positive integer input variable. termination
104 ! occurs when the number of calls to fcn is at least
105 ! maxfev by the end of an iteration.
106 !
107 ! epsfcn is an input variable used in determining a suitable
108 ! step length for the forward-difference approximation. this
109 ! approximation assumes that the relative errors in the
110 ! functions are of the order of epsfcn. if epsfcn is less
111 ! than the machine precision, it is assumed that the relative
112 ! errors in the functions are of the order of the machine
113 ! precision.
114 !
115 ! diag is an array of length n. if mode = 1 (see
116 ! below), diag is internally set. if mode = 2, diag
117 ! must contain positive entries that serve as
118 ! multiplicative scale factors for the variables.
119 !
120 ! mode is an integer input variable. if mode = 1, the
121 ! variables will be scaled internally. if mode = 2,
122 ! the scaling is specified by the input diag. other
123 ! values of mode are equivalent to mode = 1.
124 !
125 ! factor is a positive input variable used in determining the
126 ! initial step bound. this bound is set to the product of
127 ! factor and the euclidean norm of diag*x if nonzero, or else
128 ! to factor itself. in most cases factor should lie in the
129 ! interval (.1,100.). 100. is a generally recommended value.
130 !
131 ! nprint is an integer input variable that enables controlled
132 ! printing of iterates if it is positive. in this case,
133 ! fcn is called with iflag = 0 at the beginning of the first
134 ! iteration and every nprint iterations thereafter and
135 ! immediately prior to return, with x and fvec available
136 ! for printing. if nprint is not positive, no special calls
137 ! of fcn with iflag = 0 are made.
138 !
139 ! info is an integer output variable. if the user has
140 ! terminated execution, info is set to the (negative)
141 ! value of iflag. see description of fcn. otherwise,
142 ! info is set
143 ! nfev is an integer output variable set to the number of
144 ! calls to fcn.
145 !
146 ! fjac is an output m by n array. the upper n by n submatrix
147 ! of fjac contains an upper triangular matrix r with
148 ! diagonal elements of nonincreasing magnitude such that
149 !
150 ! t t t
151 ! p *(jac *jac)*p = r *r,
152 !
153 ! where p is a permutation matrix and jac is the final
154 ! calculated jacobian. column j of p is column ipvt(j)
155 ! (see below) of the identity matrix. the lower trapezoidal
156 ! part of fjac contains information generated during
157 ! the computation of r.
158 !
159 ! ldfjac is a positive integer input variable not less than m
160 ! which specifies the leading dimension of the array fjac.
161 !
162 ! ipvt is an integer output array of length n. ipvt
163 ! defines a permutation matrix p such that jac*p = q*r,
164 ! where jac is the final calculated jacobian, q is
165 ! orthogonal (not stored), and r is upper triangular
166 ! with diagonal elements of nonincreasing magnitude.
167 ! column j of p is column ipvt(j) of the identity matrix.
168 !
169 ! qtf is an output array of length n which contains
170 ! the first n elements of the vector (q transpose)*fvec.
171 !
172 ! wa1, wa2, and wa3 are work arrays of length n.
173 !
174 ! wa4 is a work array of length m.
175 !
176 ! subprograms called
177 !
178 ! user-supplied ...... fcn
179 !
180 ! minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
181 !
182 ! fortran-supplied ... ABS,max,min,sqrt,mod
183 !
184 ! argonne national laboratory. MINpack project. march 1980.
185 ! burton s. garbow, kenneth e. hillstrom, jorge j. more
186 !
187 ! **********
188 
189 #if defined(MPI_OPT)
190 ! Get mpi parameters
191  CALL mpi_comm_rank (mpi_comm_world, myid, ierr_mpi) !mpi stuff
192  IF (ierr_mpi .ne. 0) stop 'MPI_COMM_RANK error in LMDIF'
193  CALL mpi_comm_size (mpi_comm_world, numprocs, ierr_mpi) !mpi stuff
194  IF (ierr_mpi .ne. 0) stop 'MPI_COMM_SIZE error in LMDIF'
195 #endif
196  info = 0; iflag = 0; nfev = 0; cycle_count = 0
197 
198  info_array(9) = &
199  "improper input parameters (number constraints MUST " // &
200  "be greater than number variables (>0)."
201 
202  info_array(1) = &
203  "algorithm estimates that the relative error in the " // &
204  "sum of squares is at most ftol."
205 
206  info_array(2) = &
207  "algorithm estimates that the relative error between" // &
208  " x and the solution is at most xtol."
209 
210  info_array(3) = &
211  "algorithm estimates that the relative error in the " // &
212  "sum of squares and between x and the solution is at" // &
213  " most ftol and xtol."
214 
215  info_array(4) = &
216  "the cosine of the angle between fvec and any column" // &
217  " of the jacobian is at most gtol in absolute value."
218 
219  info_array(5) = &
220  "number of calls to fcn has reached or exceeded maxfev."
221 
222  info_array(6) = &
223  "ftol is too small. no further reduction in the sum " // &
224  "of squares is possible."
225 
226  info_array(7) = &
227  "xtol is too small. no further improvement in the " // &
228  "approximate solution x is possible."
229 
230  info_array(8) = &
231  "gtol is too small. fvec is orthogonal to the columns" // &
232  " of the jacobian to machine precision."
233 
234  info_array(0) = &
235  "levenberg-marquardt optimizer terminated properly."
236 
237  info_array(10) = &
238  "Levenberg-Marquardt terminated prematurely due to " // &
239  "function evaluation error."
240 
241  info_array(11) = &
242  "Levenberg-Marquardt terminated prematurely: " // &
243  "must request more than one (1) processor"
244 
245 #if defined(MPI_OPT)
246  IF (numprocs > n) THEN
247  IF (myid .eq. master) THEN
248  WRITE (6, *)'Warning: more processors have been requested', &
249  ' than the maximum (nvar) required = ',n
250  END IF
251  ELSE IF (numprocs <= 1) THEN
252  info = 11
253  GOTO 400
254  END IF
255 #endif
256 !
257 ! check the input parameters for errors.
258 !
259 
260  ALLOCATE (x_min(n), fvec_min(m), flip(n), stat=istat)
261  IF (istat .NE. 0) stop 'Allocation error in lmdif'
262 
263 !
264 ! epsmch is the machine precision.
265 ! flip is control for direction flipping in fdjac2!
266 
267  epsmch = dpmpar(1)
268  flip = .false.
269 #if !defined(MPI_OPT)
270  myid = 0; wall_time = 0; wall_time_lev = 0
271 #endif
272 !
273 ! ASSIGN VALUES TO MODULE VARIABLES (FACILITATES PASSING TO SUBROUTINES)
274 !
275  ldfjac_mod = ldfjac
276  ipvt_mod => ipvt
277  fjac_mod => fjac
278  diag_mod => diag
279  qtf_mod => qtf
280 !
281 ! check the input parameters for errors.
282 !
283  IF (n.le.0 .or. m.lt.n .or. ldfjac.lt.m .or. ftol.lt.zero &
284  .or. xtol.lt. zero .or. gtol.lt.zero .or. maxfev.le.0 &
285  .or. factor.le.zero) THEN
286  info = 9
287  GOTO 400
288  END IF
289 
290  IF (mode .EQ. 2) THEN
291  DO j = 1, n
292  IF (diag(j) .le. zero) GOTO 300
293  END DO
294  END IF
295 
296 !
297 ! evaluate the function at the starting point
298 ! and calculate its norm.
299 !
300 ! Set up workers communicator (only master processor here) for initial run
301 !
302 #if defined(MPI_OPT)
303  IF (myid .ne. master) THEN
304  ikey = mpi_undefined
305  ELSE
306  ikey = worker_split_key+1
307  END IF
308  CALL mpi_comm_split(mpi_comm_world, ikey, worker_id, &
309  mpi_comm_workers, ierr_mpi)
310  IF (ierr_mpi .ne. 0) stop 'MPI_COMM_SPLIT error in lsfun1'
311 #endif
312  IF (myid .EQ. master) THEN
313  iflag = flag_singletask
314  CALL fcn (m, n, x, fvec, iflag, nfev)
315 #if defined(MPI_OPT)
316  CALL mpi_comm_free(mpi_comm_workers, ierr_mpi)
317 #endif
318  END IF
319 
320 
321 #if defined(MPI_OPT)
322  CALL mpi_bcast(iflag,1, mpi_integer, master, &
323  mpi_comm_world, ierr_mpi)
324  IF (ierr_mpi .NE. 0) GOTO 3000
325  IF (iflag .ge. 0) CALL &
326  mpi_bcast(fvec, m, mpi_real8, master, &
327  mpi_comm_world, ierr_mpi)
328  IF (ierr_mpi .NE. 0) GOTO 3000
329 #endif
330 
331  IF (iflag .LT. 0) GOTO 300
332  fnorm = enorm(m,fvec)
333  IF (nfev.GE.maxfev .OR. maxfev.EQ.1) info = 5
334  IF (info .NE. 0) GOTO 300
335 !
336 ! initialize levenberg-marquardt parameter (par) and iteration counter.
337 !
338  par = 0
339  iter = 1
340  lfirst_lm = .true.
341 #if defined(MPI_OPT)
342  IF (myid .EQ. master) WRITE (6, 1000) numprocs
343  1000 FORMAT (/,' Beginning Levenberg-Marquardt Iterations',/, &
344  ' Number of Processors: ',i4,//, &
345 ! 1 ' Number of Processors: ',i4,' (1 controller proc)',//,
346  70('='),/,2x,'Iteration',3x,'Processor',7x,'Chi-Sq',7x, &
347  'LM Parameter',6x,'Delta Tol'/,70('='))
348 #else
349  WRITE (6, 1000) max_processors
350  1000 FORMAT (/,' Beginning Levenberg-Marquardt Iterations',/, &
351  ' Number processors requested: ', i4,//, &
352  59('='),/,2x,'Iteration',8x,'Chi-Sq',7x, &
353  'LM Parameter',6x,'Delta Tol',/,59('='))
354 #endif
355 !
356 ! beginning of the outer loop.
357 !
358  outerloop: DO WHILE (nfev .lt. maxfev)
359 !
360 ! calculate the jacobian matrix.
361 !
362 #if defined(MPI_OPT)
363  CALL fdjac2_mp(fcn, m, n, x, fvec, fjac, ldfjac, &
364  iflag, nfev, epsfcn, fnorm_min, x_min, fvec_min)
365 #else
366  iflag = 2
367  CALL fdjac2(fcn, m, n, x, fvec, fjac, ldfjac, iflag, nfev, &
368  epsfcn, wa4, wall_time, fnorm_min, x_min, fvec_min)
369 #endif
370  nfev = nfev + n
371  IF (iflag .LT. 0) EXIT outerloop
372 
373 !
374 ! compute the qr factorization of the jacobian.
375 !
376  CALL qrfac(m, n, fjac, ldfjac, .true., ipvt, &
377  n, wa1, wa2, wa3)
378 
379 !
380 ! on the first iteration and if mode is 1, scale according
381 ! to the norms of the columns of the initial jacobian.
382 !
383  IF (iter .EQ. 1) THEN
384  IF (mode .ne. 2) THEN
385  diag = wa2
386  WHERE (wa2 .eq. zero) diag = one
387  END IF
388 
389 !
390 ! also on the first iteration, calculate the norm of the scaled x
391 ! and initialize the step bound delta.
392 !
393  wa3 = diag*x
394  xnorm = enorm(n,wa3)
395  delta = factor*xnorm
396  IF (delta .eq. zero) delta = factor
397  END IF
398 
399 !
400 ! form (q TRANSPOSE)*fvec and store the first n components in qtf.
401 !
402  wa4 = fvec
403  DO j = 1, n
404  IF (fjac(j,j) .ne. zero) THEN
405  sum0 = sum(fjac(j:m,j)*wa4(j:m))
406  temp = -sum0/fjac(j,j)
407  wa4(j:m) = wa4(j:m) + fjac(j:m,j)*temp
408  END IF
409  fjac(j,j) = wa1(j)
410  qtf(j) = wa4(j)
411  END DO
412 
413 !
414 ! compute the norm of the scaled gradient.
415 !
416  gnorm = zero
417  IF (fnorm .ne. zero) THEN
418  DO j = 1, n
419  l = ipvt(j)
420  IF (wa2(l) .ne. zero) THEN
421  sum0 = sum(fjac(1:j,j)*(qtf(1:j)/fnorm))
422  gnorm = max(gnorm,abs(sum0/wa2(l)))
423  END IF
424  END DO
425  END IF
426 
427 !
428 ! test for convergence of the gradient norm.
429 !
430  IF (gnorm .LE. gtol) info = 4
431  IF (info .NE. 0) EXIT outerloop
432 
433 !
434 ! rescale if necessary.
435 !
436  IF (mode .ne. 2) diag = max(diag,wa2)
437 
438 !
439 ! set up for inner loop (levmarqloop) to determine x update.
440 !
441  subcycle = 0
442  ratio = 0
443 
444  ALLOCATE (fjac_save(n,n), stat=istat)
445  IF (istat .NE. 0) stop 'Fjac_save allocation error'
446 
447  fjac_save(:n,:n) = fjac(:n,:n)
448 
449  levmarqloop: DO WHILE (ratio.lt.p0001 .and. subcycle.lt.2)
450 
451  subcycle = subcycle + 1
452  fjac(:n,:n) = fjac_save(:n,:n)
453  spread_ratio = epsfcn/delta/10
454 
455 !
456 ! Determine the levenberg-marquardt parameter.
457 !
458 #if defined(MPI_OPT)
459 ! for parallel processing, scan a range of values for this
460 ! parameter to find the 'optimal' choice
461 !
462  CALL levmarq_param_mp (x, wa1, wa2, wa3, wa4, &
463  nfev, m, n, iflag, fcn)
464 #else
465  CALL levmarq_param(x, wa1, wa2, wa3, wa4, &
466  wall_time_lev, nfev, m, n, iflag, fcn)
467 #endif
468  IF (iflag .LT. 0) EXIT
469 
470 !
471 ! on the first iteration, adjust the initial step bound.
472 !
473  IF (iter .EQ. 1) delta = min(delta, pnorm)
474 
475 !
476 ! compute the scaled actual reduction.
477 !
478  actred = -one
479  IF (p1*fnorm1 .LT. fnorm) actred = one - (fnorm1/fnorm)**2
480 
481 !
482 ! compute the scaled predicted reduction (prered) and
483 ! the scaled directional derivative (dirder).
484 !
485  DO j = 1, n
486  wa3(j) = zero
487  l = ipvt(j)
488  temp = wa1(l)
489  wa3(1:j) = wa3(1:j) + fjac(1:j,j)*temp
490  END DO
491 
492  temp1 = enorm(n,wa3)/fnorm
493  temp2 = (sqrt(par)*pnorm)/fnorm
494  prered = temp1**2 + temp2**2/p5
495  dirder = -(temp1**2 + temp2**2)
496 
497 !
498 ! compute the ratio of the actual to the predicted reduction.
499 !
500  ratio = zero
501  IF (prered .NE. zero) ratio = actred/prered
502 
503 !
504 ! test if Jacobian calculation gave best improvement
505 ! (added by MZ and SH)
506 !
507  IF (fnorm_min .LT. min(fnorm,fnorm1)) THEN
508  IF (myid .EQ. master) WRITE (6, *) &
509  ' Using minimum state from Jacobian evaluation'
510  wa2 = x_min
511  wa4 = fvec_min
512  fnorm1 = fnorm_min
513  IF (p1*fnorm1 .lt. fnorm) actred = one - (fnorm1/fnorm)**2
514  IF (prered .ne. zero) ratio = actred/prered
515  END IF
516 
517 !
518 ! update the step bound.
519 !
520  IF (ratio .le. p25) THEN
521  IF (actred .ge. zero) THEN
522  temp = p5
523  ELSE
524  temp = p5*dirder/(dirder + p5*actred)
525  END IF
526  IF (p1*fnorm1.ge.fnorm .or. temp.lt.p1) temp = p1
527  delta = temp*min(delta,pnorm/p1)
528  par = par/temp
529  ELSE IF (par.eq.zero .or. ratio.ge.p75) THEN
530  delta = pnorm/p5
531  par = p5*par
532  END IF
533 
534 !
535 ! test for successful iteration.
536 ! update x, fvec, and their norms.
537 !
538  IF (ratio .GE. p0001) THEN
539  x = wa2
540  wa2 = diag*x
541  fvec = wa4
542  xnorm = enorm(n,wa2)
543  fnorm = fnorm1
544  iter = iter + 1
545  IF (myid .EQ. master) WRITE(6,'(/,3(2x,a,1es10.3)/)') &
546  'new minimum = ', fnorm**2,'lm-par = ', par, &
547  'delta-tol = ', delta
548  END IF
549 
550  cycle_count = cycle_count + 1
551 
552 !
553 ! tests for convergence.
554 !
555  IF (abs(actred).LE.ftol .AND. prered.LE.ftol &
556  .AND. p5*ratio.LE.one) info = 1
557 !
558 ! next test made more stringent to avoid premature declaration of
559 ! completion observed on some problems.
560 !
561  IF (delta.LE.xtol*xnorm .AND. cycle_count.GT.2) info = 2
562 ! IF (delta .le. xtol*xnorm) info = 2
563 
564  IF (abs(actred).LE.ftol .AND. prered.LE.ftol &
565  .AND. p5*ratio.LE.one .AND. info.EQ.2) info = 3
566  IF (info .NE. 0) EXIT levmarqloop
567 
568 !
569 ! tests for termination and stringent tolerances.
570 !
571  IF (nfev .ge. maxfev) info = 5
572  IF (abs(actred).le.epsmch .and. prered.le.epsmch &
573  .and. p5*ratio.le.one) info = 6
574  IF (delta .le. epsmch*xnorm) info = 7
575  IF (gnorm .le. epsmch) info = 8
576  IF (info .ne. 0) EXIT levmarqloop
577 !
578 ! END of the inner loop. repeat if iteration unsuccessful (ratio < p0001)
579 !
580  END DO levmarqloop
581 
582  DEALLOCATE (fjac_save)
583  IF (info.ne.0 .or. iflag.ne.0) EXIT outerloop
584 
585  END DO outerloop
586 
587 ! termination, either normal or user imposed.
588 
589  300 CONTINUE
590 
591  IF (info.EQ.0 .AND. iflag.NE.0) info = 10
592 
593  400 CONTINUE
594 
595  IF (myid .EQ. master) THEN
596  IF (info.ge.lbound(info_array,1) .and. &
597  info.le.ubound(info_array,1)) THEN
598  WRITE (6, '(2(/,1x,a))') &
599  'Levenberg-Marquardt optimizer status: ',trim(info_array(info))
600  ELSE
601  WRITE (6, '(a,i5)')' LM info out of bounds: ', info
602  END IF
603  ENDIF ! MPI
604 
605  IF (ALLOCATED(x_min)) DEALLOCATE (x_min, fvec_min, flip)
606 
607  IF (iflag .LT. 0) info = iflag
608 
609  IF (nfev.LE.1 .and. info.NE.11) THEN
610  nfev = 1
611  iflag = flag_cleanup !!Clean-up last time through for master
612  CALL fcn (m, n, x, fvec, iflag, nfev)
613  END IF
614 
615 #if defined(MPI_OPT)
616  RETURN
617 
618  3000 CONTINUE
619  WRITE (6, *) 'MPI_BCAST error in LMDIF, ierr = ', ierr_mpi
620 #else
621  WRITE(*, '(2(/,a, f10.2))') &
622  ' Total wall clock time in jacobian multi-process call = ', &
623  wall_time, &
624  ' Total wall clock time in lev param multi-process call = ', &
625  wall_time_lev
626 #endif
627  END SUBROUTINE lmdif
mpi_inc
Umbrella module avoid multiple inlcudes of the mpif.h header.
Definition: mpi_inc.f:11