1 SUBROUTINE lmdif(fcn, m, n, x, fvec, ftol, xtol, gtol, maxfev, &
2 epsfcn, diag, mode, factor, nprint, info, nfev, fjac, &
3 ldfjac, ipvt, qtf, wa1, wa2, wa3, wa4)
5 USE lmpar_mod, fjac_mod=>fjac, ldfjac_mod=>ldfjac,
6 ipvt_mod=>ipvt, qtf_mod=>qtf, diag_mod=>diag
8 USE fdjac_mod,
ONLY: flip,flag_singletask,flag_cleanup,fdjac2_mp
10 USE fdjac_mod,
ONLY: max_processors, flip, flag_singletask, &
19 INTEGER :: m, n, maxfev, mode, nprint, info, nfev, ldfjac
20 REAL(rprec),
INTENT(in) :: ftol, xtol, gtol, epsfcn, factor
21 REAL(rprec),
DIMENSION(n) :: x, wa1, wa2, wa3
22 REAL(rprec),
DIMENSION(m) :: fvec, wa4
23 INTEGER,
DIMENSION(n),
TARGET :: ipvt
24 REAL(rprec),
DIMENSION(n),
TARGET :: diag, qtf
25 REAL(rprec),
DIMENSION(ldfjac,n),
TARGET :: fjac
29 REAL(rprec),
PARAMETER :: zero = 0, one = 1,
30 p1=0.1_dp, p5=0.5_dp, p25=0.25_dp, p75=0.75_dp, p0001=1.e-4_dp
31 CHARACTER(LEN=130),
DIMENSION(0:11) :: info_array
35 INTEGER :: iflag, iter, j, l, istat, ikey
36 INTEGER :: cycle_count, subcycle
37 REAL(rprec) :: actred, dirder, epsmch, fnorm, &
38 gnorm, prered, ratio, sum0, temp,
40 REAL(rprec),
DIMENSION(:,:),
ALLOCATABLE :: fjac_save
42 REAL(rprec) :: wall_time, wall_time_lev
44 REAL(rprec) :: fnorm_min
45 REAL(rprec),
ALLOCATABLE,
DIMENSION(:) :: x_min, fvec_min
50 REAL(rprec),
EXTERNAL :: dpmpar, enorm
191 CALL mpi_comm_rank (mpi_comm_world, myid, ierr_mpi)
192 IF (ierr_mpi .ne. 0) stop
'MPI_COMM_RANK error in LMDIF'
193 CALL mpi_comm_size (mpi_comm_world, numprocs, ierr_mpi)
194 IF (ierr_mpi .ne. 0) stop
'MPI_COMM_SIZE error in LMDIF'
196 info = 0; iflag = 0; nfev = 0; cycle_count = 0
199 "improper input parameters (number constraints MUST " //
200 "be greater than number variables (>0)."
203 "algorithm estimates that the relative error in the " //
204 "sum of squares is at most ftol."
207 "algorithm estimates that the relative error between" //
208 " x and the solution is at most xtol."
211 "algorithm estimates that the relative error in the " //
212 "sum of squares and between x and the solution is at" //
213 " most ftol and xtol."
216 "the cosine of the angle between fvec and any column" //
217 " of the jacobian is at most gtol in absolute value."
220 "number of calls to fcn has reached or exceeded maxfev."
223 "ftol is too small. no further reduction in the sum " //
224 "of squares is possible."
227 "xtol is too small. no further improvement in the " //
228 "approximate solution x is possible."
231 "gtol is too small. fvec is orthogonal to the columns" //
232 " of the jacobian to machine precision."
235 "levenberg-marquardt optimizer terminated properly."
238 "Levenberg-Marquardt terminated prematurely due to " //
239 "function evaluation error."
242 "Levenberg-Marquardt terminated prematurely: " //
243 "must request more than one (1) processor"
246 IF (numprocs > n)
THEN
247 IF (myid .eq. master)
THEN
248 WRITE (6, *)
'Warning: more processors have been requested',
249 ' than the maximum (nvar) required = ',n
251 ELSE IF (numprocs <= 1)
THEN
260 ALLOCATE (x_min(n), fvec_min(m), flip(n), stat=istat)
261 IF (istat .NE. 0) stop
'Allocation error in lmdif'
269 #if !defined(MPI_OPT)
270 myid = 0; wall_time = 0; wall_time_lev = 0
283 IF (n.le.0 .or. m.lt.n .or. ldfjac.lt.m .or. ftol.lt.zero
284 .or. xtol.lt. zero .or. gtol.lt.zero .or. maxfev.le.0
285 .or. factor.le.zero)
THEN
290 IF (mode .EQ. 2)
THEN
292 IF (diag(j) .le. zero)
GOTO 300
303 IF (myid .ne. master)
THEN
306 ikey = worker_split_key+1
308 CALL mpi_comm_split(mpi_comm_world, ikey, worker_id,
309 mpi_comm_workers, ierr_mpi)
310 IF (ierr_mpi .ne. 0) stop
'MPI_COMM_SPLIT error in lsfun1'
312 IF (myid .EQ. master)
THEN
313 iflag = flag_singletask
314 CALL fcn (m, n, x, fvec, iflag, nfev)
316 CALL mpi_comm_free(mpi_comm_workers, ierr_mpi)
322 CALL mpi_bcast(iflag,1, mpi_integer, master,
323 mpi_comm_world, ierr_mpi)
324 IF (ierr_mpi .NE. 0)
GOTO 3000
325 IF (iflag .ge. 0)
CALL
326 mpi_bcast(fvec, m, mpi_real8, master,
327 mpi_comm_world, ierr_mpi)
328 IF (ierr_mpi .NE. 0)
GOTO 3000
331 IF (iflag .LT. 0)
GOTO 300
332 fnorm = enorm(m,fvec)
333 IF (nfev.GE.maxfev .OR. maxfev.EQ.1) info = 5
334 IF (info .NE. 0)
GOTO 300
342 IF (myid .EQ. master)
WRITE (6, 1000) numprocs
343 1000
FORMAT (/,
' Beginning Levenberg-Marquardt Iterations',/,
344 ' Number of Processors: ',i4,//,
346 70(
'='),/,2x,
'Iteration',3x,
'Processor',7x,
'Chi-Sq',7x,
347 'LM Parameter',6x,
'Delta Tol'/,70(
'='))
349 WRITE (6, 1000) max_processors
350 1000
FORMAT (/,
' Beginning Levenberg-Marquardt Iterations',/,
351 ' Number processors requested: ', i4,//,
352 59(
'='),/,2x,
'Iteration',8x,
'Chi-Sq',7x,
353 'LM Parameter',6x,
'Delta Tol',/,59(
'='))
358 outerloop:
DO WHILE (nfev .lt. maxfev)
363 CALL fdjac2_mp(fcn, m, n, x, fvec, fjac, ldfjac,
364 iflag, nfev, epsfcn, fnorm_min, x_min, fvec_min)
367 CALL fdjac2(fcn, m, n, x, fvec, fjac, ldfjac, iflag, nfev,
368 epsfcn, wa4, wall_time, fnorm_min, x_min, fvec_min)
371 IF (iflag .LT. 0)
EXIT outerloop
376 CALL qrfac(m, n, fjac, ldfjac, .true., ipvt,
383 IF (iter .EQ. 1)
THEN
384 IF (mode .ne. 2)
THEN
386 WHERE (wa2 .eq. zero) diag = one
396 IF (delta .eq. zero) delta = factor
404 IF (fjac(j,j) .ne. zero)
THEN
405 sum0 = sum(fjac(j:m,j)*wa4(j:m))
406 temp = -sum0/fjac(j,j)
407 wa4(j:m) = wa4(j:m) + fjac(j:m,j)*temp
417 IF (fnorm .ne. zero)
THEN
420 IF (wa2(l) .ne. zero)
THEN
421 sum0 = sum(fjac(1:j,j)*(qtf(1:j)/fnorm))
422 gnorm = max(gnorm,abs(sum0/wa2(l)))
430 IF (gnorm .LE. gtol) info = 4
431 IF (info .NE. 0)
EXIT outerloop
436 IF (mode .ne. 2) diag = max(diag,wa2)
444 ALLOCATE (fjac_save(n,n), stat=istat)
445 IF (istat .NE. 0) stop
'Fjac_save allocation error'
447 fjac_save(:n,:n) = fjac(:n,:n)
449 levmarqloop:
DO WHILE (ratio.lt.p0001 .and. subcycle.lt.2)
451 subcycle = subcycle + 1
452 fjac(:n,:n) = fjac_save(:n,:n)
453 spread_ratio = epsfcn/delta/10
462 CALL levmarq_param_mp (x, wa1, wa2, wa3, wa4,
463 nfev, m, n, iflag, fcn)
465 CALL levmarq_param(x, wa1, wa2, wa3, wa4,
466 wall_time_lev, nfev, m, n, iflag, fcn)
468 IF (iflag .LT. 0)
EXIT
473 IF (iter .EQ. 1) delta = min(delta, pnorm)
479 IF (p1*fnorm1 .LT. fnorm) actred = one - (fnorm1/fnorm)**2
489 wa3(1:j) = wa3(1:j) + fjac(1:j,j)*temp
492 temp1 = enorm(n,wa3)/fnorm
493 temp2 = (sqrt(par)*pnorm)/fnorm
494 prered = temp1**2 + temp2**2/p5
495 dirder = -(temp1**2 + temp2**2)
501 IF (prered .NE. zero) ratio = actred/prered
507 IF (fnorm_min .LT. min(fnorm,fnorm1))
THEN
508 IF (myid .EQ. master)
WRITE (6, *)
509 ' Using minimum state from Jacobian evaluation'
513 IF (p1*fnorm1 .lt. fnorm) actred = one - (fnorm1/fnorm)**2
514 IF (prered .ne. zero) ratio = actred/prered
520 IF (ratio .le. p25)
THEN
521 IF (actred .ge. zero)
THEN
524 temp = p5*dirder/(dirder + p5*actred)
526 IF (p1*fnorm1.ge.fnorm .or. temp.lt.p1) temp = p1
527 delta = temp*min(delta,pnorm/p1)
529 ELSE IF (par.eq.zero .or. ratio.ge.p75)
THEN
538 IF (ratio .GE. p0001)
THEN
545 IF (myid .EQ. master)
WRITE(6,
'(/,3(2x,a,1es10.3)/)')
546 'new minimum = ', fnorm**2,
'lm-par = ', par,
547 'delta-tol = ', delta
550 cycle_count = cycle_count + 1
555 IF (abs(actred).LE.ftol .AND. prered.LE.ftol
556 .AND. p5*ratio.LE.one) info = 1
561 IF (delta.LE.xtol*xnorm .AND. cycle_count.GT.2) info = 2
564 IF (abs(actred).LE.ftol .AND. prered.LE.ftol
565 .AND. p5*ratio.LE.one .AND. info.EQ.2) info = 3
566 IF (info .NE. 0)
EXIT levmarqloop
571 IF (nfev .ge. maxfev) info = 5
572 IF (abs(actred).le.epsmch .and. prered.le.epsmch
573 .and. p5*ratio.le.one) info = 6
574 IF (delta .le. epsmch*xnorm) info = 7
575 IF (gnorm .le. epsmch) info = 8
576 IF (info .ne. 0)
EXIT levmarqloop
582 DEALLOCATE (fjac_save)
583 IF (info.ne.0 .or. iflag.ne.0)
EXIT outerloop
591 IF (info.EQ.0 .AND. iflag.NE.0) info = 10
595 IF (myid .EQ. master)
THEN
596 IF (info.ge.lbound(info_array,1) .and.
597 info.le.ubound(info_array,1))
THEN
598 WRITE (6,
'(2(/,1x,a))')
599 'Levenberg-Marquardt optimizer status: ',trim(info_array(info))
601 WRITE (6,
'(a,i5)')
' LM info out of bounds: ', info
605 IF (
ALLOCATED(x_min))
DEALLOCATE (x_min, fvec_min, flip)
607 IF (iflag .LT. 0) info = iflag
609 IF (nfev.LE.1 .and. info.NE.11)
THEN
612 CALL fcn (m, n, x, fvec, iflag, nfev)
619 WRITE (6, *)
'MPI_BCAST error in LMDIF, ierr = ', ierr_mpi
621 WRITE(*,
'(2(/,a, f10.2))')
622 ' Total wall clock time in jacobian multi-process call = ',
624 ' Total wall clock time in lev param multi-process call = ',