; Ensemble Kalman filter (EnKF) with the Lorenz 1963 model ; ; Stefano Migliorini ; National Centre for Earth Observation ; University of Reading ; s.migliorini@reading.ac.uk ; ; Aug 2006 - First version. The structure of the EnKF is based on Geir Evensen's fortran implementation ; of the EnKF and EnKS with the Lorenz equations (see http://enkf.nersc.no/Examples/Lorenz). ; The analysis scheme is based on the square root algorithm described in ; Evensen, Ocean Dynamics, 54, 539-560, 2004, with a symmetric ensemble transform matrix ; (see Wang et al., MWR, 132, 1590-1605, 2004) which preserves the analysis ensemble mean ; (see Sakov and Oke, MWR, 136, 1042-1053, 2008). ; May 2010 - Overall revision and addition of free forecast capability ; Aug 2010 - Added options for observing only part of the state (see flags noxob, noyob, nozob) pro lorenz_enkf, nrens=nrens, nmeas=nmeas, ini_var=ini_var, meas_var=meas_var, perf_obs=perf_obs, $ noxob=noxob, noyob=noyob, nozob=nozob, test=test, cov_flag=cov_flag, covs=covs, ih=ih, ref=ref, $ moderr_flag=moderr_flag, path=path, print_flag=print_flag ; input parameters default values nmeast = 3 ; number of observed state components at a given observation time ndim = 401 ; dimension of the reference state (assimilation part) ndimtot = 801 ; total dimension of the reference state na=0 ; initial step nb=800 ; end step (nb must be < ndim) dt=0.01 ; integration time step if ~keyword_set(nrens) then nrens = 100 ; ensemble members if ~keyword_set(ini_var) then ini_var = 2.0 ; initial state error variance if ~keyword_set(nmeas) then nmeas = 80 ; total number of measurements if ~keyword_set(meas_var) then meas_var = 2.0 ; obs error variance (at the moment is the same for x, y and z) if keyword_set(perf_obs) then begin meas_var=0.0 ; but R is set to a "small" value. See below print, 'perfect observations case' endif else perf_obs = 0 if keyword_set(noxob) then begin &$ nmeast = nmeast-1 &$ print, 'x-component not observed' &$ endif else noxob = 0 if keyword_set(noyob) then begin &$ nmeast = nmeast-1 &$ print, 'y-component not observed' &$ endif else noyob = 0 if keyword_set(nozob) then begin &$ nmeast = nmeast-1 &$ print, 'z-component not observed' &$ endif else nozob = 0 if ~keyword_set(test) then test = 0 ; true to compare results (e.g. for various nrens values) if ~keyword_set(cov_flag) then cov_flag = 0 ; returns the cov if true if ~keyword_set(moderr_flag) then moderr_flag = 0 ; applies model error if true if ~keyword_set(path) then path = './' if ~keyword_set(print_flag) then print_flag = 0 if nmeast eq 0 then message, 'at least one component of the state must be observed!' print, 'ensemble members: ', nrens print, 'initial state error variance: ', ini_var print, 'total number of measurements: ', nmeas print, 'obs error variance: ', meas_var if (print_flag) then begin &$ print, 'printing on postscript' set_plot,'ps' &$ tvlct, [0,255,0,0,175], [0,0,255,0,175], [0,0,0,255,175] &$ red = 1 &$ green = 2 &$ blue = 3 &$ endif else begin &$ red = 255L &$ green = 256L*255L &$ blue = 256L*256L*255L &$ endelse ; model error variances (see Evensen, Data Assimilation: The Ensemble Kalman Filter, 2007, Eq. 6.31) modvar = [0.1491, 0.9048, 0.9180] ; same values as in Evensen, Data Assimilation: The Ensemble Kalman Filter, 2007, sction 9.8.1 ref = fltarr(ndimtot,3) ref[0,0] = 1.508870 ref[0,1] = -1.531271 ref[0,2] = 25.46091 ; calculate the reference trajectory between na and nb n_ab = nb-na+1 lorenz, na, nb, dt, ref t = indgen(n_ab)*dt ; EnKF cfcst = fltarr(ndimtot,3) ; central forecast if ~test then cfcst[0,*] = sqrt(ini_var)*randomn(seed,3) + ref[0,*] $ ; at time 0 ; to fix the initial forecast as a function of obs_var (for comparing experims) else begin cfcst[0,0] = 1.608870 cfcst[0,1] = -1.631271 cfcst[0,2] = 25.66091 print, 'fixed initial conditions for the forecast: ', cfcst[0,*] endelse A = fltarr(nrens, ndimtot, 3) for i=0,2 do begin &$ A[*,0,i] = randomn(seed, nrens) &$ A[*,0,i] = (A[*,0,i] - mean(A[*,0,i])) / stddev(A[*,0,i]) &$ A[*,0,i] = cfcst[0,i] + sqrt(ini_var)*A[*,0,i] &$ endfor ; creates obs mask deltaobs = double(ndim-1)*dt/double(nmeas) ih = intarr(nmeas) for i=0,nmeas-1 do begin &$ ii=fix(deltaobs*double(i+1)/dt) &$ ; index of obs time ih(i)=ii &$ ; measurement matrix endfor ; creates obs d = fltarr(3, nmeas) if ~test then for i=0, nmeas-1 do d[*,i] = sqrt(meas_var)*randomn(seed,3) + ref[ih[i],*] else $ for i=0, nmeas-1 do d[*,i] = sqrt(meas_var)*(-1)^i + ref[ih[i],*] ; to fix obs as a function of obs_var (for comparing experims) if test then print, 'Fixed observation values' ; Obs of x: ', d[0,*] ; Time stepping for m=0,nmeas-1 do begin &$ ;print, m, nmeas &$ na=fix(deltaobs*double(m)/dt) &$ ; start time nb=fix(deltaobs*double(m+1)/dt) &$ ; end time ;print,'na=',na,' nb=',nb ; ensemble members forecasts with model error (if selected) for j=0,nrens-1 do begin &$ A1 = reform(A[j,*,*]) &$ lorenz,na,nb,dt,A1, moderr_flag=moderr_flag, modvar=modvar, seed=seed &$ A[j,*,*] = A1 &$ endfor &$ ; only for diagnostics and plotting for i=na, nb do begin &$ for j=0,2 do cfcst[i,j] = mean(A[*,i,j]) &$ endfor bkg = cfcst[nb,*] ; saves the forecast before it gets overwritten by the analysis ; Ensemble Kalman filter analysis AA = reform(A[*,nb,*]) &$ ; ensemble at observation time H = fltarr(3,3) &$ ; obs operator. It is an nmeast x n matrix, where nmeast is the number of obs at each time. ; In the default case nmeast=n=3 (state fully observed) R = fltarr(3,3) &$ ; full rank (i.e. exact) obs err covariance if perf_obs then meas_var = ini_var / 10. ; to make the filter numerically meaningful (even though ; meas_var should be zero) for i=0,2 do begin &$ H[i,i] = 1.0 &$ ; H is the identity matrix R[i,i] = meas_var &$ ; uncorrelated obs endfor &$ ; selects only the observed components stateind = intarr(3) + 1 if noxob then stateind[0] = 0 if noyob then stateind[1] = 0 if nozob then stateind[2] = 0 istate = where(stateind gt 0, nistate) if nistate ne nmeast then message, 'inconsistent number of observed components!' H = H[*,istate] Rtmp = R[*,istate] R = Rtmp[istate,*] ; mean_AA = fltarr(3) &$ for i=0,2 do begin &$ mean_AA[i] = mean(AA[*,i]) &$ ; ensemble mean AA[*,i] = AA[*,i] - mean_AA[i] &$ ; ensemble perturbation matrix endfor &$ S = H##AA &$ ZLZT = S##transpose(S) + (nrens-1)*R &$ obs = d[istate,m] &$ ; observation vector at observation time t if nmeast gt 1 then begin &$ lambda = la_eigenql(ZLZT, eigenvectors=ZT, /double) &$ if min(lambda) le 0.0 then message, 'non-positive eigenvalues!' &$ Z = transpose(ZT) &$ ; to use rows by cols prods max_abs_res1 = max(abs(ZLZT - Z##diag_matrix(lambda)##ZT)) &$ if max_abs_res1 gt 1e-8 then message, 'check precision of eigenvec decomposition!' ; else $ ; print, 'eigenvec precision: ', max_abs_res1 &$ y1 = ZT##(obs - H##mean_AA) &$ y2 = diag_matrix(1.0/lambda)##y1 &$ y3 = Z ## y2 &$ y4 = transpose(S)##y3 &$ ana = mean_AA + AA##y4 &$ ; analysis! X2 = diag_matrix(1.0/sqrt(lambda))##ZT##S &$ ; nmeast x nrens elements endif else begin &$ y3 = (obs - H##transpose(mean_AA))/ZLZT &$ y4 = transpose(S)*y3[0] &$ ana = mean_AA + AA##y4 &$ ; analysis! X2 = S/sqrt(ZLZT[0]) &$ ; 1 x nrens elements endelse rnk = min([nmeast,nrens-1]) ; commented out, given that IDL's SVD routine does not calculate the eigenvecs with zero eigenval ;la_svd, X2, sigma2, U2, V2, /double &$ ; fixed by calculating eigenvecs of transpose(X2)##X2 instead sigma2_evals_square = la_eigenql(transpose(X2)##X2,eigenvec=evecs,/double) &$ ;tiny = 50.0D*(machar(/double)).eps ; our def of machine precision ;if abs((max(sigma2_evals_square) - 1.0D)) lt tiny then message, 'sigma2_evals_square > 1' if 1.0 - max(sigma2_evals_square) lt 0.0 then message, 'sigma2_evals_square > 1' ; non-zero eigenvals sigma2_evals_square = sigma2_evals_square[nrens-rnk:nrens-1] &$ ; also goes back to single precision V2 = transpose(evecs) &$ ; isigma = fltarr(nrens) + 1.0 isigma[nrens-rnk:nrens-1] = sqrt(1.0 - sigma2_evals_square) &$ X3 = V2 &$ for j=nrens-rnk,nrens-1 do X3[j,*]=X3[j,*]*isigma[j] &$ ; this is V2 ## sqrt(I - sigma2'sigma2) AA_ana = AA##X3##transpose(V2) ; symmetric square root for i=0,2 do AA_ana[*,i] = AA_ana[*,i] + ana[i] &$ ; add the analysis: analysis ensemble ; that's it for the Kalman Filter at time t: now it passes results back to the time-dependent ensemble: cfcst[nb,*] = ana &$ A[*,nb,*] = AA_ana &$ ; endfor na=nb &$ ; start time nb=ndimtot-1 &$ ; end time ;print,'final na=',na,' nb=',nb ; ensemble members forecasts with model error (if selected) for j=0,nrens-1 do begin &$ A1 = reform(A[j,*,*]) &$ lorenz,na,nb,dt,A1, moderr_flag=moderr_flag, modvar=modvar, seed=seed &$ A[j,*,*] = A1 &$ endfor &$ for i=na, nb do begin &$ for j=0,2 do cfcst[i,j] = mean(A[*,i,j]) &$ endfor ; observation plot title=['x','y','z'] if print_flag then device, /color, filename=path+'enkf_res.ps' else window, 0 !p.multi=[0,1,3] for jcoor=0,2 do begin plot, t, ref[*,jcoor], xtitle='time', ytitle=title[jcoor], title=title[jcoor], /nodata if stateind[jcoor] then for i=0, nmeas-1 do plots, t[ih[i]], d[jcoor,i], psym=2, color=red oplot, t, ref[*,jcoor] oplot, t, cfcst[*,jcoor], color=green ; plots the analysis plots, [t[ndim-1],t[ndim-1]], [min([cfcst[*,jcoor],ref[*,jcoor]]),max([cfcst[*,jcoor],ref[*,jcoor]])], lin=2 endfor if print_flag then device,/close !p.multi=0 ; analysis variance ana_vars = fltarr(ndimtot,3) for i=0,ndimtot-1 do begin &$ AA = reform(A[*,i,*]) &$ for j=0,2 do AA[*,j] = AA[*,j] - mean(AA[*,j]) &$ covar = AA##transpose(AA)/(nrens-1) &$ ana_vars[i,*] = covar[indgen(3), indgen(3)] &$ endfor if print_flag then device, /color, filename=path+'enkf_ana_err.ps' else window,1 ; analysis error and analysis error standard devs !p.multi=[0,1,3] for j=0,2 do begin lim = max([sqrt(ana_vars[*,j]),abs(cfcst[*,j] - ref[*,j])]) plot, t, cfcst[*,j] - ref[*,j], yr=[-lim,lim], $ xtitle='time', ytitle='analysis error', title=title[j] oplot, t, sqrt(ana_vars[*,j]), color=green oplot, t, - sqrt(ana_vars[*,j]), color=green plots, [t[ndim-1],t[ndim-1]], [min([cfcst[*,j],ref[*,j]]),max([cfcst[*,j],ref[*,j]])], lin=2 endfor if print_flag then device, /close !p.multi=0 rmse = fltarr(ndimtot) for i=0,ndimtot-1 do rmse[i] = sqrt(total((cfcst[i,*]-ref[i,*])^2)/3.0) if print_flag then device, /color, filename=path+'enkf_ana_rmse.ps' else window,2 plot, t, rmse, xtitle='time', ytitle='RMSE' plots, [t[ndim-1],t[ndim-1]], [min(rmse),max(rmse)], lin=2 if print_flag then device, /close if test then begin openw, 1, path+'ana_err.dat' printf, 1, ' nrens= ', nrens, ' nmeas= ', nmeas, ' ini_var= ', ini_var, ' meas_var= ', meas_var, ' stateind= ', stateind ;printf, 1, ' x_ana x_ana_err x_std obs_mask' printf, 1, ' RMSE obs_mask' m=0 for i=0, ndimtot-1 do begin if m lt nmeas then begin obs_mask = 0 if i eq ih[m] then begin obs_mask=1 ++m endif endif else obs_mask = 0 ;printf, 1, cfcst[i,0], cfcst[i,0]-ref[i,0], sqrt(ana_vars[i,0]), obs_mask printf, 1, rmse[i], obs_mask endfor close, 1 endif if print_flag then device, /color, filename=path+'enkf_xz.ps' else window,3 plot, ref[*,0], ref[*,2], xtitle='x', ytitle='z' plots, ref[0,0], ref[0,2], psym=4 plots, cfcst[0,0], cfcst[0,2], psym=4, color=green oplot, cfcst[*,0], cfcst[*,2], color=green oplot, d[0,*]*(1-noxob), d[2,*]*(1-nozob), psym=2, color=red ; set to zero the component that is not observed if print_flag then device,/close ; ensemble members if ~print_flag then begin window,4 ;plot, t, cfcst[*,0], /nodata, yr=[-20,20] plot, t, ref[*,0], /nodata, xtitle='time', ytitle='forecast ensemble' for i=0,ndimtot-1 do plots, t[i], A[*,i,0], psym=1, color=blue ;oplot, t, cfcst[*,0], color=green oplot, t, ref[*,0], color=green plots, [t[ndim-1],t[ndim-1]], [min([cfcst[*,0],ref[*,0]]),max([cfcst[*,0],ref[*,0]])], lin=2 endif ;returns the covs if cov_flag then begin &$ covs = fltarr(ndimtot,3,3) &$ for i=0,ndimtot-1 do begin &$ tmp = reform(A[*,i,*]) &$ for j=0,2 do tmp[*,j] = tmp[*,j] - mean(tmp[*,j]) &$ covs[i,*,*] = tmp##transpose(tmp)/(ndimtot-1.0) &$ endfor &$ endif set_plot,'x' end