# -*- coding: utf-8 -*- """ Program to ============================================================ Language: Python Authors ------- Bethan Harris Assimila c/o University of Reading bethan.harris@reading.ac.uk BASED ON CODE WRITTEN BY PETER JAN VAN LEEUWEN (www.nceo.ac.uk/PFtools/code/PFModel.py) With thanks to Melanie Ades ============================================================ Use the email address above for correspondance relating to this code """ ######################################################################## ## IMPORT LIBRARIES ######################################################################## import numpy as np import matplotlib.pyplot as plt import scipy.stats #----------------------------------------------------------------------# #-- NON-LINEAR MODEL --------------------------------------------------# #----------------------------------------------------------------------# def NonLinModel(old, k, force=0): return 0.5*old + 25*old/(1+old**2) + 8*np.cos(0.8*k) + force #----------------------------------------------------------------------# #-- RUNNING THE CODE --------------------------------------------------# #----------------------------------------------------------------------# # Number of timesteps is Timesteps = 100 # The number of particles we will use is N = 10 #The TRUTH #################################################################### #The initial temperature is: Truth = np.zeros(Timesteps) Truth[0] = -8.0 # degrees celcius at start of run #truth run for t in range(1,Timesteps): Truth[t] = Truth[t]=NonLinModel(Truth[t-1],t,np.random.randn()) ##The Observations ############################################################ Observations = np.zeros(Timesteps)*np.NaN #Number of observations: obs_no = 5.0 ## These will be scattered around the truth with a variance of: obs_var = 0.5 #timesteps interval between observations obs_interval = np.round(Timesteps/(obs_no+1)) #timesteps at which observations take place obs_timesteps = np.linspace(obs_interval,obs_interval*obs_no,obs_no) obs_timesteps = np.round(obs_timesteps,0) #generate observations from Truth for t in obs_timesteps: Observations[t] = Truth[t] + np.sqrt(obs_var) * np.random.randn() ## The Particles ############################################################## #model error variance model_var = 0.05 #strength of pull # this should be no bigger than (obs_var / model_var) to avoid over-pulling and instability pull_strength = 5 # fraction of particles to be reatined at each observation timestep particles_retained = 0.8 # The initial temperatures of each particle are evenly spread between P = np.zeros([N]) Particles = np.zeros([N,Timesteps]) Particles[:,0] = np.linspace(-20,20,N) #The weights W = np.zeros([N]) Weights = np.zeros([N,Timesteps]) Weights[:,0] = 1.0/N Log_Weights = np.zeros([N,Timesteps]) W_Proposal = np.zeros([N]) W_equalising = np.zeros([N,Timesteps]) # empty array for proposal weights #statistics for forecasting Particles_mean = np.zeros(Timesteps) #For plotting only Colour_index = np.zeros([N,Timesteps]) Colour_index[:,0] = np.array(range(N))/float(N) Particles_before_resampling = np.zeros([N,Timesteps])*np.NaN #used to record particles before resampling #now loop each particle over our timesteps for t in range(1,Timesteps): for n in range(N): #set up the random error for the Particles: force = np.random.randn()*np.sqrt(model_var) # calculate the particles' positions################ Particles[n,t]=NonLinModel(Particles[n,t-1],t,force) #How many previous observations have we had? previous_obs_no = np.sum(np.isfinite(Observations[:t])) #Set up the pull term: # set the pull fraction initially to be zero pull_fraction = 0.0 # define the pull fraction to be equal to how far away in time we are # from the next observation. 0 is the furthest (previous timestep) 1 is # the closest (current timestep) pull_fraction = (1.0*t - obs_interval*previous_obs_no)/(1.0*obs_interval) # Set up the reduced pull fraction, this takes the fraction # calculated above and reduced the value by 0.3 The pull strength # starts at 0 and can peak at a maximum of 0.7 before the observation timestep. if pull_fraction > 0.3: pull_fraction_reduced = pull_fraction-0.3 else: pull_fraction_reduced = 0.0 # Now we calculate the pull term to be included into the model # equations using the Particles, the observations, the reduced # pull fraction, the model and observation errors and a user-defined # pull strength. #first make sure that we have an observation to pull towards if (previous_obs_no+1)*obs_interval <= Timesteps: pull = pull_strength * model_var / obs_var * pull_fraction_reduced * (Observations[(previous_obs_no+1)*obs_interval] - Particles[n,t]) # Add this pull term on to the Particle value Particles[n,t] += pull # Set up the log of the portion of the weights which come from # applying the pull term. These will be added to the weights # based on position later on and then the exponent will be taken WP_this_particle_and_timestep = 0.5*(np.square(pull+force)*(1/model_var)-np.square(force)*(1/model_var)) # add this onto the weights calculated for this particle at # previous timesteps. This way, when we come to use these weights # at an observation timestep, we're using the sum of all weights # since the last timestep. W_Proposal[n] += WP_this_particle_and_timestep #if there are no observations to pull towards then we don't alter the # particle and just set the proposal weights to zero (this happens at # the end of the run after the last observation) else: W_Proposal[n] = 0.0 #If we have an observation here then we need to update our particle weights if np.isfinite(Observations[t]): print "Observation at "+ str(t) + " is " + str(Observations[t]) # take a record our particles at this stage (for plotting and # re-weighting purposes: Particles_before_resampling[:,t] = Particles[:,t] # calculate the maximum possible weights values. This comes from # solving the qudratic equation which defines the weights due to the # proximity to observation and proximity to f(x(n-1)) for the minimum value W_max = 0.5*(np.square(Observations[t] - Particles[:,t]) / (model_var + obs_var)) # add the log of the proposal weights to the this maximum value W_max = W_Proposal[:] + W_max # Now we move on to the calculate the equivalnet weights: # sort these maximum weights into ascending order: W_sort = np.sort(W_max) # define the target weight (particles with max weights below this are # not taken into consideration and are lost at this observation) W_target = W_sort[particles_retained*N-1] print "particles retained index = " + str(particles_retained*N) print "target weight = " + str(W_target) print "max weights = " + str(W_sort) # We use a variable called Kalman Gain (used extensively in Kalman # filters) to help us determine the best position of the particle Kalman_gain = model_var / (model_var + obs_var) # Work through the equations to calculate the position of the individual # particles to achieve this target weight. More detail on these # (including their derivation) can be found in the final references for n in range(N): if W_max[n] <= W_target: # We use this a lot in the next few steps, so explicitly define the # diference between the observation and the particle difference = Observations[t] - Particles[n,t] # variable extracted from final equation (breaking things down) aaa = Kalman_gain * 0.5 * np.square(difference)/obs_var # variable extracted from final equation (breaking things down) bbb = 0.5 * np.square(difference)/obs_var - W_target + W_Proposal[n] # variable extracted from final equation (breaking things down) alpha = 1.0 + np.sqrt(1.0 - bbb/aaa + 0.00000001) # Calculate final position of the particle Particles[n,t] = Particles[n,t] + alpha * Kalman_gain * difference # Add on a little bit of random error (there are many different # ways of doing this). Particles[n,t] += np.sqrt(model_var) * np.random.randn() * 1.e-5 # Calculate (or re-calculate) the difference between the observations # and the particles for these new particle positions difference = Observations[t] - Particles[n,t] # We will need to know how much the particles have been moved # as a term for the following equations. Calcualte this explicitly now movement = Particles[n,t] - Particles_before_resampling[n,t] # find the additional weights contribution from the distance the # is from the observations and the amount is has been moved, now that # we've changed the particle's position. This value should ensure # that the final weights are all roughly equal. W_equalising[n,t] = 0.5 * np.square(difference)/obs_var + 0.5 * np.square(movement)/model_var # Find the new weights of these particles (they should all be # around the same value) Weights[n,t] = W_Proposal[n] + W_equalising[n,t] #DEBUG # now we normalise and take the exponent of the log weights Weights[:,t] -= np.min(Weights[:,t]) Weights[:,t] = np.exp(-Weights[:,t]) Weights[:,t] = Weights[:,t] / np.sum(Weights[:,t]) #print "Weights after obs = " + str(Weights[:,t]) ####################################################################### # Re-sampling ######################################################## ####################################################################### # Here we use Stochastic universal sampling, taken from: Kitagawa, G., # 1996: Monte-Carlo filter and smoother for non-Gaussian non-linear # state-space models. Journal of Computational and Graphical # Statistics, 10, 253–259. # Arrange the weights into an array so that they are cumulative W[0] = Weights[0,t] for n in range(1,N): W[n] = Weights[n,t] + W[n-1] #initialise a random number to act as the first weight interval random_interval = np.random.rand()/N #index used for the re-sampling process. index = 0 # Prepare to step through the arrays of cumulative weights and # particles index by index. for n in range(N): # if the random interval is greater than the cumulative weights of # the particles at the current array index value then increment to # the next index.(otherwise we will create a duplicate particle as # we sample from this index again) while random_interval > W[index]: index += 1 # sample a particle from this array index. P[n] = Particles[index,t] #increment r by approximately one random_interval += 0.9999/N # for plotting only ----------------------------------------------- # update the colours index so that lost particles' colours are # gone and duplicated particles' colour are duplicated Colour_index[n,t] = Colour_index[index,t-1] #add on a little bit of colour noise if this is a duplicate if P[n] in P[:n]: Colour_index[n,t] += 0.01*np.random.rand() #------------------------------------------------------------------ #Re-write Particles[n,t] with the new values Particles[:,t] = P #print "Particles after resampling = " + str(Particles[:,t]) #Re-set the weights to be even again Weights[:,t] = 1.0 / N W_Proposal[:] = 0.0 #DEBUG #print "Weights after resampling = " + str(Weights[:,t]) # If this isn't an observation timestep the we just update the the weights # with the most recent proposal weight and take the colour indices forward # (for plotting). else: Log_Weights[:,t] = W_Proposal[:] #normalise and take the exponent of the log weights Weights[:,t] = np.exp(-Log_Weights[:,t] + np.min(Log_Weights[:,t])) Weights[:,t] = Weights[:,t] / np.sum(Weights[:,t]) #print "Weights after normal = " + str(Weights[:,t]) Colour_index[:,t] = Colour_index[:,t-1] #Calculate the ensemble meanand standard deviation Particles_mean[t] = np.sum(Weights[:,t]*Particles[:,t]) ############################################################################### ############################################################################### #Plot Everything ############################################################################### ############################################################################### def timeseries_plot(title): plt.figure(figsize=(16,6)) plt.plot(range(Timesteps),Truth,'k',linewidth=10, alpha=0.1, label="Truth") plt.plot(range(Timesteps),Particles_mean,'c',linewidth=10, alpha=0.1, label="Particle/Ensemble Mean") #plt.plot([0,1],Particles[N/2,0:2],color=plt.cm.winter(float(0)/(N/2)),linewidth=0.5, label="particles") for n in range(N): plt.plot(range(0,Timesteps),Particles[n],color='0.9',linewidth=0.5) for n in range(N): for t in range(1,Timesteps): if np.isfinite(Observations[t]): match = np.where(Colour_index[n,t-1] == Colour_index[:,t]) try: plt.plot([t-1, t],[Particles[n,t-1],Particles[match[0],t]],color=plt.cm.hsv(Colour_index[n,t-1]),linewidth=1, alpha=Weights[n,t-1]/np.max(Weights[:,t-1])) except: plt.plot(t-1,Particles[n,t-1],'x',color=plt.cm.hsv(Colour_index[n,t-1]),markersize=10,linewidth=2) else: plt.plot([t-1, t],[Particles[n,t-1], Particles[n,t]],color=plt.cm.hsv(Colour_index[n,t]),linewidth=1, alpha=Weights[n,t-1]/np.max(Weights[:,t-1])) plt.errorbar(range(Timesteps),Observations,obs_var*2,fmt='ko',linewidth=2,markersize=8) plt.ylabel("Temperature (degC)") plt.xlabel("Timestep") plt.savefig(title+".png") def moving_hist_with_prob(title): fig = plt.figure() for t in range(Timesteps): fig.clear() plt.axes(xlim = (-25,25), ylim=(0,N)) bin_counter=0 #If this is an observation timestep then plotting is much more complicated. if np.isfinite(Observations[t]): print "obs plotting" #plot the observations observation = scipy.stats.norm.pdf(np.linspace(-25,25,100),Observations[t],np.sqrt(obs_var)) observation = 0.5 * N * observation / np.max(observation) plt.fill(np.linspace(-25,25,100),observation,'y', edgecolor="none") #Find the lost particles i_old_remaining = np.in1d(Particles_before_resampling[:,t], Particles[:,t]) lost = Particles_before_resampling[np.invert(i_old_remaining),t] #append the two together hist = np.sort(np.append(Particles[:,t],lost)) # plot Standard Particles for p in range(len(hist)): if p>0 and round(hist[p],0)==round(hist[p-1],0): bin_counter+=1 else: bin_counter=0 if hist[p] in lost: lost_particle_no = np.where(Particles_before_resampling[:,t]==hist[p])[0][0] plt.bar(round(hist[p],0)-0.5,1.0, bottom=bin_counter,width=1.0,facecolor=plt.cm.hsv(Colour_index[lost_particle_no,t-1]), alpha=0.1) else: particle_no = np.where(Particles[:,t]==hist[p])[0][0] if hist[p] in hist[:p]: plt.bar(round(hist[p],0)-0.5,1.0, bottom=bin_counter,width=1.0,facecolor=plt.cm.hsv(Colour_index[particle_no,t]), edgecolor="w") else: plt.bar(round(hist[p],0)-0.5,1.0, bottom=bin_counter,width=1.0,facecolor=plt.cm.hsv(Colour_index[particle_no,t])) #straightforward non-observation plotting else: hist = np.sort(Particles[:,t]) # plot Standard Particles for n in range(len(hist)): if n>0 and round(hist[n],0)==round(hist[n-1],0): bin_counter+=1 else: bin_counter=0 particle_no = np.where(Particles[:,t]==hist[n])[0][0] plt.bar(round(hist[n],0)-0.5,1.0, bottom=bin_counter,width=1.0,facecolor='0.9') plt.bar(round(hist[n],0)-0.5,1.0, bottom=bin_counter,width=1.0,facecolor=plt.cm.hsv(Colour_index[particle_no,t]), alpha=Weights[n,t-1]/np.max(Weights[:,t-1])) plt.plot(Truth[t],0.75*n,'k*',markersize=20) plt.title("Timestep = "+str(int(t))) plt.xlabel("Temperature (degC)") plt.ylabel("Number of particles") plt.savefig(title+"_"+str(int(t))+".png") # Call these plotting fuctions timeseries_plot("Draft_standard_timeseries_"+str(N)) #moving_hist_with_prob("Eq_Weights_hist/Eq_Weights_hist_"+str(N))