Matlab: el programa devuelve valores basura, ayuda en la ejecución adecuada del filtro de Kalman y estimación de parámetros
He implementado el suavizado de Kalman con maximización de expectativas basado en el documentoEstimación de parámetros para sistema dinámico lineal. Todas las anotaciones se basan en este documento. El modelo es un filtro IIR (AR (2))
y(t) = 0.195 *y(t-1) - 0.95*y(t-2) + w(t)
La representación del espacio de estado:
x(t+1) = a^Tx(t) + w(t)
y(t) = C(t) + v(t)
El modelo de espacio de estado:
x(t+1) = Ax(t) + w(t)
y(t) = Cx(t) + v(t)
w(t) = N(0,Q) is the driving process noise
v(t) = N(0,R) is the measurement noise
Reescribiendo el modelo AR como representación de espacio de estado:
¿Alguien puede señalar dónde he hecho mal para que el código funcione? He seguido la mayor parte de la secuencia y la estructura dehttps://github.com/cswetenham/pmr/blob/master/toolboxes/lds/lds.m#L110
(1) La ecuación (26) necesita un valor inicial de $ x0 $. Yo suministréx0 = mean(x,2)
a la funciónPredict
. Tengo una duda en esto. Seráx0
y por lo tantoinitx
ser el medio de la observacióny
lo que da un escalar o serán 2 valores (2 filas por 1 columna) ya que el espacio de estado es AR (2). No estoy seguro de esto.
(2) si tomox0 = mean(x,2)
y comentar el Código después de que Kalman Filtering dé los resultados adecuados para la estimación del estado. Es solo a partir del suavizado que la estimación del parámetro no es correcta. No es correcto porque el nuevox0 = initx = x1sum/N
se convierte en un escalar mientras que al inicializar era una matriz de 2 por 1, donde cada fila es el estado.
%%% Matlab script to simulate data and process usiung Kalman for the state
%%% estimation of AR(2) time series.: y(t) = 0.195 *y(t-1) - 0.95*y(t-2) + excite_input(t);
% Linear system representation
% x_n+1 = A x_n + Bw_n
% y_n = Cx_n + v_n
% w = N(0,Q); v = N(0,R)
clc
clear all
T = 100;
order = 2;
a1 = 0.195;
a2 = -0.95;
A = [ a1 a2;
1 0 ];
C = [ 1 0 ];
B = [1;
0];
x =[ rand(order,1) zeros(order,T-1)];% a sequence of 100 2-d vectors
sigma_2_w =1;
sigma_2_v = 0.01;
Q=eye(order);
P=Q;
%Simulate the steady state covariance matrix P
%P = A*P*A' + B*sqrt(sigma_2_w)*B';
P = dlyap(A,B*B');
%Simulate AR model time series, x;
sqrtW=sqrtm(sigma_2_w);
excite_input=B*sqrtW*randn(1,T);
for t = 1:T-1
x(:,t+1) = A*x(:,t) + excite_input(t+1);
end
%noisy observation
y = C*x + sqrt(sigma_2_v)*randn(1,T);
R = sigma_2_v ;
z = y;
%X= x';
x0=mean(x,2);
YHAT = zeros(1,T);
XHAT = zeros(2,T+1);
LL=[];
converged = 0;
previous_loglik = -Inf;
Y =y;
z = Y;
N = T;
max_iter = 500;
num_iter = 0;
initx = x0;
% V1 = var(initx);
loglik = 0;
V1 = P;
while ~converged & (num_iter <= max_iter)
initx = x0;
k = length(initx);
I=eye(k);
xtt=zeros(2,T); Vtt=zeros(2,2,T); xtt1=zeros(2,T); Vtt1=zeros(2,2,T); xhat_s = zeros(2,T);
xtT=zeros(2,T); VtT=zeros(2,2,T); J=zeros(2,2,T); Vtt1T=zeros(2,2,T); Ptsum = 0;
x1sum = 0;
P1sum = 0;
A1=zeros(k);
A2=zeros(k);
XPred = zeros(2,T);
Ptsum=zeros(k);
xhat = zeros(2,1);
Pcov = zeros(k,k);
Kcur = 0;
YX = 0;
%KAlman Filtering
for i =1:T
[xpred, Ppred] = predict(x0,V1, A, Q);
[nu, S] = innovation(xpred, Ppred, z(i), C, R);
[xnew, P, yhat, KalmanGain] = innovation_update_LDS(A, xpred, Ppred, V1, nu, S, C);
YHAT(i) = yhat;
Phat(i) = sqrt(C*P*C');
xtt(:,i) = xnew; %xtt is the filtered state
Vtt(:,:,i) = P; %filtered covariance
Vtt1(:,:,i) = Ppred;
XPred(:,i) = A*xtt(:,i);
end
KC = KalmanGain*C;
%
% %Kalman Smoothing
%
%
KT = KalmanGain;
% %backward pass gets you E[x(t)|y(1:T)] from E[x(t)|y(1:t)]
t=T;
xtT(:,t) = xtt(:,t);
VtT(:,:,t) = Vtt(:,:,t);
% %SMOOTHING
for t=(T-1):-1:1
Vtt1(:,:,t) = A*Vtt(:,:,t)*A' + Q;
J(:,:,t) = Vtt(:,:,t)*A'*inv(Vtt1(:,:,t+1)); %Eq(31)
xtT(:,t) = xtt(:,t) + ((xtT(:,t+1)- XPred(:,t))'*J(:,:,t))'; % Eq(32) xsmooth modified the transpose
VtT(:,:,t) = Vtt(:,:,t) + J(:,:,t)*(VtT(:,:,t+1)-Vtt1(:,:,t+1))*J(:,:,t)'; % Eq(33) Vsmooth or Psmooth
Pt=VtT(:,:,t) + xtT(:,t)*xtT(:,t)';
Ptsum=Ptsum+Pt;
YX = YX+Y(:,t)'*xtT(:,t); %For Eq(14)
x1sum = x1sum + xtT(:,1);
% gama2 = gama2 + Pt - xtT(:,1)*xtT(:,1)' - VtT(:,:,1);
end
% Pt = VtT + xtT'*xtT;
% Pt = VtT(:,:,t) + xtT(:,t)'*xtT(:,t); %P_t,t-1 = V_t,t-1^T + x_t^T * x_t^T'
Sum_Pt_2T= Ptsum - Pt; %A3 gama2
A2= Ptsum + A2; %gama1
xhat_s = xtT; %smoothed estimate of x(t)
t= T;
Pcov=(eye(2)-KC)*A*Vtt(:,:,t-1);
A1=A1+Pcov+xtT(:,t)'*xtT(:,t-1);
for t= (T-1):-1:2
Pcov =(Vtt(:,:,t) + J(:,:,t)*(Pcov - A*Vtt(:,:,t)))*J(:,:,t-1)'; %Eq(34)
A1 = A1+Pcov+xtT(:,t)'*xtT(:,t-1);
end;
Rterm = (Y - C*xtt);
R_result = 0.5*Rterm' * inv(R)* Rterm;
R_sum_result = sum(sum(R_result));
Qterm = xtt(:,2:T)-(A*xtt(:,1:(T-1)));
Q_result = 0.5*Qterm' * inv(Q) * Qterm;
Q_sum_result = sum(sum(Q_result));
V1term = (xtt(:,1) -initx);
V1_result = 0.5 * V1term' * inv(V1) * V1term;
loglik_t = - R_sum_result - 0.5*T*log(det(R)) - Q_sum_result - 0.5*(T-1)*log(det(Q)) - V1_result - 0.5*log(det(V1)) - 0.5*T*log(2*pi);
%STEP 2 Re-estimate B,Q,R,initx,initV1 via ML given x(t) estimate
C=YX'*inv(Ptsum)/N;
A=A1*inv(A2);
R1term = sum(Y.*Y)'/(T);
R2term = diag(C*YX)/T;
R = R1term - R2term; % R = (1/T)*sum(Y.*Y - C.*xhat_s.*Y');
Q=(1/(T-1))*diag(diag((Sum_Pt_2T-A*(A1'))));
initx = x1sum/N;
x0 = initx;
V1 = Pt(:,:,1) - initx*initx';
LL=[LL loglik_t];
num_iter = num_iter+1
converged = em_converged(loglik, previous_loglik); %subfunction below
previous_loglik = loglik_t;
end %while not converged
A
C
Q
R
function [xpred, Ppred] = predict(x0,P, A, Q)
xpred = A*x0;
Ppred = A*P*A' + Q;
end
function [nu, S] = innovation(xpred, Ppred, y, C, R)
nu = y - C*xpred; %% innovation
S = R + C*Ppred*C'; %% innovation covariance
end
function [xnew, Pnew, yhat, K] = innovation_update_LDS(A,xpred, Ppred,V1, nu, S, C)
% invP=inv(S);
% K = Ppred*C'*invP; %% Kalman gain
% xnew = xpred + K*nu; %% new state
% Pnew = Ppred - Ppred*K*C; %% new covariance
% yhat = C*xnew; % Observed value at time step i, assuming inferred state xnew
% xhat = A*xnew + K*nu;
K = Ppred*C'*inv(S); %% Kalman gain 2 rows 1 col (scalar
xnew = xpred + K*nu; %% new state
Pnew = Ppred - Ppred*K*C; %% new covariance
yhat = C*xnew;
VVnew = (eye(2) - K*C)*A*V1;
end
function converged = em_converged(loglik, previous_loglik, threshold)
% EM_CONVERGED Has EM converged?
% [converged, decrease] = em_converged(loglik, previous_loglik, threshold)
%
% We have converged if
% |f(t) - f(t-1)| / avg < threshold,
% where avg = (|f(t)| + |f(t-1)|)/2 and f is log lik.
% threshold defaults to 1e-4.
% This stopping criterion is from Numerical Recipes in C p42
if nargin < 3
threshold = 1e-4;
end
%log likelihood should increase
if loglik - previous_loglik < -1e-3 % allow for a little imprecision
fprintf(1, '******likelihood decreased from %6.4f to %6.4f!\n', previous_loglik,loglik);
end
delta_loglik = abs(loglik - previous_loglik);
avg_loglik = (abs(loglik) + abs(previous_loglik) + eps)/2;
if (delta_loglik / avg_loglik) < threshold
converged = 1
else converged = 0
end