Analysis of Financial Time Series in Stan - Chapter 3 - GARCHs
In this series we are trying to reproduce the models and examples listed in the book “Analysis of Financial Time Series, 3rd Edition”, by Ruey S. Tsay, using Stan https://mc-stan.org/ and the package RStan https://cran.r-project.org/web/packages/rstan/index.html. The main repository for the presented models and data can be found at https://github.com/marcomarconi/AFTS_with_Stan.
Most of the models listed in chapter 3 were already translated into Stan as part of the official documentation and can be found at https://mc-stan.org/docs/2_26/stan-users-guide/time-series-chapter.html. Still, we will go through them in the current document.
{
require(tidyverse)
require(rstan)
require(bayesplot)
require(loo)
require(LaplacesDemon)
library(xts)
library(quantmod)
library(FKF)
library(MARSS)
library(forecast)
rstan_options(auto_write = TRUE)
rstan_options(javascript = FALSE)
theme_set(theme_classic(base_size = 24))
}
The GARCH Model
The typical GARCH(1,1) model can be coded in Stan as described in https://mc-stan.org/docs/2_26/stan-users-guide/modeling-temporal-heteroscedasticity.html.
We are going to fit this model and its extension AR(3)-GARCH(1) to the S&P500 data:
sp500 <- scan("data/sp500.dat.txt")
plot.ts(sp500)
Acf(sp500)
Pacf(sp500)
A GARCH(K,K) model can be coded in Stan as follow:
data {
int<lower=0> N;
vector[N] y;
int K;
real sigma1;
}
parameters {
real mu;
real<lower=0> alpha0;
real<lower=0,upper=1> alpha1;
real<lower=0,upper=1> beta1;
}
transformed parameters {
real<lower=0> sigma[N];
for(i in 1:K)
sigma[i] = sigma1;
for (t in (K+1):N)
sigma[t] = sqrt(alpha0
+ alpha1 * pow(y[t-K] - mu, 2)
+ beta1 * pow(sigma[t-1], 2));
}
model {
y ~ normal(mu, sigma);
}
generated quantities {
vector[N] log_lik;
{
for (t in 1:N)
log_lik[t] = normal_lpdf(y[t] | mu, sigma[t]);
}
}
While a AR(K)-GARCH(1,1) model can be coded in Stan as follow:
data {
int<lower=0> N;
vector[N] y;
int K;
}
parameters {
real<lower=0> alpha0;
real<lower=0,upper=1> alpha1;
real<lower=0,upper=(1-alpha1)> beta1;
real<lower=0> sigma1;
real ar0;
vector[K] ar;
}
transformed parameters {
real<lower=0> sigma[N];
sigma[1] = sigma1;
for (t in 2:N)
sigma[t] = sqrt(alpha0
+ alpha1 * pow(y[t-1] - ar0, 2)
+ beta1 * pow(sigma[t-1], 2));
}
model {
alpha0 ~ normal(0, 1);
alpha1 ~ normal(0, 1);
beta1 ~ normal(0, 1);
sigma1 ~ cauchy(0, 1);
ar0 ~ normal(0, 1);
ar ~ normal(0, 1);
for (n in (K+1):N) {
real mu = ar0;
for (k in 1:K)
mu += ar[k] * y[n-k];
y[n] ~ normal(mu, sigma[n]);
}
}
generated quantities {
vector[N] log_lik;
{
real log_lik_mu = 0;
for (t in 1:K)
log_lik[t] = normal_lpdf(y[t] | ar0 / (1 - sum(ar)), sigma[t]);
for (t in (K+1):N) {
real mu = ar0;
for (k in 1:K)
mu += ar[k] * y[t-k];
log_lik[t] = normal_lpdf(y[t] | mu, sigma[t]);
log_lik_mu += log_lik[t];
}
log_lik_mu /= N-(K+1);
for (t in 1:K)
log_lik[t] = log_lik_mu;
}
}
Let’s fit both models to the data and check the estimated parameters:
GARCH <- stan_model("models/Chapter_3/GARCH.stan")
ARGARCH <- stan_model("models/Chapter_3/AR-GARCH.stan")
fit_garch_sp500 <- sampling(GARCH, data = list(N = length(sp500), y = sp500, K = 1, sigma1 = sd(sp500)), chains = 4, refresh=0)
fit_argarch_sp500 <- sampling(ARGARCH, data = list(N = length(sp500), y = sp500, K = 3), chains = 4, refresh=0)
print(fit_garch_sp500, pars=names(fit_garch_sp500)[1:4], digits=5)
## Inference for Stan model: GARCH.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
## mu 0.00755 0.00002 0.00155 0.00456 0.00646 0.00755 0.00861 0.01053 4505
## alpha0 0.00009 0.00000 0.00003 0.00004 0.00007 0.00009 0.00011 0.00016 1383
## alpha1 0.13252 0.00063 0.02372 0.09164 0.11563 0.13085 0.14701 0.18334 1421
## beta1 0.84285 0.00063 0.02286 0.79597 0.82756 0.84453 0.85883 0.88338 1322
## Rhat
## mu 0.99945
## alpha0 1.00294
## alpha1 1.00176
## beta1 1.00410
##
## Samples were drawn using NUTS(diag_e) at Sat May 15 20:58:05 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
print(fit_argarch_sp500, pars=names(fit_argarch_sp500)[1:8], digits=5)
## Inference for Stan model: AR-GARCH.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5%
## alpha0 0.00009 0.00000 0.00003 0.00004 0.00007 0.00009 0.00011 0.00016
## alpha1 0.12492 0.00037 0.02160 0.08558 0.10964 0.12380 0.13899 0.17020
## beta1 0.84788 0.00039 0.02264 0.80091 0.83302 0.84872 0.86371 0.88960
## sigma1 0.02450 0.00032 0.02017 0.00080 0.00950 0.02020 0.03455 0.07304
## ar0 0.00766 0.00002 0.00166 0.00441 0.00656 0.00763 0.00877 0.01094
## ar[1] 0.02007 0.00068 0.04114 -0.06291 -0.00784 0.02073 0.04806 0.09819
## ar[2] -0.00903 0.00064 0.03860 -0.08555 -0.03404 -0.00942 0.01693 0.06701
## ar[3] -0.01074 0.00060 0.03944 -0.08747 -0.03672 -0.00984 0.01516 0.06690
## n_eff Rhat
## alpha0 2577 1.00053
## alpha1 3331 0.99982
## beta1 3419 0.99962
## sigma1 3913 1.00077
## ar0 5062 0.99945
## ar[1] 3635 1.00045
## ar[2] 3691 1.00018
## ar[3] 4349 0.99959
##
## Samples were drawn using NUTS(diag_e) at Sat May 15 20:59:00 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
Both models produce estimations that are in close agreement with the ones shown in the book.
In the book, the simpler GARCH(1,1) is preferred over the AR(3)-GARCH(1,1) because the AR coefficient of the latter are close to zero and deemed not significant. Let’s see what we get by comparing the two models with loo:
a <- extract_log_lik(fit_garch_sp500, merge_chains = FALSE)[,,4:(length(sp500))]
b <- extract_log_lik(fit_argarch_sp500, merge_chains = FALSE)[,,4:(length(sp500))]
r_eff_a <- relative_eff(exp(a))
r_eff_b <- relative_eff(exp(b))
loo_compare(loo(a, r_eff = r_eff_a, moment_match = TRUE), loo(b, r_eff = r_eff_b, moment_match = TRUE))
## Warning: Some Pareto k diagnostic values are slightly high. See help('pareto-k-diagnostic') for details.
## elpd_diff se_diff
## model1 0.0 0.0
## model2 -0.9 0.9
We can see that there is little difference between the two models, and therefore the simpler model should be preferred.
We can finally have a look at the estimated volatility from the GARCH(1,1) model:
extract(fit_garch_sp500, pars="sigma")[[1]] %>% colMeans() %>% plot(type="l")
The book goes on by showing alternative extensions of the GARCH model, namely: I-GARCH, GARCH-M, E-GARCH, T-GARCH and CHARMA. We are reviewing them in detail, but you can find the Stan code in the github folder.
Example 3.4
The example addresses the question on whether the daily volatility of a stock is lower in the summer and, if so, by how much. We will the IBM data:
ibm_sp500 <- read.table("data/m-ibmsplnsu.dat", header=T)
plot.ts(ibm_sp500$ibm)
Let’s fit a simple AR(1)-GARCH(1,1) model first, we will use it later:
fit_argarch_ibm2 <- sampling(ARGARCH, data = list(N = length(ibm_sp500$ibm), y = ibm_sp500$ibm, K = 1), chains = 1, refresh=0)
To study the summer effect on stock volatility of an asset, we define an indicator variable “u” which takes the value 1 if the month is June, July or August, and 0 otherwise. We will the mode described in equation (3.45) in the book which imposes the constraint that the constant term of the volatility equation is zero for the summer months:
\[ \sigma^2_t = \alpha_1\alpha^2_{t-1} + \beta_1\sigma^2_{t-1} + \gamma(1 - u_t) \]
Here is the code in Stan:
data {
int<lower=0> N;
real y[N];
int u[N];
int K;
}
parameters {
real<lower=0,upper=1> alpha1;
real<lower=0,upper=1> beta1;
real<lower=0> sigma1;
real gamma;
real ar0;
vector[K] ar;
}
transformed parameters {
real<lower=0> sigma[N];
sigma[1] = sigma1;
for (t in 2:N)
sigma[t] = sqrt(
alpha1 * pow(y[t-1] - ar0, 2)
+ beta1 * pow(sigma[t-1], 2)
+ gamma * (1 - u[t])
);
}
model {
alpha1 ~ normal(0, 1);
beta1 ~ normal(0, 1);
sigma1 ~ cauchy(0, 1);
ar0 ~ normal(0, 1);
ar ~ normal(0, 1);
for (n in (K+1):N) {
real mu = ar0;
for (k in 1:K)
mu += ar[k] * y[n-k];
y[n] ~ normal(mu, sigma[n]);
}
}
generated quantities {
vector[N] log_lik;
{
real log_lik_mu = 0;
for (t in 1:K)
log_lik[t] = normal_lpdf(y[t] | ar0 / (1 - sum(ar)), sigma[t]);
for (t in (K+1):N) {
real mu = ar0;
for (k in 1:K)
mu += ar[k] * y[t-k];
log_lik[t] = normal_lpdf(y[t] | mu, sigma[t]);
log_lik_mu += log_lik[t];
}
log_lik_mu /= N-(K+1);
for (t in 1:K)
log_lik[t] = log_lik_mu;
}
}
Let’s fit it to the data:
ARGARCH_I <- stan_model("models/Chapter_3/AR-GARCH-I.stan")
fit_argarch_I_ibm2 <- sampling(ARGARCH_I, data = list(N = length(ibm_sp500$ibm), y = ibm_sp500$ibm, u = ibm_sp500$summer, K = 1), chains = 1, refresh=0)
plot(fit_argarch_I_ibm2, pars=names(fit_argarch_I_ibm2)[1:6])
You can clearly see that the gamma parameter is significantly different from zero.
We can compare the last model with the simple AR(1)-GARCH(1,1):
loo_compare(loo(fit_argarch_ibm2, moment_match = TRUE), loo(fit_argarch_I_ibm2, moment_match = TRUE))
## Warning: Some Pareto k diagnostic values are slightly high. See help('pareto-k-diagnostic') for details.
## elpd_diff se_diff
## model2 0.0 0.0
## model1 -2.5 3.8
Example 3.5
The question we ask in this example is whether the past returns of individual components of the index S&P 500 contribute to the modeling of the S&P 500 index volatility in the presence of its own returns. The IBM stock will be used as explanatory variable:
\[ \sigma^2_t = \alpha_0 + \alpha_1\alpha^2_{t-1} + \alpha_2\alpha^2_{t-2} + \beta_1\sigma^2_{t-1} + \gamma(x_{t-1} - \bar{x})^2 \]
Let’s have a look at the Stan code:
data {
int<lower=0> N;
real y[N];
real x[N];
int K;
}
transformed data {
vector[N] x_std;
real m = mean(x);
real stdev = sd(x);
x_std = (to_vector(x) - m) / stdev; // dividing by stdev avoids divergencies
}
parameters {
real mu;
real<lower=0> sigma1;
real<lower=0> alpha0;
real<lower=0,upper=1> alpha1;
real<lower=0,upper=1> beta1;
real gamma;
}
transformed parameters {
real<lower=0> sigma[N];
for(i in 1:K)
sigma[i] = sigma1;
for (t in (K+1):N)
sigma[t] = sqrt(
alpha0
+ alpha1 * pow(y[t-K] - mu, 2)
+ beta1 * pow(sigma[t-1], 2)
+ gamma * pow(x_std[t-1], 2)
) ;
}
model {
mu ~ normal(0, 1);
sigma1 ~ cauchy(0, 1);
gamma ~ normal(0, 0.01);
y ~ normal(mu, sigma);
}
generated quantities {
vector[N] log_lik;
for (t in 1:N)
log_lik[t] = normal_lpdf(y[t] | mu, sigma[t]);
}
and fit it to the data, together with a simple GARCH(2,1) model:
GARCH <- stan_model("models/Chapter_3/GARCH.stan")
fit_garch_sp500 <- sampling(GARCH , data=list(N = length(ibm_sp500$sp), y = ibm_sp500$sp, K = 2, sigma1=sd(ibm_sp500$sp)), chains = 4, refresh=0)
GARCH_x <- stan_model("models/Chapter_3/GARCH_x.stan")
fit_garch_x_sp500 <- sampling(GARCH_x , data=list(N = length(ibm_sp500$sp), y = ibm_sp500$sp, x=ibm_sp500$ibm, K = 2), chains = 4, refresh=0)
Contrary to the book (which estimate a gamma = 0.007±0.0039) the gamma parameters is no different from zero and the two models performs pretty similar:
print(fit_garch_x_sp500, pars=names(fit_garch_x_sp500)[1:6])
## Inference for Stan model: GARCH_x.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## mu 0.57 0.00 0.14 0.30 0.47 0.57 0.66 0.84 3333 1
## sigma1 3.92 0.02 1.16 2.21 3.08 3.73 4.56 6.65 3191 1
## alpha0 0.82 0.01 0.30 0.32 0.61 0.79 1.00 1.48 3162 1
## alpha1 0.16 0.00 0.03 0.11 0.14 0.16 0.18 0.22 1901 1
## beta1 0.82 0.00 0.03 0.77 0.81 0.83 0.84 0.87 1628 1
## gamma 0.00 0.00 0.01 -0.02 -0.01 0.00 0.01 0.02 3503 1
##
## Samples were drawn using NUTS(diag_e) at Sat May 15 21:00:26 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
a <- extract_log_lik(fit_garch_sp500, merge_chains = FALSE)[,,4:(nrow(ibm_sp500))]
b <- extract_log_lik(fit_garch_x_sp500, merge_chains = FALSE)[,,4:(nrow(ibm_sp500))]
r_eff_a <- relative_eff(exp(a))
r_eff_b <- relative_eff(exp(b))
loo_compare(loo(a, r_eff = r_eff_a, moment_match = TRUE), loo(b, r_eff = r_eff_b, moment_match = TRUE))
## Warning: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
## elpd_diff se_diff
## model2 0.0 0.0
## model1 -1.2 0.6
Comments
Post a Comment