at least one couple (id-time) has NA in at least one index dimension in resulting pdata.frame
to find out which, use e.g. table(index(your_pdataframe), useNA = "ifany")
Iot obtain a balanced panel dataset, districts with missing cells were excluded. Using balance.type="shared"
in make.pbalanced(...)
resulted in an empty set.
df[order(df$aasta),] %>% ggplot() +
geom_point(aes(evote, turnout, color=ONIMI), show.legend = F, size=.5, alpha=.5) +
geom_line(aes(evote, turnout, color=ONIMI, group=ONIMI), show.legend = F, size=.5, alpha=.3) +
theme_bw()
f = as.formula(turnout ~ evote + competition + candidates + log(valijad_nimekirjas) + aasta)
f
turnout ~ evote + competition + candidates + log(valijad_nimekirjas) +
aasta
Year dummies were included iot avoid bias due to low evote diffusion in 2005.
pool = plm(f, data=df, model="pooling")
bw = plm(f, data=df, model="between")
fe = plm(f, data=df, model="within")
re = plm(f, data=df, model="random")
stargazer(pool,bw,fe,re,
column.labels=c("Pooled OLS",
"Between effects",
"Fixed effects",
"Random effects"),
star.cutoffs=c(0.05, 0.01, 0.001),
omit = c("Constant"),
type="html")
Dependent variable: | ||||
turnout | ||||
Pooled OLS | Between effects | Fixed effects | Random effects | |
(1) | (2) | (3) | (4) | |
evote | 0.015 | -0.562** | 0.262*** | 0.245*** |
(0.067) | (0.197) | (0.054) | (0.051) | |
competition | 0.046*** | 0.036 | 0.036*** | 0.039*** |
(0.010) | (0.029) | (0.007) | (0.007) | |
candidates | 0.016*** | 0.035*** | 0.007*** | 0.009*** |
(0.002) | (0.007) | (0.002) | (0.002) | |
log(valijad_nimekirjas) | -0.052*** | -0.070*** | -0.019 | -0.042*** |
(0.003) | (0.008) | (0.010) | (0.004) | |
aasta2002 | -0.002 | 0.003 | 0.002 | |
(0.008) | (0.005) | (0.005) | ||
aasta2005 | -0.025*** | -0.030*** | -0.029*** | |
(0.007) | (0.005) | (0.005) | ||
aasta2009 | 0.046*** | 0.012 | 0.016 | |
(0.012) | (0.009) | (0.008) | ||
aasta2013 | 0.022 | -0.027* | -0.023* | |
(0.016) | (0.012) | (0.012) | ||
Observations | 1,035 | 207 | 1,035 | 1,035 |
R2 | 0.308 | 0.363 | 0.348 | 0.327 |
Adjusted R2 | 0.302 | 0.350 | 0.177 | 0.322 |
F Statistic | 57.027*** (df = 8; 1026) | 28.768*** (df = 4; 202) | 54.641*** (df = 8; 820) | 62.313*** (df = 8; 1026) |
Note: | p<0.05; p<0.01; p<0.001 |
pFtest(fe, pool)
F test for individual effects
data: f
F = 9.503, df1 = 206, df2 = 820, p-value < 2.2e-16
alternative hypothesis: significant effects
FEs are different from zero. FE should be preferred.
phtest(fe, re)
Hausman Test
data: f
chisq = 91.261, df = 8, p-value = 2.578e-16
alternative hypothesis: one model is inconsistent
FE should be preferred.
bptest(f, data=df, studentize=F)
Breusch-Pagan test
data: f
BP = 60.484, df = 8, p-value = 3.745e-10
pcdtest(fe, test="cd")
Pesaran CD test for cross-sectional dependence in panels
data: turnout ~ evote + competition + candidates + log(valijad_nimekirjas) + aasta
z = -0.27983, p-value = 0.7796
alternative hypothesis: cross-sectional dependence
Heteroscedasticity is present.
coeftest(fe, vcovHC(fe, method="arellano", cluster="group"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
evote 0.2617667 0.0670964 3.9014 0.0001035 ***
competition 0.0358209 0.0087677 4.0855 4.829e-05 ***
candidates 0.0074265 0.0013622 5.4520 6.594e-08 ***
log(valijad_nimekirjas) -0.0189745 0.0136921 -1.3858 0.1661861
aasta2002 0.0031496 0.0048959 0.6433 0.5201916
aasta2005 -0.0303476 0.0050307 -6.0325 2.441e-09 ***
aasta2009 0.0124613 0.0107182 1.1626 0.2453184
aasta2013 -0.0266680 0.0152492 -1.7488 0.0806976 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
resid.fe = resid(fe)
names(resid.fe) = paste0(df$ONIMI,"-",df$aasta)
fe.res = data.frame(ONIMI = gsub("-*[[:digit:]]", "", names(resid.fe)),
year = gsub("^.*\\-", "", names(resid.fe)),
residuals = resid(fe),
row.names = NULL) %>%
dcast(ONIMI ~ year)
coord.res = coord
coord.res@data = left_join(coord.res@data,fe.res, by="ONIMI")
qtm(shp = coord.res, fill = c("1999", "2002", "2005","2009","2013"), fill.palette ="Blues", ncol = 3)
FE residuals do seem to have spatial correlation.
dists = as.matrix(dist(cbind(as.vector(coordinates(coord)[,1]), as.vector(coordinates(coord)[,2]))))
W = mat2listw(dists, style="W") # row standardised
B = mat2listw(dists, style="B") # basic binary coding
C = mat2listw(dists, style="C") # globally standardised
U = mat2listw(dists, style="U") # equal to C divided by the number of neighbours
minmax = mat2listw(dists, style="minmax")
S = mat2listw(dists, style="S") # variance-stabilizing coding scheme
wm = W
Neighbour based weights did not work for some reason.
bsktest(f, df, listw = wm, test = "LM1")
Baltagi, Song and Koh SLM1 marginal test
data: turnout ~ evote + competition + candidates + log(valijad_nimekirjas) + aasta
LM1 = 27.267, p-value < 2.2e-16
alternative hypothesis: Random effects
Random regional effects are present.
bsktest.mod = function (formula, data, index = NULL, listw, standardize=F, ...)
{
if (!is.null(index)) {
data <- plm.data(data, index)
}
index <- data[, 1]
tindex <- data[, 2]
x <- model.matrix(formula, data = data)
y <- model.response(model.frame(formula, data = data))
cl <- match.call()
names(index) <- row.names(data)
ind <- index[which(names(index) %in% row.names(x))]
tind <- tindex[which(names(index) %in% row.names(x))]
oo <- order(tind, ind)
x <- x[oo, ]
y <- y[oo]
ind <- ind[oo]
tind <- tind[oo]
N <- length(unique(ind))
k <- dim(x)[[2]]
time <- max(tapply(x[, 1], ind, length))
NT <- length(ind)
ols <- lm(y ~ x - 1)
XpXi <- solve(crossprod(x))
n <- dim(ols$model)[1]
indic <- seq(1, time)
inde <- as.numeric(rep(indic, each = N))
ind1 <- seq(1, N)
inde1 <- as.numeric(rep(ind1, time))
bOLS <- coefficients(ols)
e <- as.matrix(residuals(ols))
ee <- crossprod(e)
Ws <- listw2dgCMatrix(listw)
Wst <- t(Ws)
WWp <- (Ws + Wst)/2
yy <- function(q) {
wq <- WWp %*% q
wq <- as.matrix(wq)
}
IWWpe <- unlist(tapply(e, inde, yy))
H <- crossprod(e, IWWpe)/crossprod(e)
W2 <- Ws %*% Ws
WW <- crossprod(Ws)
tr <- function(R) sum(diag(R))
b <- tr(W2 + WW)
LM2 <- sqrt((N^2 * time)/b) * as.numeric(H)
s <- NT - k
lag <- function(QQ) lag.listw(listw, QQ)
fun2 <- function(Q) unlist(tapply(Q, inde, lag))
Wx <- apply(x, 2, fun2)
WX <- matrix(Wx, NT, k)
XpWx <- crossprod(x, WX)
D2M <- XpWx %*% XpXi
Ed2 <- (time * sum(diag(Ws)) - tr(D2M))/s
WWx <- apply(WX, 2, fun2)
WWX <- matrix(WWx, NT, k)
XpWWX <- crossprod(x, WWX)
spb <- XpWWX %*% XpXi
spbb <- tr(spb)
tpb <- XpWx %*% XpXi %*% XpWx %*% XpXi
fintr2 <- time * tr(W2) - 2 * spbb + tr(tpb)
Vd2 <- 2 * (s * fintr2 - (sum(diag(D2M))^2))/(s^2 * (s +
2))
We <- unlist(tapply(e, inde, function(W) lag.listw(listw,
W)))
d2 <- crossprod(e, We)/ee
SLM2 <- (d2 - Ed2)/sqrt(Vd2)
statistics <- if (standardize)
abs(SLM2)
else abs(LM2)
pval <- 2 * pnorm(statistics, lower.tail = FALSE)
names(statistics) <- if (standardize)
"SLM2"
else "LM2"
method <- "Baltagi, Song and Koh LM2 marginal test"
dname <- deparse(formula)
RVAL <- list(statistic = statistics, method = method, p.value = pval,
data.name = deparse(formula), alternative = "Spatial autocorrelation")
class(RVAL) <- "htest"
return(RVAL)
}
bsktest.mod(f, df, listw = wm, test="LM2")
Baltagi, Song and Koh LM2 marginal test
data: turnout ~ evote + competition + candidates + log(valijad_nimekirjas) + aasta
LM2 = 1.7307, p-value = 0.08351
alternative hypothesis: Spatial autocorrelation
Some spatial autocorrelation is detected.
sphtest(f, data = df, listw = wm, spatial.model = "error", method = "ML")
Hausman test for spatial models
data: x
chisq = 1.2233, df = 8, p-value = 0.9964
alternative hypothesis: one model is inconsistent
RE can be used.
RE = spml(f, df, listw=wm, model="random", lag=T, spatial.error="b")
summary(RE)
ML panel with spatial lag, random effects, spatial error correlation
Call:
spreml(formula = formula, data = data, index = index, w = listw2mat(listw),
w2 = listw2mat(listw2), lag = lag, errors = errors, cl = cl)
Residuals:
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.666 -0.518 -0.469 -0.463 -0.420 -0.153
Error variance parameters:
Estimate Std. Error t-value Pr(>|t|)
phi 1.76861 0.21941 8.0608 7.579e-16 ***
rho -0.69059 0.55229 -1.2504 0.2112
Spatial autoregressive coefficient:
Estimate Std. Error t-value Pr(>|t|)
lambda -0.81586 0.47933 -1.7021 0.08874 .
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
(Intercept) 1.2613525 0.0314974 40.0462 < 2.2e-16 ***
evote 0.2503306 0.0504739 4.9596 7.064e-07 ***
competition 0.0383488 0.0071605 5.3556 8.529e-08 ***
candidates 0.0085610 0.0014877 5.7547 8.680e-09 ***
log(valijad_nimekirjas) -0.0410705 0.0043497 -9.4422 < 2.2e-16 ***
aasta2002 0.0105839 0.0027759 3.8127 0.0001375 ***
aasta2005 -0.0508461 0.0027576 -18.4386 < 2.2e-16 ***
aasta2009 0.0581982 0.0075353 7.7234 1.133e-14 ***
aasta2013 0.0015637 0.0107877 0.1449 0.8847517
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
GM.RE = spgm(f, df,
listw = wm,
model = "random",
moments = "fullweights",
lag=T, spatial.error = T)
GM.FE = spgm(f, df,
listw = wm,
model = "within",
moments = "fullweights",
lag=T, spatial.error = T)
sphtest(GM.RE, GM.FE,mehod="GM")
Hausman test for spatial models
data: f
chisq = 36.346, df = 9, p-value = 3.442e-05
alternative hypothesis: one model is inconsistent
Hausman suggests FE.
summary(GM.FE)
Spatial panel fixed effects GM model
Call:
spgm(formula = f, data = df, listw = wm, model = "within", lag = T,
spatial.error = T, moments = "fullweights")
Residuals:
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.74 1.89 1.94 1.94 1.98 2.28
Estimated spatial coefficient, variance components and theta:
Estimate
rho -0.0018148
sigma^2_v 0.0020565
Spatial autoregressive coefficient:
Estimate Std. Error t-value Pr(>|t|)
lambda -2.32685 0.90529 -2.5703 0.01016 *
Coefficients:
Estimate Std. Error t-value Pr(>|t|)
evote 0.2615731 0.0529095 4.9438 7.662e-07 ***
competition 0.0350221 0.0073358 4.7741 1.805e-06 ***
candidates 0.0072969 0.0015145 4.8182 1.449e-06 ***
log(valijad_nimekirjas) -0.0192527 0.0097063 -1.9835 0.0473096 *
aasta2002 0.0251636 0.0096882 2.5973 0.0093949 **
aasta2005 -0.0932506 0.0248883 -3.7468 0.0001791 ***
aasta2009 0.1354186 0.0485911 2.7869 0.0053215 **
aasta2013 0.0458654 0.0305659 1.5005 0.1334741
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1