# R code for bootstrapped confidence intervals for the mean and median of a
# skewed sample.
# The code below generates the results from the exploratory data analysis grad
# talk and shows the undue influence of outliers when computing means from
# skewed data.
# Bootstrap the median and the mean of a small skewed sample and report
# percentile interval limits for both statistics.
#
# Objects left in the workspace:
#   b, c    bootstrap replicates of the median and of the mean
#   md, mn  median of each bootstrap distribution
#   bs, cs  the replicates sorted in increasing order
# No random seed is set here, so results vary from run to run.

# Observed sample: the three zeros pull the mean (about 4.33) below the
# median (5).
y <- c(0, 0, 0, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6)

# Number of bootstrap resamples. Keep it a multiple of 20 so that the 5% and
# 95% positions used below are whole numbers.
nsamp <- 1000

# Preallocated storage for the replicates. The vector named `c` does not break
# later calls to c(): R skips non-function objects when it looks up a name
# used in call position.
b <- numeric(nsamp)  # bootstrap medians
c <- numeric(nsamp)  # bootstrap means

# Positions to resample from; derived from y so the sample size is not
# hard-coded.
x <- seq_along(y)

for (j in seq_len(nsamp)) {
  # Draw positions with replacement, as many as there are observations.
  yb <- sample(x, size = length(x), replace = TRUE)
  y2 <- y[yb]
  b[j] <- median(y2)
  c[j] <- mean(y2)
}

# Centre of each bootstrap distribution. Both summaries use the median,
# including the one for the bootstrap means.
md <- median(b)
mn <- median(c)

# Percentile interval limits. The 5th and 95th percentiles bound a 90%
# interval (a 95% interval would use 0.025 and 0.975 instead).
bs <- sort(b)
ind <- trunc(0.05 * nsamp)
bs[ind]
ind <- trunc(0.95 * nsamp)
bs[ind]

cs <- sort(c)
ind <- trunc(0.05 * nsamp)
cs[ind]
ind <- trunc(0.95 * nsamp)
cs[ind]