R code for bootstrapped confidence intervals for means and medians from a skewed sample
The code below generates the results from the exploratory data analysis grad talk and shows the undue influence of outliers when computing means from skewed data.
# Bootstrap comparison of the median and the mean for a small skewed sample.
#
# The sample has three zeros far below the rest of the values, so the mean is
# pulled down while the median is not. Resampling with replacement shows how
# much wider the bootstrap distribution of the mean is as a result.
#
# Objects left in the workspace:
#   y          - the observed sample
#   nsamp      - number of bootstrap resamples
#   md, mn     - median of the bootstrap medians / of the bootstrap means
#   median_ci  - 5th and 95th percentiles of the bootstrap medians
#   mean_ci    - 5th and 95th percentiles of the bootstrap means
#
# No RNG seed is set, so the numbers vary from run to run; call set.seed()
# beforehand if reproducible output is needed.

y <- c(0, 0, 0, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6)
n_obs <- length(y)

nsamp <- 1000

# The percentile indices below are 5% and 95% of nsamp; requiring a multiple
# of 20 keeps both of them whole numbers.
stopifnot(nsamp > 0, nsamp %% 20 == 0)

# Preallocate one slot per resample for each statistic.
boot_median <- numeric(nsamp)
boot_mean <- numeric(nsamp)

for (j in seq_len(nsamp)) {
  # Draw n_obs indices with replacement and compute both statistics on the
  # same resample so the two bootstrap distributions are directly comparable.
  resample <- y[sample.int(n_obs, size = n_obs, replace = TRUE)]
  boot_median[j] <- median(resample)
  boot_mean[j] <- mean(resample)
}

# Point estimates: the centre (median) of each bootstrap distribution.
md <- median(boot_median)
mn <- median(boot_mean)

# Percentile interval from the sorted bootstrap replicates.
# NOTE(review): the 5th and 95th percentiles bound a two-sided 90% interval
# (each endpoint is a one-sided 95% bound). The original comment labelled
# this "95% CI"; use 0.025 / 0.975 if a two-sided 95% interval is intended.
lower_prob <- 0.05
upper_prob <- 0.95

# round() rather than trunc(): the products are whole numbers by the check
# above, and round() is immune to results such as 949.9999999 from floating
# point multiplication.
lower_idx <- round(lower_prob * nsamp)
upper_idx <- round(upper_prob * nsamp)

boot_median_sorted <- sort(boot_median)
median_ci <- boot_median_sorted[c(lower_idx, upper_idx)]
median_ci

boot_mean_sorted <- sort(boot_mean)
mean_ci <- boot_mean_sorted[c(lower_idx, upper_idx)]
mean_ci