# # air conditioning data example # # # Generate data # y <- c(3, 5, 7, 18, 43, 85, 91, 98, 100, 130, 230, 487) n <- length(y) # Ex. 1.1 # Is Exponential assumption reasonable? par(mfrow=c(2,1)) # draw EDF and parametric fit plot(ecdf(y), main='Empirical Distribution of Y') curve(pexp(x, 1/mean(y)), add=TRUE, col='red') legend(200, 0.6, c('EDF','Fitted model (Exponential)'), lty=c(1,1), col=c('black','red')) # Q-Q plot of EDF vs parametric fit plot(qexp(ppoints(y), 1/mean(y)), sort(y), xlab='Quantiles of Exponential Distribution', ylab='Quantiles of Y') abline(0,1) # Ex 2.1. Sample mean is 108.0833 mean(y) # Ex 2.3. approximate normal CI # 46.93055 169.23611 c(mean(y)-qnorm(.975)*mean(y)/sqrt(n), mean(y)+qnorm(.975)*mean(y)/sqrt(n)) # Ex 2.5. Simulation vs Theory R <- 500 # Try different R values, like 10, 100 t.stars <- numeric(R) for(r in 1:R){ y.stars <- rexp(n, 1/mean(y)) t.stars[r] <- mean(y.stars) } mean(t.stars)-mean(y) # compare this with 'correct' bias=0 var(t.stars) # compare this with 'correct' var=973.5 ## Repeat the above many times. Do we get consistent results? mean(y)^2/n # 'correct' var (y-bar)^2/n # Ex 2.5. (complete) n.trials <- 7 Rs <-seq(10, 500, length=10) boot.bias <- matrix(NA, nrow=length(Rs),ncol=n.trials ) boot.var <- matrix(NA, nrow=length(Rs),ncol=n.trials ) for(trial in 1:n.trials){ R <- max(Rs) t.stars <- numeric(R) for(r in 1:R){ y.stars <- rexp(n, 1/mean(y)) t.stars[r] <- mean(y.stars) } for(i in 1:length(Rs)){ boot.bias[i, trial] <- mean(t.stars[1:Rs[i]])-mean(y) boot.var[i, trial] <- var(t.stars[1:Rs[i]]) } } par(mfrow=c(1,2)) matplot(Rs, boot.bias, log='x', type='b', pch=1) abline(h=0) matplot(Rs, boot.var, log='x', type='b', pch=1) abline(h=mean(y)^2/n) # Ex 2.6. Normal approximation # vs G_R estimate R <- 999 # Also try R=99 t.stars <- numeric(R) for(r in 1:R){ y.stars <- rexp(n, 1/mean(y)) t.stars[r] <- mean(y.stars) } # Normal and Gamma Q-Q plot par(mfrow=c(1,2)) qqnorm(t.stars) qqline(t.stars) plot(qgamma(ppoints(t.stars), n, n/mean(y)), sort(t.stars), xlab='Exponential model quantiles', ylab='t*') abline(0,1) # Distribution G_R of t* par(mfrow=c(1,1)) hist(t.stars, nclass=20, prob=TRUE, col='gray', border='white') lines(density(t.stars)) # # Ex 2.9. Nonparametric bootstrap # R <- 999 t.stars <- numeric(R) set.seed(101) for(r in 1:R){ y.stars <- sample(y, replace=TRUE) t.stars[r] <- mean(y.stars) } # Exp and Gamma Q-Q plot par(mfrow=c(1,2)) plot(qgamma(ppoints(t.stars), n, n/mean(y)), sort(t.stars), xlab='Exponential model quantiles', ylab='t*') abline(0,1) plot(qgamma(ppoints(t.stars), n*0.71, 0.71*n/mean(y)), sort(t.stars), xlab='Gamma model quantiles', ylab='t*') abline(0,1) # # Simple 95% confidence intervals # # 25.1625 171.3375 alpha <- .025 par(mfrow=c(1,1)) plot(ecdf(t.stars)) abline(h=c(alpha, 1-alpha)) abline(v=quantile(t.stars, prob=c(alpha, 1-alpha))) c(mean(y)-(quantile(t.stars, 1-alpha) - mean(y)), mean(y)-(quantile(t.stars, alpha) - mean(y))) par(mfrow=c(1,1)) hist(t.stars, nclass=50, col='gray') abline(v=c(mean(y)-(quantile(t.stars, 1-alpha) - mean(y)), mean(y)-(quantile(t.stars, alpha) - mean(y))), col='red') abline(v=c(mean(y)-qnorm(.975)*mean(y)/sqrt(n), mean(y)+qnorm(.975)*mean(y)/sqrt(n)), col='blue')