## ----echo=FALSE,eval=TRUE------------------------------------------------
options(continue="  ")

## ------------------------------------------------------------------------
options(digits=3)
options(width=72) # narrow output 
p = c(.1,.2,.3)
x = runif(10000)
mycat1 = numeric(10000)
for (i in 0:length(p)) { 
    mycat1 = mycat1 + (x >= sum(p[0:i])) 
    }
table(mycat1)

## ------------------------------------------------------------------------
mycat2 = cut(runif(10000), c(0, 0.1, 0.3, 0.6, 1))
summary(mycat2)

mycat3 = sample(1:4, 10000, rep=TRUE, prob=c(.1,.2,.3,.4))
table(mycat3)

## ------------------------------------------------------------------------
intercept = -1
beta = 0.5
n = 5000
xtest = rnorm(n, mean=1, sd=1)
linpred = intercept + (xtest * beta)
prob = exp(linpred)/(1 + exp(linpred))
ytest = ifelse(runif(n) < prob, 1, 0)

## ------------------------------------------------------------------------
coef(glm(ytest ~ xtest, family=binomial))

## ------------------------------------------------------------------------
n = 1500; p = 3; sigbsq = 4
beta = c(-2, 1.5, 0.5, -1)
id = rep(1:n, each=p)         # 1 1 ... 1 2 2 ... 2 ... n
x1 = as.numeric(id < (n+1)/2) # 1 1 ... 1 0 0 ... 0
randint = rep(rnorm(n, 0, sqrt(sigbsq)), each=p)
x2 = rep(1:p, n)              # 1 2 ... p 1 2 ... p ...
x3 = runif(p*n)
linpred = beta[1] + beta[2]*x1 + beta[3]*x2 + beta[4]*x3 + randint
expit = exp(linpred)/(1 + exp(linpred))
y = runif(p*n) < expit        # generate a logical as our outcome

## ------------------------------------------------------------------------
library(lme4)
glmmres = glmer(y ~ x1 + x2 + x3 + (1|id), family=binomial(link="logit"))
summary(glmmres)

## ----message=FALSE-------------------------------------------------------
p1 = .15; p2 = .25; corr = 0.4; n = 10000
p1p2 = corr*sqrt(p1*(1-p1)*p2*(1-p2)) + p1*p2
library(Hmisc)
vals = rMultinom(matrix(c(1-p1-p2+p1p2, p1-p1p2, p2-p1p2, p1p2), 
   nrow=1, ncol=4), n)
y1 = rep(0, n); y2 = rep(0, n)    # put zeroes everywhere
y1[vals==2 | vals==4] = 1         # and replace them with ones
y2[vals==3 | vals==4] = 1         # where needed
rm(vals, p1, p2, p1p2, corr, n)   # cleanup

## ------------------------------------------------------------------------
cor(y1, y2)
table(y1)
table(y2)

## ------------------------------------------------------------------------
# generate data from Cox model 
n = 10000
beta1 = 2; beta2 = -1
lambdaT = .002	# baseline hazard
lambdaC = .004  # hazard of censoring
x1 = rnorm(n)   # standard normal
x2 = rnorm(n)
# true event time
T = rweibull(n, shape=1, scale=lambdaT*exp(-beta1*x1-beta2*x2)) 
C = rweibull(n, shape=1, scale=lambdaC)   #censoring time
time = pmin(T,C)  #observed time is min of censored and true
censored = (time==C)   # set to 1 if event is censored
# fit Cox model
library(survival)
survobj = coxph(Surv(time, (1-censored))~ x1 + x2, method="breslow")

## ------------------------------------------------------------------------
table(censored)
print(survobj)
confint(survobj)

## ------------------------------------------------------------------------
alphafun = function(x, y) { 
   return(exp(-y^4+x^4)*(1+abs(y))^3*
      (1+abs(x))^-3) 
} 

## ------------------------------------------------------------------------
burnin=5000; numvals=5000; thin = 10 
xn = numeric(burnin + numvals*thin)
xn[1] = rnorm(1)         # starting value
for (i in 2:(burnin + numvals*thin)) {
  propy = rnorm(1, xn[i-1], 1) 
  alpha = min(1, alphafun(xn[i-1], propy)) 
  xn[i] = sample(c(propy, xn[i-1]), 1, prob=c(alpha, 1-alpha))
} 
sample = xn[5000 + (1:numvals) * 10] 

## ------------------------------------------------------------------------
f = function(x) {
   exp(-x^4)*(1+abs(x))^3
}
integral = integrate(f, 0, Inf)
c = 2 * integral$value; c
pdfeval = function(x) {
   return(1/6.809610784*exp(-x^4)*(1+abs(x))^3)
}

## ----mhfigr,echo=TRUE, eval=TRUE-----------------------------------------
curve(pdfeval, from=-2, to=2, lwd=2, lty=2, type="l", 
   ylab="probability", xlab="Y")
lines(density(sample), lwd=2, lty=1)
legend(-1, .15, legend=c("True", "Simulated"), 
   lty=2:1, lwd=2, cex=1.4, bty="n")

## ------------------------------------------------------------------------
set.seed(42)
numsim = 1000; n=500
lower = numeric(numsim); upper = numeric(numsim)
for (i in 1:numsim) {
   x = rexp(n, rate=1)   # skewed
   testresult = t.test(x, mu=1)
   lower[i] = testresult$conf.int[1] > 1
   upper[i] = testresult$conf.int[2] < 1
}

## ------------------------------------------------------------------------
library(mosaic)
tally(~ lower, format="percent")
tally(~ upper, format="percent")

## ----eval=FALSE----------------------------------------------------------
## runttest = function(x) {return(CI=confint(t.test(x, mu=1)))}
## xmat = matrix(rexp(numsim*n), nrow=n)
## results = apply(xmat, 2, runttest)
## tally(~ results[2,] > 1)
## tally(~ results[3,] < 1)

## ------------------------------------------------------------------------
givedips = function(n) { 
  students = 1:n
  diploma = sample(students, n)
  return(sum(students==diploma))
}
res = replicate(10000, givedips(650))

## ------------------------------------------------------------------------
table(res)
library(mosaic)
favstats(res)

## ------------------------------------------------------------------------
numsim = 10000
doors = 1:3
opendoor = function(x) { 
   # input x is a vector with two values
   # first element is winner, second is choice
   if (x[1]==x[2])    # guess was right
      return(sample(doors[-c(x[1])], 1)) 
   else return(doors[-c(x[1],x[2])])
}

## ------------------------------------------------------------------------
opendoor(c(1, 1))   # can return 2 or 3
opendoor(c(1, 2))   # must return 3!

## ------------------------------------------------------------------------
swapdoor = function(x) { return(doors[-c(x[1], x[2])]) }
swapdoor(c(1,1))
swapdoor(c(1,2))

## ------------------------------------------------------------------------
winner = sample(doors, numsim, replace=TRUE)
choice = sample(doors, numsim, replace=TRUE)
open = apply(cbind(winner, choice), 1, opendoor)
newchoice = apply(cbind(open, choice), 1, swapdoor)

## ------------------------------------------------------------------------
cat("Without switching, won ",
   round(sum(winner==choice)/numsim*100, 1),
      " percent of the time.\n", sep="")
cat("With switching, won ",
   round(sum(winner==newchoice)/numsim*100, 1),
      " percent of the time.\n", sep="")

## ------------------------------------------------------------------------
options(digits=6)
library(mosaic)
rate = 1/3
F1 = antiD((lambda*exp(-lambda*t)) ~ t, lambda=rate) # f(T)
F2 = antiD((t*lambda*exp(-lambda*t)) ~ t, lambda=rate) # E[T]
2*(F1(t=2) - F1(t=0)) + (F2(t=Inf) - F2(t=2))

## ----echo=FALSE----------------------------------------------------------
set.seed(1000)

## ------------------------------------------------------------------------
numsim = 100000
fail = rexp(numsim, rate=rate)
# map all values less than 2 to be 2
fail[fail<2] = 2  # or mean(pmax(2, fail))
confint(t.test(~ fail))