# Statistical Rethinking Chapter 3 Exercises

knitr::opts_chunk\$set(echo = TRUE)
library(rethinking)


## Preliminary Code

p_grid <- seq(from=0, to=1, length.out=1000)
prior <- rep(1, 1000)
likelihood <- dbinom(6, size=9, prob=p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
plot(p_grid, posterior)


samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)


## 3E1.

sum(samples < 0.2) / length(samples)

## [1] 5e-04


## 3E2.

sum(samples > 0.8) / length(samples)

## [1] 0.1181


## 3E4.

quantile(samples, 0.2)

##       20%
## 0.5155155


## 3E5.

quantile(samples, 0.8)

##       80%
## 0.7597598


## 3E6.

HPDI(samples, prob=0.66)

##     |0.66     0.66|
## 0.5095095 0.7807808


## 3E7.

PI(samples, prob=0.66)

##       17%       83%
## 0.4984985 0.7737738


## 3M1.

p_grid <- seq(from=0, to=1, length.out=1e3)
prior <- rep(1, length(p_grid))
likelihood <- dbinom(8, 15, prob=p_grid)
posterior <- likelihood * prior
plot(p_grid, posterior)


## 3M2.

samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
HPDI(samples, prob=0.90)

##      |0.9      0.9|
## 0.3343343 0.7177177


## 3M3.

Construct a posterior predictive check

samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
w <- rbinom(1e4, size=15, prob=samples)
sum(w == 8) / 1e4

## [1] 0.1471


## 3M4.

w <- rbinom(1e4, size=9, prob=samples)
sum(w==6) / 1e4

## [1] 0.1732


## 3M5.

#### Redo 3M1.

p_grid <- seq(from=0, to=1, length.out=1000)
prior <- ifelse(p_grid < 0.5, 0, 1)
likelihood <- dbinom(x=8, size=15, prob=p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
plot(p_grid, posterior, type="l")


#### Redo 3M2.

samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
HPDI(samples, prob=0.9)

##      |0.9      0.9|
## 0.5005005 0.7117117


#### Redo 3M3.

samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
w <- rbinom(1e4, size=15, prob=samples)
sum(w == 8) / 1e4

## [1] 0.1522


#### Redo 3M4.

samples <- sample(p_grid, prob=posterior, size=1e4, replace=TRUE)
w <- rbinom(1e4, size=9, prob=samples)
sum(w == 6) / 1e4

## [1] 0.2337


## 3M6.

You want the 99% percentile interval of the posterior distribution of p to be only 0.05 wide. This means the distance between the upper and lower bound of the interval should be 0.05. How many times will you have to toss the globe to do this?

real_water <- 0.7
p_grid <- seq(from=0, to=1, length.out=1e4)
prior <- rep(1, length(p_grid))

posteriors <- function(N) {
likelihood <- dbinom(round(N*real_water), size=N, prob=p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
return(posterior)
}

for (i in 1:3000) {
posterior = posteriors(i)
samples <- sample(p_grid, prob=posterior, size=1e4, replace=T)
interval <- PI(samples, prob=0.99)
diff <- (interval[2] - interval[1])
if (diff <= 0.05) {
print(i)
break
}
}

## [1] 2133


# Hard Problems

data(homeworkch3)


# 3H1.

p_grid <- seq(from=0, to=1, length.out=1e3)
prior <- rep(1, length(p_grid))
likelihood <- dbinom(x=sum(birth1) + sum(birth2), size=length(birth1)+length(birth2), prob=p_grid)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)
max_p = p_grid[which.max(posterior)]
print(max_p)

## [1] 0.5545546

plot(p_grid, posterior,type="l", main="posterior of overall male birth rate")
abline(v=max_p, lty=2)


## 3H2.

samples <- sample(p_grid, size=1e4, replace=T, prob=posterior)
hist(samples)


print(HPDI(samples, prob=0.5))

##      |0.5      0.5|
## 0.5305305 0.5765766

print(HPDI(samples, prob=0.89))

##     |0.89     0.89|
## 0.4994995 0.6096096

print(HPDI(samples, prob=0.97))

##     |0.97     0.97|
## 0.4774775 0.6266266


## 3H3.

samples <- sample(p_grid, size=1e4, replace=T, prob=posterior)
bsimulation <- rbinom(n=1e4, size=200, prob=samples)
hist(bsimulation, main="Posterior Predictive")
abline( v=sum(birth1)+sum(birth2) , col="red" )


p.samples <- sample(p_grid, size=10000 , replace=TRUE , prob=posterior)
bsim <- rbinom(10000 , size=200 , prob=samples)
# adj value makes a strict histogram, with spikes at integers
abline( v=sum(birth1)+sum(birth2) , col="red" )


## 3H4.

samples <- sample(x=p_grid, size=1e4, replace=T, prob=posterior)
bsim <- rbinom(n=1e4, size=100, prob=samples)

female.first.borns <- length(which(birth1 == 0))