options(repr.plot.width=20, repr.plot.height=10.5)   # this command just formats the size of the figures. Adapt to view them nicelyin your browser.

# Understand how estimator functions estimate parameters.

# define the parameters of the mathematical model.
# we use a Normal distribution, N(mu,sigma^2)

mu <- 10
sigma <- 2
n <- 15

data <- rnorm(n, mu, sigma) # generate some normal distributed data
samplemean=mean(data)        # compute the sample mean. This will be different from mu!
samplesd=sd(data)

histdata <-hist(data,col="grey")
abline(v=samplemean, col="red",lwd=2)



x=seq(mu-5*sigma,mu+5*sigma,by=0.01)
lines(x,5*max(histdata$counts)*dnorm(x,mu,sigma), col="blue",lwd=2)
abline(v=mu, col="blue",lwd=2)

# myownestimator

# Lets invent our own estimators for mu
# midpoint between the most closely spaced pair of datapoints.
# idea: points close to the mean are most likely, there datapoint density will be high

m <- 100    #number of trials
n <- 10     #number of datapoints in each experiment
mu <- 10    #true mean
sigma <- 2  #true standard deviation

meanlist <- numeric(length = m) 
myownestlist <- numeric(length = m)
for (i in seq(1,m,by=1))
{
data <- rnorm(n, mu, sigma)

index=which.min(diff(sort(data))) #find datapoints which are closest together
myownest=(sort(data)[index+1]+sort(data)[index])/2 # compute my estimator

meanlist[i]=mean(data)
myownestlist[i]=myownest
}


p1 <- hist(meanlist,breaks = seq(0,20,by=0.5))                     # centered at 4
p2 <- hist(myownestlist,breaks = seq(0,20,by=0.5))                     # centered at 6
plot( p1, col=rgb(0,0,1,1/4), xlim=c(0,20))  # first histogram
plot( p2, col=rgb(1,0,0,1/4), xlim=c(0,20), add=T)  # second

cat("Sample average: mean=",mean(meanlist),", sd=",sd(meanlist))
cat("Myownestimator: mean=",mean(myownestlist),", sd=",sd(myownestlist))

Sample average: mean= 9.961115 , sd= 0.6627044

Myownestimator: mean= 9.990962 , sd= 1.356413