In [9]:
# Size of the rendered figures in the notebook. Adjust the two numbers until the
# plots display nicely in your browser.
options(list(
  repr.plot.width  = 20,
  repr.plot.height = 13
))
In [10]:
## This simple script should visualize the idea behind factors.
# In this example, we look at the quality control of a yogurt factory. They measure
# the quality of the batch once a day. What can we learn?

# Fix the random seed so that rnorm() and sample() produce the same numbers on
# every run; otherwise the printed values change each time the cell is executed.
set.seed(42)

# Set up some data: X is a regular grid from 30 to 40 in steps of 0.001,
# Y is a linear function of X plus Gaussian noise (mean 0, sd 3).
X <- seq(30, 40, by = 0.001)
Y <- 25 + 2 * X + rnorm(length(X), mean = 0, sd = 3)

# Let's look at the data: ten randomly drawn quality values, shown as strings.
paste(sample(Y, 10))
# X is shuffled before plotting, so the x-axis carries no information about Y here.
plot(sample(X), Y, xlab = "date of fabrication", ylab = "yogurt quality")
paste("average: ", mean(Y))
paste("standard deviation: ", sd(Y))
hist(Y, xlab = "yogurt quality")
- '99.1380143723532'
- '91.4334274715176'
- '91.7531928989112'
- '97.5141066288831'
- '101.507490277111'
- '89.5366037449317'
- '96.1684568342183'
- '90.1495136803211'
- '85.8783531818904'
- '87.3921988586717'
'average: 94.9751784199516'
'standard deviation: 6.55306488959065'
In [8]:
# The factory also records the temperature in the fermenter, X, so the data
# now has two columns.
# Print entries 10 to 20 of both columns side by side.
paste(X[10:20], " ", Y[10:20])
- '30.009 84.2109086260335'
- '30.01 84.7907227635593'
- '30.011 89.4365728261784'
- '30.012 82.8272707979886'
- '30.013 78.67087444295'
- '30.014 86.1397619805142'
- '30.015 88.6385654864689'
- '30.016 83.1165857229854'
- '30.017 84.832185225203'
- '30.018 88.0386359205701'
- '30.019 80.601045609256'
In [11]:
# Plot the quality Y against the temperature X instead of the day of fabrication.
plot(
  x = X,
  y = Y,
  xlab = "Temperature",
  ylab = "yogurt quality"
)
In [14]:
# Clearly, a large part of the scatter in the data can be explained by a trend:
# the quality changes with temperature.
# Let's calculate the standard deviation of the data between 30 and 31 degrees.
# The subset is selected by temperature value (logical mask on X) rather than by
# hard-coded index positions, so it stays correct if the grid of X is changed.
paste("standard deviation between 30-31: ", sd(Y[X >= 30 & X <= 31]))
paste("standard deviation of full dataset: ", sd(Y))
'standard deviation between 30-31: 3.16740563302703'
'standard deviation of full dataset: 6.48188917156064'
In [ ]: