In [9]:
# Size of the figures rendered by the notebook. Adapt these two values so the
# plots display nicely in your browser.
options(
  repr.plot.width  = 20,
  repr.plot.height = 13
)
In [10]:
## This simple script visualizes the idea behind factors.

# In this example, we look at the quality control of a yogurt factory. They measure
# the quality of the batch once a day. What can we learn?

# Fix the random seed so that the simulated noise -- and therefore every number
# and figure produced from Y -- is the same on every "Restart & Run All".
set.seed(42)

# Set up some data: X runs from 30 to 40 in steps of 0.001, and Y is a linear
# function of X (25 + 2*X) plus Gaussian noise with mean 0 and sd 3.
X <- seq(30, 40, by = 0.001)
Y <- 25 + 2 * X + rnorm(length(X), mean = 0, sd = 3)

# Let's look at the data: ten values of Y drawn at random (without replacement).
paste(sample(Y, 10))

# Y plotted against a random permutation of X, labelled as the date of
# fabrication. Shuffling X scrambles the pairing with Y, so this view shows only
# the overall scatter of the quality values.
plot(sample(X), Y, xlab = "date of fabrication", ylab = "yogurt quality")

# Summary statistics of the quality values.
paste("average: ", mean(Y))
paste("standard deviation: ", sd(Y))

# Distribution of the quality values.
hist(Y, xlab = "yogurt quality")
  1. '99.1380143723532'
  2. '91.4334274715176'
  3. '91.7531928989112'
  4. '97.5141066288831'
  5. '101.507490277111'
  6. '89.5366037449317'
  7. '96.1684568342183'
  8. '90.1495136803211'
  9. '85.8783531818904'
  10. '87.3921988586717'
'average: 94.9751784199516'
'standard deviation: 6.55306488959065'
No description has been provided for this image
No description has been provided for this image
In [8]:
# The factory now also records the temperature in the fermenter, X, so each
# quality measurement Y comes with a temperature: two columns of data.
# Show entries 10 through 20 of both, side by side.
paste(
  X[10:20],
  "          ",
  Y[10:20]
)
  1. '30.009 84.2109086260335'
  2. '30.01 84.7907227635593'
  3. '30.011 89.4365728261784'
  4. '30.012 82.8272707979886'
  5. '30.013 78.67087444295'
  6. '30.014 86.1397619805142'
  7. '30.015 88.6385654864689'
  8. '30.016 83.1165857229854'
  9. '30.017 84.832185225203'
  10. '30.018 88.0386359205701'
  11. '30.019 80.601045609256'
In [11]:
# Let's plot the quality Y against the temperature X instead of the day of fabrication.
plot(
  x = X,
  y = Y,
  xlab = "Temperature",
  ylab = "yogurt quality"
)
No description has been provided for this image
In [14]:
# Clearly, a large part of the data scatter can be explained by a trend of the
# quality changing with temperature.
# Let's calculate the standard deviation of the data between 30 and 31 degrees
# and compare it with the standard deviation of the full dataset.
#
# The window is selected by temperature value (30 <= X < 31) rather than by a
# hard-coded index range, so it stays correct if the grid of X (start value or
# step size) is changed.
paste("standard deviation between 30-31: ", sd(Y[X >= 30 & X < 31]))
paste("standard deviation of full dataset: ", sd(Y))
'standard deviation between 30-31: 3.16740563302703'
'standard deviation of full dataset: 6.48188917156064'
In [ ]: