Get data.
cd = read.csv("http://www.rob-mcculloch.org/data/susedcars.csv")
y = cd$price/1000 # price in thousands of dollars
n = length(y)
X = cbind(rep(1,n),cd$mileage/1000,cd$year) # intercept, mileage in thousands, year
ddf = data.frame(X,y)
Get the QR decomposition of X.
qrres = qr(X)
Q = qr.Q(qrres)
R = qr.R(qrres)
#check
temp = Q %*% R
summary(temp - X)
## V1 V2 V3
## Min. :-1.110e-16 Min. :-2.842e-14 Min. :3.001e-11
## 1st Qu.:-1.110e-16 1st Qu.: 0.000e+00 1st Qu.:3.001e-11
## Median :-1.110e-16 Median : 7.105e-15 Median :3.001e-11
## Mean :-1.080e-16 Mean : 9.218e-15 Mean :3.101e-11
## 3rd Qu.:-1.110e-16 3rd Qu.: 1.421e-14 3rd Qu.:3.001e-11
## Max. : 2.887e-15 Max. : 6.821e-13 Max. :9.836e-10
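Another sanity check (a sketch on simulated data, rather than the cars design matrix above): the columns of Q are orthonormal, so crossprod(Q) should be the identity up to rounding.

```r
# hypothetical small design matrix, just to illustrate the property
set.seed(7)
Xs = cbind(rep(1,30), rnorm(30), rnorm(30))
Qs = qr.Q(qr(Xs))
max(abs(crossprod(Qs) - diag(ncol(Xs)))) # ~ machine precision
```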
Get \(\hat{\beta}\) using QR.
bhat1 = solve(R) %*% t(Q) %*% y
print(bhat1)
## [,1]
## [1,] -5365.4898723
## [2,] -0.1537219
## [3,] 2.6943495
Check against R's lm.
lmres = lm(y~.-1,ddf) # have 1 in X so don't fit an intercept
print(lmres$coef)
## X1 X2 X3
## -5365.4898723 -0.1537219 2.6943495
We did it the stupid way above by inverting R. Often it is a good idea to do stupid-but-simple first, and then be
smart!!
It is better to use backsolve, which exploits the fact that R is upper triangular, than to invert R.
qy = t(Q) %*% y
bhat2 = backsolve(R,qy)
print(bhat2)
## [,1]
## [1,] -5365.4898723
## [2,] -0.1537219
## [3,] 2.6943495
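R also packages the Q'y-then-backsolve step into one call: qr.coef takes the qr object and y directly. A sketch on simulated data:

```r
set.seed(99)
Xs = cbind(rep(1,40), rnorm(40), rnorm(40))
ys = rnorm(40)
b_qr = qr.coef(qr(Xs), ys)     # backsolves internally
b_lm = lm(ys ~ Xs - 1)$coef    # 1 is already a column of Xs, so no intercept
max(abs(b_qr - b_lm))          # ~ machine precision
```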
Get cars data.
cd = read.csv("http://www.rob-mcculloch.org/data/susedcars.csv")
cd = cd[,c(1,4,5)]
cd$price = cd$price/1000
cd$mileage = cd$mileage/1000
head(cd)
## price mileage year
## 1 43.995 36.858 2008
## 2 44.995 46.883 2012
## 3 25.999 108.759 2007
## 4 33.880 35.187 2007
## 5 34.895 48.153 2007
## 6 5.995 121.748 2002
Regress price on mileage alone.
lm1 = lm(price~mileage,cd)
print(summary(lm1))
##
## Call:
## lm(formula = price ~ mileage, data = cd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.670 -7.063 0.239 6.293 37.024
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.35978 0.67063 84.04 <2e-16 ***
## mileage -0.34997 0.00787 -44.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.67 on 998 degrees of freedom
## Multiple R-squared: 0.6646, Adjusted R-squared: 0.6643
## F-statistic: 1978 on 1 and 998 DF, p-value: < 2.2e-16
coef1 = lm1$coef
print(coef1)
## (Intercept) mileage
## 56.3597845 -0.3499745
Regress price on (mileage, year).
lm2 = lm(price~.,cd)
print(summary(lm2))
##
## Call:
## lm(formula = price ~ ., data = cd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.857 -4.855 -1.670 3.483 34.499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.365e+03 1.716e+02 -31.27 <2e-16 ***
## mileage -1.537e-01 8.339e-03 -18.43 <2e-16 ***
## year 2.694e+00 8.526e-02 31.60 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.544 on 997 degrees of freedom
## Multiple R-squared: 0.8325, Adjusted R-squared: 0.8321
## F-statistic: 2477 on 2 and 997 DF, p-value: < 2.2e-16
coef2 = lm2$coef
print(coef2)
## (Intercept) mileage year
## -5365.4898723 -0.1537219 2.6943495
Regress mileage on year.
lm3 = lm(mileage~year,cd)
M.Y = lm3$residuals
Regress price on M.Y.
tdf = data.frame(price = cd$price, M.Y)
lm4 = lm(price~M.Y,tdf)
print(summary(lm4))
##
## Call:
## lm(formula = price ~ M.Y, data = tdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.394 -15.458 -0.519 12.592 48.295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.58332 0.56562 54.070 < 2e-16 ***
## M.Y -0.15372 0.01977 -7.775 1.88e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.89 on 998 degrees of freedom
## Multiple R-squared: 0.05711, Adjusted R-squared: 0.05616
## F-statistic: 60.45 on 1 and 998 DF, p-value: 1.876e-14
coef4 = lm4$coef
print(coef4)
## (Intercept) M.Y
## 30.5833180 -0.1537219
How does the coefficient of mileage in R2 compare with the coefficient of M.Y in R4?
print(coef2)
## (Intercept) mileage year
## -5365.4898723 -0.1537219 2.6943495
print(coef4)
## (Intercept) M.Y
## 30.5833180 -0.1537219
They are exactly the same!!
Let’s also check the standard errors.
sigmahat = summary(lm2)$sigma
check = sigmahat/sqrt(sum(M.Y^2))
cat('stderr of mileage in R2 should be: ',check,'\n')
## stderr of mileage in R2 should be: 0.008338762
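The same identity can be checked end to end on simulated data (a sketch, not the cars data): the multiple-regression standard error of a coefficient equals sigmahat divided by the root sum of squares of the partialled-out regressor.

```r
set.seed(14)
ns = 200
x1 = rnorm(ns); x2 = .7*x1 + rnorm(ns)      # correlated regressors
ys = 1 + 2*x1 - 3*x2 + rnorm(ns)
full = lm(ys ~ x1 + x2)
r1 = lm(x1 ~ x2)$residuals                  # x1 with x2 partialled out
se_full = summary(full)$coef["x1","Std. Error"]
se_fwl = summary(full)$sigma / sqrt(sum(r1^2))
c(se_full, se_fwl)                          # identical up to rounding
```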
Equicorrelated \(\Sigma\) matrix.
p = 5
rho = .8
Sigma = matrix(rep(rho,p^2),ncol=p)
diag(Sigma) = 1.0
print(Sigma)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0 0.8 0.8 0.8 0.8
## [2,] 0.8 1.0 0.8 0.8 0.8
## [3,] 0.8 0.8 1.0 0.8 0.8
## [4,] 0.8 0.8 0.8 1.0 0.8
## [5,] 0.8 0.8 0.8 0.8 1.0
##############################
## (Y1,Y2) ~ N((0,0),Sigma11)
Sigma11 = Sigma[1:2,1:2]
print(Sigma11)
## [,1] [,2]
## [1,] 1.0 0.8
## [2,] 0.8 1.0
ii1 = 1:2; ii2 = 3:5
S11 = Sigma[ii1,ii1]
S12 = Sigma[ii1,ii2]
S21 = Sigma[ii2,ii1]
S22 = Sigma[ii2,ii2]
B = S12 %*% solve(S22)
S1g2 = S11 - S12 %*% solve(S22) %*% S21
y3t5 = matrix(c(.23,-.65,-.3),ncol=1)
mu = B %*% y3t5
# conditional mean is:
print(mu)
## [,1]
## [1,] -0.2215385
## [2,] -0.2215385
#conditional var is:
print(S1g2)
## [,1] [,2]
## [1,] 0.26153846 0.06153846
## [2,] 0.06153846 0.26153846
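For an equicorrelation matrix the regression weights B have a closed form: every entry equals \(\rho/(1+(q-1)\rho)\), where q is the number of conditioning variables (here q = 3). A quick sketch recovers the conditional mean above:

```r
rho = .8; q = 3
w = rho/(1 + (q-1)*rho)       # common weight in B, about 0.3077 here
w * sum(c(.23,-.65,-.3))      # matches the -0.2215385 printed above
```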
L = t(chol(Sigma))
print(L)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0 0.0000000 0.0000000 0.0000000 0.0000000
## [2,] 0.8 0.6000000 0.0000000 0.0000000 0.0000000
## [3,] 0.8 0.2666667 0.5374838 0.0000000 0.0000000
## [4,] 0.8 0.2666667 0.1653796 0.5114083 0.0000000
## [5,] 0.8 0.2666667 0.1653796 0.1203314 0.4970501
##check
max(abs(Sigma - L %*% t(L)))
## [1] 1.110223e-16
solve(L)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.0000000 0.0000000 0.0000000 0.0000000 0.00000
## [2,] -1.3333333 1.6666667 0.0000000 0.0000000 0.00000
## [3,] -0.8268982 -0.8268982 1.8605210 0.0000000 0.00000
## [4,] -0.6016568 -0.6016568 -0.6016568 1.9553847 0.00000
## [5,] -0.4733811 -0.4733811 -0.4733811 -0.4733811 2.01187
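The earlier advice about backsolve applies here too: rather than forming solve(L), use forwardsolve to solve the lower-triangular system directly. A sketch:

```r
p = 5; rho = .8
Sig = matrix(rho, p, p); diag(Sig) = 1
Ls = t(chol(Sig))             # lower-triangular Cholesky factor
b = 1:p
z1 = forwardsolve(Ls, b)      # solves Ls %*% z = b directly
z2 = solve(Ls) %*% b          # the wasteful way
max(abs(z1 - z2))             # ~ machine precision
```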
temp = eigen(Sigma)
P = temp$vectors
Dh = diag(sqrt(temp$values))
A = P %*% Dh
##check
max(abs(Sigma-A%*%t(A)))
## [1] 6.661338e-16
set.seed(88)
n = 10000
p = nrow(Sigma)
#Y = t(A %*% matrix(rnorm(n*p),nrow=p))
Y = t(L %*% matrix(rnorm(n*p),nrow=p))
#mle
muhat = apply(Y,2,mean)
Shat = var(Y)*((n-1)/n)
print(muhat)
## [1] -0.0139021590 -0.0131906902 0.0009299157 -0.0053310735 -0.0057425149
print(Shat[1:3,1:3])
## [,1] [,2] [,3]
## [1,] 1.0047487 0.8056103 0.8124095
## [2,] 0.8056103 1.0053794 0.8137901
## [3,] 0.8124095 0.8137901 1.0145577
set.seed(88)
n = 50
p = nrow(Sigma)
Y = t(A %*% matrix(rnorm(n*p),nrow=p))
#Y = t(L %*% matrix(rnorm(n*p),nrow=p))
#mle
muhat = apply(Y,2,mean)
Shat = var(Y)*((n-1)/n)
print(muhat)
## [1] 0.11383351 0.01837374 0.09299554 0.05163164 0.08379574
print(Shat[1:3,1:3])
## [,1] [,2] [,3]
## [1,] 0.6882551 0.5220177 0.5495150
## [2,] 0.5220177 0.6866435 0.5137173
## [3,] 0.5495150 0.5137173 0.7938185
I guess “close” depends on the application, but not as close as with the larger sample size.
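One way to quantify "close" (a sketch, using the maximum absolute entrywise error of the covariance MLE at the two sample sizes):

```r
set.seed(88)
p = 5; rho = .8
Sig = matrix(rho, p, p); diag(Sig) = 1
Ls = t(chol(Sig))
maxerr = function(ns) {
   Ys = t(Ls %*% matrix(rnorm(ns*p), nrow=p))
   Shat = var(Ys)*((ns-1)/ns)    # covariance MLE
   max(abs(Shat - Sig))
}
errs = c(maxerr(50), maxerr(10000))
print(errs)                      # the error shrinks with n
```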
True function.
n=100
x = seq(from=-2,to=2,length.out=100)
fx = x^3
\(\Sigma\) matrix.
sf = 4
l = .1
Sigma = matrix(0.0,n,n)
for(i in 1:n) {
   for(j in 1:n) {
      stand = (x[i]-x[j])/l
      Sigma[i,j] = (sf^2) * exp(-.5*stand^2)
   }
}
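The double loop can be written in one shot with outer, which is much faster in R. A sketch rebuilding the same squared-exponential kernel and comparing the two:

```r
sf = 4; l = .1
xs = seq(from=-2, to=2, length.out=100)
D = outer(xs, xs, "-")/l            # matrix of scaled differences
Sig2 = sf^2 * exp(-.5*D^2)          # vectorized kernel
# loop version for comparison
Sig1 = matrix(0.0, 100, 100)
for(i in 1:100) for(j in 1:100) Sig1[i,j] = sf^2*exp(-.5*((xs[i]-xs[j])/l)^2)
max(abs(Sig1 - Sig2))               # ~ 0
```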
Conditional distribution.
Iy = 76:85
Ix = c(1:75,86:100)
Syy = Sigma[Iy,Iy]
Sxx = Sigma[Ix,Ix]
Syx = Sigma[Iy,Ix]
Sxy = Sigma[Ix,Iy]
xp = fx[Ix]
B = Syx %*% solve(Sxx)
mygx = B %*% xp
Sygx = Syy - B %*% Sxy
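With a fast-decaying kernel, solve(Sxx) can be numerically delicate because Sxx is close to singular. A common alternative in GP code (a sketch, with a small diagonal jitter, an assumption not in the original) is to factor Sxx with chol and use forwardsolve/backsolve instead of forming an inverse:

```r
sf = 4; l = .1
xs = seq(from=-2, to=2, length.out=100)
Sig = sf^2 * exp(-.5*(outer(xs, xs, "-")/l)^2)
Iy = 76:85; Ix = c(1:75, 86:100)
Sxx = Sig[Ix,Ix] + 1e-8*diag(length(Ix))   # jitter for numerical stability
Syx = Sig[Iy,Ix]
U = chol(Sxx)                              # Sxx = t(U) %*% U
v = xs[Ix]^3                               # f(x) at the observed points
# Syx %*% Sxx^{-1} %*% v via two triangular solves, no explicit inverse
tmp = backsolve(U, forwardsolve(t(U), v))
mygx2 = Syx %*% tmp                        # conditional mean
head(mygx2)
```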
Simulate from conditional.
set.seed(34)
nd = 1000
Lygx = t(chol(Sygx))
dygx = as.double(mygx) + Lygx %*% matrix(rnorm(10*nd),nrow=10) # 10 = length(Iy)
Plot.
### plot GP inference
plot(x,fx,type="n")
qmat = apply(t(dygx),2,quantile,probs=c(.1,.5,.9))
for(i in 1:nrow(dygx)) {
   ii = 75+i
   lines(x[rep(ii,2)],qmat[c(1,3),i],col='red')
   points(x[ii],qmat[2,i],col="red",pch=16)
}
lines(x,fx,col='gray',lwd=3)