# chapter 8 in MASS, smoothing, etc.

> library(MASS)
> data(wtloss)
> names(wtloss)
[1] "Days"   "Weight"
> plot(wtloss)
> help(nls)
> wtloss.st <- c(b0 = 90, b1 = 95, th = 120)
> wtloss.st
 b0  b1  th
 90  95 120
> out <- nls(Weight ~ b0 + b1 * 2^(- Days / th), data = wtloss, start = wtloss.st, trace = TRUE)
67.54349 :   90  95 120
40.18081 :   82.72629 101.30457 138.71374
39.24489 :   81.39868 102.65836 141.85859
39.2447 :   81.37375 102.68417 141.91052
> names(out)
[1] "m"    "data" "call"
> out
Nonlinear regression model
  model:  Weight ~ b0 + b1 * 2^(-Days/th)
   data:  wtloss
       b0        b1        th
 81.37375 102.68417 141.91052
 residual sum-of-squares:  39.2447
> class(out)
[1] "nls"
> summary(out)

Formula: Weight ~ b0 + b1 * 2^(-Days/th)

Parameters:
   Estimate Std. Error t value Pr(>|t|)
b0   81.374      2.269   35.86   <2e-16 ***
b1  102.684      2.083   49.30   <2e-16 ***
th  141.911      5.295   26.80   <2e-16 ***
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.8949 on 49 degrees of freedom

Correlation of Parameter Estimates:
        b0     b1
b1 -0.9891
th -0.9857 0.9561

# nonparametric regression, section 8.7 in MASS

> data(GAGurine)
> names(GAGurine)
[1] "Age" "GAG"
> plot(GAGurine)
> library(mgcv)
This is mgcv 0.8-9
> out <- gam(GAG ~ s(Age, bs = "cr"), data = GAGurine)
> summary(out)

Family: gaussian
Link function: identity

Formula:
GAG ~ s(Age, bs = "cr")

Parametric coefficients:
           Estimate  std. err.    t ratio    Pr(>|t|)
constant     12.209     0.2924      41.75    < 2.22e-16

Approximate significance of smooth terms:
             edf       chi.sq     p-value
s(Age)     8.762       893.09     < 2.22e-16

R-sq.(adj) =  0.739   Deviance explained = 74.6%
GCV score =  21.79   Scale est. = 21.113    n = 314
> gam.check(out)

Smoothing parameter selection converged after 1 iteration.
> out.log <- gam(log(GAG) ~ s(Age, bs = "cr"), data = GAGurine)
> gam.check(out.log)

Smoothing parameter selection converged after 1 iteration.
> attach(GAGurine)
> plot(Age, log(GAG))
> curve(predict(out, newdata = data.frame(x = x)),
+     add = TRUE)
Error in xy.coords(x, y) : x and y lengths differ
# note: the call above fails; the retry below uses out.log (fitted to
# log(GAG), matching the plot) and names the newdata column Age, the
# model's covariate.
> curve(predict(out.log, newdata = data.frame(Age = x)),
+     add = TRUE)
> summary(out.log)

Family: gaussian
Link function: identity

Formula:
log(GAG) ~ s(Age, bs = "cr")

Parametric coefficients:
           Estimate  std. err.    t ratio    Pr(>|t|)
constant     2.2508    0.01865      120.7    < 2.22e-16

Approximate significance of smooth terms:
             edf       chi.sq     p-value
s(Age)     6.426         1280     < 2.22e-16

R-sq.(adj) =  0.803   Deviance explained = 80.7%
GCV score =  0.090034   Scale est. = 0.087905    n = 314
> out.poly <- lm(log(GAG) ~ poly(Age, 7))
> curve(predict(out.poly, newdata = data.frame(Age = x)), add = TRUE, col = "red")
> summary(out.poly)

Call:
lm(formula = log(GAG) ~ poly(Age, 7))

Residuals:
     Min       1Q   Median       3Q      Max
-1.33661 -0.17669 -0.01068  0.14801  1.06409

Coefficients:
                 Estimate Std. Error  t value Pr(>|t|)
(Intercept)     2.364e+00  1.660e-02  142.427  < 2e-16 ***
poly(Age, 7)1  -1.004e+01  2.942e-01  -34.145  < 2e-16 ***
poly(Age, 7)2   2.558e+00  2.942e-01    8.697  < 2e-16 ***
poly(Age, 7)3  -2.118e+00  2.942e-01   -7.200 4.68e-12 ***
poly(Age, 7)4   3.269e-01  2.942e-01    1.111 0.267376
poly(Age, 7)5   1.323e-02  2.942e-01    0.045 0.964161
poly(Age, 7)6   1.122e+00  2.942e-01    3.813 0.000166 ***
poly(Age, 7)7   1.401e-04  2.942e-01 0.000476 0.999620
---
Signif. codes:  0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

Residual standard error: 0.2942 on 306 degrees of freedom
Multiple R-Squared: 0.8105,     Adjusted R-squared: 0.8062
F-statistic:   187 on 7 and 306 DF,  p-value: < 2.2e-16

> library(splines)
> out.ns <- lm(log(GAG) ~ ns(Age, df = 6.426))
Error in qr(t(const)) : NA/NaN/Inf in foreign function call (arg 1)
# note: the call above, using the non-integer edf 6.426 reported for
# s(Age) in summary(out.log), fails; the retry below uses df = 6.
> out.ns <- lm(log(GAG) ~ ns(Age, df = 6))
> curve(predict(out.ns, newdata = data.frame(Age = x)), add = TRUE, col = "seagreen")
> lines(smooth.spline(Age, log(GAG)), col = "blue")