Skip to contents
library(earth)
#> Loading required package: Formula
#> Loading required package: plotmo
#> Loading required package: plotrix
#> Loading required package: TeachingDemos
library(earth.dof.patch) # our proposed package

Generate simulation data,

gen_data_mars = function(N = 100, p = 2) {
  x1 = rnorm(N)
  x2 = rnorm(N)
  x1_plus = (x1 > 1) * (x1 - 1)
  x2_plus = (x2 > 0.8) * (x2 - 0.8)
  y0 = x1_plus + x1_plus * x2_plus
  y = y0 + rnorm(N) * 0.12
  x = cbind(x1, x2, matrix(rnorm(N*(p-2)), nrow = N))
  return(list(x = x, y = y, y0 = y0))
}
set.seed(1234)
N = 100
p = 50
d = 1
nk = 100
train_data = gen_data_mars(N, p)
test_data = gen_data_mars(N*10, p)
fit1 = earth(train_data$x, train_data$y, degree = d, nk = nk)
# calculate penalty factor
res = sol_mars_df_and_penalty(d = 1, n = N, p = p, nk = nk)
fit2 = earth(train_data$x, train_data$y, penalty = res$penalty, nk = nk)
yhat1 = predict(fit1, test_data$x)
yhat2 = predict(fit2, test_data$x)
mse1 = mean((yhat1 - test_data$y0)^2)
mse2 = mean((yhat2 - test_data$y0)^2)
mse0 = mean((mean(test_data$y) - test_data$y0)^2)
cat("the proportion of MSE decrease is: ", (mse0 - mse1) / mse0, " and ", (mse0 - mse2)/mse0)
#> the proportion of MSE decrease is:  0.64189  and  0.737072

More comprehensive comparisons can be found in our paper.