The data for this lecture comes from the FiveThirtyEight article The Ultimate Halloween Candy Power Ranking by Walt Hickey. To collect the data, Hickey and collaborators at FiveThirtyEight set up an experiment in which people could vote on a series of randomly generated candy match-ups (e.g. Reese’s vs. Skittles). Click here to check out some of the match-ups.
The data set contains 12 characteristics and the win percentage for each of the 85 candies in the experiment.
Rows: 85
Columns: 13
$ competitorname <chr> "100 Grand", "3 Musketeers", "One dime", "One quarter…
$ chocolate <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ fruity <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE…
$ caramel <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ peanutyalmondy <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, …
$ nougat <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ crispedricewafer <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ hard <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ bar <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ pluribus <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE…
$ sugarpercent <dbl> 0.732, 0.604, 0.011, 0.011, 0.906, 0.465, 0.604, 0.31…
$ pricepercent <dbl> 0.860, 0.511, 0.116, 0.511, 0.511, 0.767, 0.767, 0.51…
$ winpercent <dbl> 66.97173, 67.60294, 32.26109, 46.11650, 52.34146, 50.…
# Build the modelling data set from candy_rankings:
#   * drop the candy name column,
#   * rescale sugarpercent and pricepercent from proportions to percentages,
#   * turn every logical column into a factor with levels "FALSE", "TRUE".
candy_rankings_clean <- candy_rankings |>
  select(-competitorname) |>
  mutate(
    sugarpercent = sugarpercent * 100,
    pricepercent = pricepercent * 100,
    across(
      where(is.logical),
      \(flag) factor(flag, levels = c("FALSE", "TRUE"))
    )
  )
Rows: 85
Columns: 12
$ chocolate <fct> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ fruity <fct> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE…
$ caramel <fct> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ peanutyalmondy <fct> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, …
$ nougat <fct> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ crispedricewafer <fct> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ hard <fct> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ bar <fct> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ pluribus <fct> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE…
$ sugarpercent <dbl> 73.2, 60.4, 1.1, 1.1, 90.6, 46.5, 60.4, 31.3, 90.6, 6…
$ pricepercent <dbl> 86.0, 51.1, 11.6, 51.1, 51.1, 76.7, 76.7, 51.1, 32.5,…
$ winpercent <dbl> 66.97173, 67.60294, 32.26109, 46.11650, 52.34146, 50.…
dials
which is part of the tidyverse
tune()
dials
which is part of the tidyverse
tune()
dials
which is part of the tidyverse
tune()
grid_regular
tune_grid
dials
which is part of the tidyverse
tune()
grid_regular
tune_grid
Best RMSE
# Plug the parameters held in best_rmse_ridge into the ridge workflow,
# fit the finalized workflow on the training data, and print the tidied
# coefficient table.
ridge_rmse_final <- ridge_wf |>
  finalize_workflow(best_rmse_ridge)

ridge_rmse_fit <- ridge_rmse_final |>
  fit(data = candy_train)

ridge_rmse_fit |>
  tidy() |>
  kable()
term | estimate | penalty |
---|---|---|
(Intercept) | 49.8221817 | 4.641589 |
sugarpercent | 1.6105235 | 4.641589 |
pricepercent | -0.8755351 | 4.641589 |
chocolate_TRUE. | 5.5626786 | 4.641589 |
fruity_TRUE. | 0.8290067 | 4.641589 |
caramel_TRUE. | 0.9577679 | 4.641589 |
peanutyalmondy_TRUE. | 2.8412296 | 4.641589 |
nougat_TRUE. | 0.5018516 | 4.641589 |
crispedricewafer_TRUE. | 1.6202899 | 4.641589 |
hard_TRUE. | -1.2156466 | 4.641589 |
bar_TRUE. | 1.4669984 | 4.641589 |
pluribus_TRUE. | 0.4265386 | 4.641589 |
Best RMSE
Best RMSE
# Finalize the ridge workflow with the parameters in best_ose_rmse_ridge,
# fit it on the training data, and print the tidied coefficient table.
# NOTE(review): this assigns to ridge_rmse_final / ridge_rmse_fit, the same
# names used for the best_rmse_ridge fit, so any earlier objects with those
# names are overwritten -- confirm that is intended.
ridge_rmse_final <- ridge_wf |>
  finalize_workflow(best_ose_rmse_ridge)

ridge_rmse_fit <- ridge_rmse_final |>
  fit(data = candy_train)

ridge_rmse_fit |>
  tidy() |>
  kable()
term | estimate | penalty |
---|---|---|
(Intercept) | 49.8221817 | 4.641589 |
sugarpercent | 1.6105235 | 4.641589 |
pricepercent | -0.8755351 | 4.641589 |
chocolate_TRUE. | 5.5626786 | 4.641589 |
fruity_TRUE. | 0.8290067 | 4.641589 |
caramel_TRUE. | 0.9577679 | 4.641589 |
peanutyalmondy_TRUE. | 2.8412296 | 4.641589 |
nougat_TRUE. | 0.5018516 | 4.641589 |
crispedricewafer_TRUE. | 1.6202899 | 4.641589 |
hard_TRUE. | -1.2156466 | 4.641589 |
bar_TRUE. | 1.4669984 | 4.641589 |
pluribus_TRUE. | 0.4265386 | 4.641589 |
Best RMSE
# Attach the predictions of the fitted ridge and lasso workflows to the
# test set, one column per model, taken from the .pred column that
# predict() returns.
candy_test_wpreds <- mutate(
  candy_test,
  ridge_preds = predict(ridge_rmse_fit, new_data = candy_test)[[".pred"]],
  lasso_preds = predict(lasso_rmse_fit, new_data = candy_test)[[".pred"]]
)

# Test-set RMSE of the ridge predictions against the observed winpercent.
rmse(candy_test_wpreds, truth = winpercent, estimate = ridge_preds)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 10.6
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 12.0
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.476
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.341
tidymodels
# Parameter set of the ridge workflow with the penalty range replaced by
# c(-2, 1). dials::penalty() interprets its range on the log10 scale by
# default, so this spans roughly 0.01 to 10.
ridge_params <- update(
  extract_parameter_set_dials(ridge_wf),
  penalty = penalty(range = c(-2, 1))
)

# Bayesian search over the ridge penalty: starts from the earlier tuning
# results (tuning_ridge_results) and runs 25 further iterations on
# candy_folds. The first metric in candy_metrics is the one being
# optimized (rmse in this case).
bayes_ridge <- tune_bayes(
  ridge_wf,
  resamples = candy_folds,
  param_info = ridge_params,
  initial = tuning_ridge_results,
  metrics = candy_metrics,
  iter = 25
)
tidymodels
tidymodels
tidymodels
penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
---|---|---|---|---|---|---|---|
9.999186 | rmse | standard | 13.20055 | 20 | 0.2720582 | Iter11 | 11 |
9.999034 | rmse | standard | 13.20055 | 20 | 0.2720587 | Iter8 | 8 |
9.996720 | rmse | standard | 13.20057 | 20 | 0.2720676 | Iter7 | 7 |
9.996350 | rmse | standard | 13.20057 | 20 | 0.2720690 | Iter9 | 9 |
9.995326 | rmse | standard | 13.20058 | 20 | 0.2720729 | Iter10 | 10 |
tidymodels
# Parameter set of the lasso workflow with the penalty range replaced by
# c(-2, 1). dials::penalty() interprets its range on the log10 scale by
# default, so this spans roughly 0.01 to 10.
lasso_params <- update(
  extract_parameter_set_dials(lasso_wf),
  penalty = penalty(range = c(-2, 1))
)

# Bayesian search over the lasso penalty: starts from the earlier tuning
# results (tuning_lasso_results) and runs 25 further iterations on
# candy_folds. The first metric in candy_metrics is the one being
# optimized (rmse in this case).
bayes_lasso <- tune_bayes(
  lasso_wf,
  resamples = candy_folds,
  param_info = lasso_params,
  initial = tuning_lasso_results,
  metrics = candy_metrics,
  iter = 25
)
tidymodels
tidymodels
tidymodels
penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
---|---|---|---|---|---|---|---|
2.936401 | rmse | standard | 12.76520 | 20 | 0.3059454 | Iter16 | 16 |
2.937813 | rmse | standard | 12.76520 | 20 | 0.3059406 | Iter20 | 20 |
2.934824 | rmse | standard | 12.76520 | 20 | 0.3059506 | Iter19 | 19 |
2.940832 | rmse | standard | 12.76521 | 20 | 0.3059305 | Iter25 | 25 |
2.932606 | rmse | standard | 12.76521 | 20 | 0.3059580 | Iter22 | 22 |
tidymodels
tidymodels
tidymodels
tidymodels
penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
---|---|---|---|---|---|---|---|
10.000000 | rmse | standard | 13.20054 | 20 | 0.2720551 | Iter19 | 19 |
9.513817 | rmse | standard | 13.20542 | 20 | 0.2739433 | Iter42 | 42 |
9.079220 | rmse | standard | 13.21026 | 20 | 0.2758191 | Iter31 | 31 |
8.818478 | rmse | standard | 13.21363 | 20 | 0.2770621 | Iter9 | 9 |
8.415657 | rmse | standard | 13.21916 | 20 | 0.2790967 | Iter45 | 45 |
tidymodels
tidymodels
tidymodels
tidymodels
penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
---|---|---|---|---|---|---|---|
4.641589 | rmse | standard | 13.09389 | 20 | 0.3774972 | initial_Preprocessor1_Model09 | 0 |
9.934398 | rmse | standard | 13.20114 | 20 | 0.2723052 | Iter39 | 39 |
9.189444 | rmse | standard | 13.20896 | 20 | 0.2752983 | Iter25 | 25 |
8.786187 | rmse | standard | 13.21407 | 20 | 0.2772191 | Iter37 | 37 |
8.333199 | rmse | standard | 13.22037 | 20 | 0.2795509 | Iter26 | 26 |