The data for this lecture comes from the FiveThirtyEight article The Ultimate Halloween Candy Power Ranking by Walt Hickey. To collect the data, Hickey and collaborators at FiveThirtyEight set up an experiment in which people could vote on a series of randomly generated candy match-ups (e.g., Reese’s vs. Skittles). Click here to check out some of the match-ups.
The data set contains 12 characteristics and win percentage from 85 candies in the experiment.
Rows: 85
Columns: 13
$ competitorname <chr> "100 Grand", "3 Musketeers", "One dime", "One quarter…
$ chocolate <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ fruity <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE…
$ caramel <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ peanutyalmondy <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, …
$ nougat <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ crispedricewafer <lgl> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ hard <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ bar <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ pluribus <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE…
$ sugarpercent <dbl> 0.732, 0.604, 0.011, 0.011, 0.906, 0.465, 0.604, 0.31…
$ pricepercent <dbl> 0.860, 0.511, 0.116, 0.511, 0.511, 0.767, 0.767, 0.51…
$ winpercent <dbl> 66.97173, 67.60294, 32.26109, 46.11650, 52.34146, 50.…
# Build the modelling table from `candy_rankings`: drop the `competitorname`
# column, multiply the sugar and price columns by 100, and recode every
# logical column as a factor.
candy_rankings_clean <- candy_rankings |>
  select(-competitorname) |>
  mutate(
    # proportions -> percentages
    sugarpercent = sugarpercent * 100,
    pricepercent = pricepercent * 100,
    # logicals -> factors with levels "FALSE" then "TRUE"
    across(where(is.logical), \(flag) factor(flag, levels = c("FALSE", "TRUE")))
  )
Rows: 85
Columns: 12
$ chocolate <fct> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ fruity <fct> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE…
$ caramel <fct> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ peanutyalmondy <fct> FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, …
$ nougat <fct> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE,…
$ crispedricewafer <fct> TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
$ hard <fct> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
$ bar <fct> TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, F…
$ pluribus <fct> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE…
$ sugarpercent <dbl> 73.2, 60.4, 1.1, 1.1, 90.6, 46.5, 60.4, 31.3, 90.6, 6…
$ pricepercent <dbl> 86.0, 51.1, 11.6, 51.1, 51.1, 76.7, 76.7, 51.1, 32.5,…
$ winpercent <dbl> 66.97173, 67.60294, 32.26109, 46.11650, 52.34146, 50.…
dials which is part of the tidyverse
tune()dials which is part of the tidyverse
tune()dials which is part of the tidyverse
tune()grid_regulartune_griddials which is part of the tidyverse
tune()grid_regulartune_grid
Best RMSE
# Finalize `ridge_wf` with the parameters held in `best_rmse_ridge`, fit the
# finalized workflow on `candy_train`, and render the tidied fit as a table.
ridge_rmse_final <- ridge_wf |>
  finalize_workflow(best_rmse_ridge)
ridge_rmse_fit <- ridge_rmse_final |>
  fit(data = candy_train)
ridge_rmse_fit |>
  tidy() |>
  kable()
| term | estimate | penalty |
|---|---|---|
| (Intercept) | 49.8221817 | 4.641589 |
| sugarpercent | 1.6105235 | 4.641589 |
| pricepercent | -0.8755351 | 4.641589 |
| chocolate_TRUE. | 5.5626786 | 4.641589 |
| fruity_TRUE. | 0.8290067 | 4.641589 |
| caramel_TRUE. | 0.9577679 | 4.641589 |
| peanutyalmondy_TRUE. | 2.8412296 | 4.641589 |
| nougat_TRUE. | 0.5018516 | 4.641589 |
| crispedricewafer_TRUE. | 1.6202899 | 4.641589 |
| hard_TRUE. | -1.2156466 | 4.641589 |
| bar_TRUE. | 1.4669984 | 4.641589 |
| pluribus_TRUE. | 0.4265386 | 4.641589 |
Best RMSE
Best RMSE
# Finalize `ridge_wf` with the parameters held in `best_ose_rmse_ridge`, fit
# the finalized workflow on `candy_train`, and render the tidied fit as a table.
# NOTE(review): this assigns to `ridge_rmse_final` / `ridge_rmse_fit`, the same
# names used for the `best_rmse_ridge` fit, so it overwrites them -- confirm
# that is intended.
ridge_rmse_final <- ridge_wf |>
  finalize_workflow(best_ose_rmse_ridge)
ridge_rmse_fit <- ridge_rmse_final |>
  fit(data = candy_train)
ridge_rmse_fit |>
  tidy() |>
  kable()
| term | estimate | penalty |
|---|---|---|
| (Intercept) | 49.8221817 | 4.641589 |
| sugarpercent | 1.6105235 | 4.641589 |
| pricepercent | -0.8755351 | 4.641589 |
| chocolate_TRUE. | 5.5626786 | 4.641589 |
| fruity_TRUE. | 0.8290067 | 4.641589 |
| caramel_TRUE. | 0.9577679 | 4.641589 |
| peanutyalmondy_TRUE. | 2.8412296 | 4.641589 |
| nougat_TRUE. | 0.5018516 | 4.641589 |
| crispedricewafer_TRUE. | 1.6202899 | 4.641589 |
| hard_TRUE. | -1.2156466 | 4.641589 |
| bar_TRUE. | 1.4669984 | 4.641589 |
| pluribus_TRUE. | 0.4265386 | 4.641589 |
Best RMSE
# Add predictions on `candy_test` from both fitted workflows as new columns,
# then compute RMSE of the ridge predictions against `winpercent`.
candy_test_wpreds <- candy_test |>
  mutate(
    ridge_preds = predict(ridge_rmse_fit, new_data = candy_test)$.pred,
    lasso_preds = predict(lasso_rmse_fit, new_data = candy_test)$.pred
  )
rmse(candy_test_wpreds, truth = winpercent, estimate = ridge_preds)
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 10.6
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rmse standard 12.0
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.476
# A tibble: 1 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 rsq standard 0.341
tidymodels
# Parameter set for the ridge workflow with the penalty range replaced by
# c(-2, 1). dials::penalty() takes its range on the log10 scale by default,
# i.e. penalties from 0.01 to 10.
# The object must be named `ridge_params`: that is the name passed to
# `param_info` below.
ridge_params <- ridge_wf |>
  extract_parameter_set_dials() |>
  update(penalty = penalty(c(-2, 1)))
# Bayesian search over the penalty using the `candy_folds` resamples.
# `initial` is presumably the earlier grid-search result -- confirm where
# `tuning_ridge_results` is created.
bayes_ridge <- ridge_wf |>
  tune_bayes(
    resamples = candy_folds,
    metrics = candy_metrics, # the first metric is the one optimized (rmse here)
    initial = tuning_ridge_results,
    param_info = ridge_params,
    iter = 25
  )
tidymodelstidymodelstidymodels
| penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
|---|---|---|---|---|---|---|---|
| 9.999186 | rmse | standard | 13.20055 | 20 | 0.2720582 | Iter11 | 11 |
| 9.999034 | rmse | standard | 13.20055 | 20 | 0.2720587 | Iter8 | 8 |
| 9.996720 | rmse | standard | 13.20057 | 20 | 0.2720676 | Iter7 | 7 |
| 9.996350 | rmse | standard | 13.20057 | 20 | 0.2720690 | Iter9 | 9 |
| 9.995326 | rmse | standard | 13.20058 | 20 | 0.2720729 | Iter10 | 10 |
tidymodels
# Parameter set for the lasso workflow with the penalty range replaced by
# c(-2, 1). dials::penalty() takes its range on the log10 scale by default,
# i.e. penalties from 0.01 to 10.
# The object must be named `lasso_params`: that is the name passed to
# `param_info` below.
lasso_params <- lasso_wf |>
  extract_parameter_set_dials() |>
  update(penalty = penalty(c(-2, 1)))
# Bayesian search over the penalty using the `candy_folds` resamples.
# `initial` is presumably the earlier grid-search result -- confirm where
# `tuning_lasso_results` is created.
bayes_lasso <- lasso_wf |>
  tune_bayes(
    resamples = candy_folds,
    metrics = candy_metrics, # the first metric is the one optimized (rmse here)
    initial = tuning_lasso_results,
    param_info = lasso_params,
    iter = 25
  )
tidymodelstidymodelstidymodels
| penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
|---|---|---|---|---|---|---|---|
| 2.936401 | rmse | standard | 12.76520 | 20 | 0.3059454 | Iter16 | 16 |
| 2.937813 | rmse | standard | 12.76520 | 20 | 0.3059406 | Iter20 | 20 |
| 2.934824 | rmse | standard | 12.76520 | 20 | 0.3059506 | Iter19 | 19 |
| 2.940832 | rmse | standard | 12.76521 | 20 | 0.3059305 | Iter25 | 25 |
| 2.932606 | rmse | standard | 12.76521 | 20 | 0.3059580 | Iter22 | 22 |
tidymodelstidymodelstidymodelstidymodels
| penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
|---|---|---|---|---|---|---|---|
| 10.000000 | rmse | standard | 13.20054 | 20 | 0.2720551 | Iter19 | 19 |
| 9.513817 | rmse | standard | 13.20542 | 20 | 0.2739433 | Iter42 | 42 |
| 9.079220 | rmse | standard | 13.21026 | 20 | 0.2758191 | Iter31 | 31 |
| 8.818478 | rmse | standard | 13.21363 | 20 | 0.2770621 | Iter9 | 9 |
| 8.415657 | rmse | standard | 13.21916 | 20 | 0.2790967 | Iter45 | 45 |
tidymodelstidymodelstidymodelstidymodels
| penalty | .metric | .estimator | mean | n | std_err | .config | .iter |
|---|---|---|---|---|---|---|---|
| 4.641589 | rmse | standard | 13.09389 | 20 | 0.3774972 | initial_Preprocessor1_Model09 | 0 |
| 9.934398 | rmse | standard | 13.20114 | 20 | 0.2723052 | Iter39 | 39 |
| 9.189444 | rmse | standard | 13.20896 | 20 | 0.2752983 | Iter25 | 25 |
| 8.786187 | rmse | standard | 13.21407 | 20 | 0.2772191 | Iter37 | 37 |
| 8.333199 | rmse | standard | 13.22037 | 20 | 0.2795509 | Iter26 | 26 |