Module 11 Exercise - Machine Learning

Load Libraries

# Data Handling
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.2.3
Warning: package 'ggplot2' was built under R version 4.2.3
Warning: package 'tibble' was built under R version 4.2.3
Warning: package 'tidyr' was built under R version 4.2.3
Warning: package 'readr' was built under R version 4.2.3
Warning: package 'dplyr' was built under R version 4.2.3
Warning: package 'forcats' was built under R version 4.2.3
Warning: package 'lubridate' was built under R version 4.2.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.1     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Pathing
library(here)
here() starts at C:/Users/Kai/Documents/School/Colleges/UGA/MPH Year/Spring 2023/Modern Data Analysis/kaichen-MADA-portfolio
# Model Handling
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom        1.0.4     ✔ rsample      1.1.1
✔ dials        1.1.0     ✔ tune         1.0.1
✔ infer        1.0.4     ✔ workflows    1.1.3
✔ modeldata    1.1.0     ✔ workflowsets 1.0.0
✔ parsnip      1.0.4     ✔ yardstick    1.1.0
✔ recipes      1.0.5     
Warning: package 'broom' was built under R version 4.2.3
Warning: package 'modeldata' was built under R version 4.2.3
Warning: package 'parsnip' was built under R version 4.2.3
Warning: package 'recipes' was built under R version 4.2.3
Warning: package 'workflows' was built under R version 4.2.3
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Use suppressPackageStartupMessages() to eliminate package startup messages
library(rpart)

Attaching package: 'rpart'

The following object is masked from 'package:dials':

    prune
library(vip)
Warning: package 'vip' was built under R version 4.2.3

Attaching package: 'vip'

The following object is masked from 'package:utils':

    vi
# Visualizations
library(ggplot2)

Load Data

# Path to Cleaned Data
cleaned_data_path <- here("fluanalysis", "data", "cleaned_data_March.rds")

# Reading in Cleaned Data from March (Categorical Variables Have Already Been Modified)
clean_data_March <- readRDS(cleaned_data_path)

Data Setup

Set Seed for Reproducibility

set.seed(123)

Split Data

# Split Data
data_division <- initial_split(clean_data_March, prop = 70/100, strata = BodyTemp)

# Create Data Frames from Split Data
training_data <- training(data_division)
testing_data <- testing(data_division)

Cross-Validation

post_cv <- vfold_cv(training_data, v = 5, repeats = 5, strata = BodyTemp)

Recipe for Data and Fitting

Set Model to Linear Regression

linear_model <- linear_reg() %>% set_engine("lm")

Workflow

# Recipe
linear_recipe <- recipe(BodyTemp ~ ., data = training_data)

# Make Workflow
linear_workflow <- workflow() %>%
  add_model(linear_model) %>%
  add_recipe(linear_recipe)

Null Model Setup

# Current Null Model
current_null_model <- null_model() %>% 
  set_engine("parsnip") %>% 
  set_mode("regression")

# Null Model Recipe & Workflow
null_recipe <- recipe(BodyTemp ~., data = training_data) %>%
  step_zv(all_predictors())
null_workflow <- workflow() %>%
  add_model(current_null_model) %>%
  add_recipe(null_recipe)

extract_parameter_set_dials(null_workflow)
Collection of 0 parameters for tuning

[1] identifier type       object    
<0 rows> (or 0-length row.names)
# Training Data: RMSE
fitted_null <- null_workflow %>%
  fit(data = training_data)
## Check Fit
fitted_null %>%
  extract_fit_parsnip() %>%
  tidy()
# A tibble: 1 × 1
  value
  <dbl>
1  98.9
## Augmentation
augmented_null <- augment(fitted_null, training_data)
augmented_null %>%
  select(BodyTemp, .pred) %>%
  rmse(BodyTemp, .pred)
# A tibble: 1 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 rmse    standard        1.21
# Test Data: RMSE
## Augmentation
augmented_null_test <- augment(fitted_null, testing_data)
augmented_null_test %>%
  select(BodyTemp, .pred) %>%
  rmse(BodyTemp, .pred)
# A tibble: 1 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 rmse    standard        1.16

Model Tuning and Fitting

Tree

# Model Specification
tune_spec <- 
  decision_tree(
    cost_complexity = tune(),
    tree_depth = tune()
  ) %>%
  set_engine("rpart") %>%
  set_mode("regression")

# Grid Creation
tree_grid <- grid_regular(
  cost_complexity(),
  tree_depth(),
  levels = 5
)

# Tree Workflow
tree_workflow <- workflow() %>%
  add_model(tune_spec) %>%
  add_formula(BodyTemp ~.)

tree_res <- tree_workflow %>%
  tune_grid(resamples = post_cv, grid = tree_grid)
! Fold1, Repeat1: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold2, Repeat1: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold3, Repeat1: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold4, Repeat1: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold5, Repeat1: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold1, Repeat2: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold2, Repeat2: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold3, Repeat2: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold4, Repeat2: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold5, Repeat2: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold1, Repeat3: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold2, Repeat3: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold3, Repeat3: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold4, Repeat3: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold5, Repeat3: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold1, Repeat4: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold2, Repeat4: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold3, Repeat4: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold4, Repeat4: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold5, Repeat4: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold1, Repeat5: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold2, Repeat5: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold3, Repeat5: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold4, Repeat5: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
! Fold5, Repeat5: internal:
  There was 1 warning in `dplyr::summarise()`.
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 1`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 4`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 8`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 11`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
  ℹ In argument: `.estimate = metric_fn(truth = BodyTemp, estimate = .pr...
    = na_rm)`.
  ℹ In group 1: `cost_complexity = 0.1`, `tree_depth = 15`.
  Caused by warning:
  ! A correlation computation is required, but `estimate` is constant an...
# Collect Metrics
tree_res %>% collect_metrics() %>%
  mutate(tree_depth = factor(tree_depth)) %>%
  ggplot(aes(cost_complexity, mean, color = tree_depth)) + 
  geom_line(linewidth = 1.5, alpha = 0.6) +
  geom_point(size = 2) + 
  facet_wrap(~ .metric, scales = "free", nrow = 2) +
  scale_color_viridis_d(option = "plasma", begin = .9, end = 0)
Warning: Removed 5 rows containing missing values (`geom_line()`).
Warning: Removed 5 rows containing missing values (`geom_point()`).

# Show and Select From the Best
tree_res %>% show_best("rmse")
# A tibble: 5 × 8
  cost_complexity tree_depth .metric .estimator  mean     n std_err .config     
            <dbl>      <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>       
1    0.0000000001          1 rmse    standard    1.19    25  0.0181 Preprocesso…
2    0.0000000178          1 rmse    standard    1.19    25  0.0181 Preprocesso…
3    0.00000316            1 rmse    standard    1.19    25  0.0181 Preprocesso…
4    0.000562              1 rmse    standard    1.19    25  0.0181 Preprocesso…
5    0.0000000001          4 rmse    standard    1.19    25  0.0185 Preprocesso…
best_tree <- tree_res %>% select_best("rmse")

final_workflow <- tree_workflow %>% finalize_workflow(best_tree)

final_fit <- final_workflow %>% fit(training_data)

# To Be Removed:
# final_fit <- final_workflow %>% last_fit(data_division)

The best tree model, as determined in R, has an RMSE of 1.189. The null model has an RMSE of 1.209.

Tree Model Evaluation

# Model Predictions from Tuned Model vs Actual Outcomes
tree_predictions <- predict(final_fit, training_data)
dummy_column <- c(1:nrow(tree_predictions))
tree_predictions <- cbind(training_data, tree_predictions) %>% 
  select(BodyTemp, .pred) %>% 
  cbind(dummy_column)
tree_predictions <- tree_predictions %>% pivot_longer(BodyTemp:.pred, names_to = "Group", values_to = "Temperature")
## Graph
ggplot(tree_predictions, aes(x = dummy_column, y = Temperature, group = Group, color = Group)) + geom_point() + geom_jitter() + theme(
  axis.title.x = element_blank(), 
  axis.text.x = element_blank(), 
  axis.ticks.x = element_blank())

# Plot Residuals
tree_residuals <- final_fit %>%
  augment(training_data) %>%
  select(.pred, BodyTemp) %>%
  mutate(.resid = BodyTemp - .pred)
## Graph
ggplot(tree_residuals, aes(x = BodyTemp, y = .resid)) + geom_point() + geom_hline(yintercept = 0, color = "black", linewidth = 1)

Lasso Model

# Set Aside Data for Validation
val_set <- validation_split(training_data, strata = BodyTemp, prop = 0.80)

# Build Model for Penalized Linear Regression
lr_mod <- linear_reg(penalty = tune(), mixture = 1) %>%
  set_engine("glmnet")
## Recipe
lr_workflow <- workflow() %>% add_model(lr_mod) %>% add_recipe(linear_recipe)

# Tuning Grid
lr_reg_grid <- tibble(penalty = 10^seq(-4, -1, length.out = 30))
lr_reg_grid %>% top_n(-5)
Selecting by penalty
# A tibble: 5 × 1
   penalty
     <dbl>
1 0.0001  
2 0.000127
3 0.000161
4 0.000204
5 0.000259
lr_reg_grid %>% top_n(5)
Selecting by penalty
# A tibble: 5 × 1
  penalty
    <dbl>
1  0.0386
2  0.0489
3  0.0621
4  0.0788
5  0.1   
lr_res <- lr_workflow %>%
  tune_grid(val_set, grid = lr_reg_grid, 
            control = control_grid(save_pred = TRUE), 
            metrics = metric_set(rmse))
Warning: package 'glmnet' was built under R version 4.2.3
lr_res %>% collect_metrics() %>% 
  ggplot(aes(penalty, mean)) + geom_point() +
  geom_line()

# Check Performance
top_models <- lr_res %>% show_best("rmse", n = 15) %>%
  arrange(penalty)
top_models %>% arrange(mean)
# A tibble: 15 × 7
   penalty .metric .estimator  mean     n std_err .config              
     <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
 1 0.1     rmse    standard    1.23     1      NA Preprocessor1_Model30
 2 0.0788  rmse    standard    1.23     1      NA Preprocessor1_Model29
 3 0.0621  rmse    standard    1.24     1      NA Preprocessor1_Model28
 4 0.0489  rmse    standard    1.25     1      NA Preprocessor1_Model27
 5 0.0386  rmse    standard    1.26     1      NA Preprocessor1_Model26
 6 0.0304  rmse    standard    1.27     1      NA Preprocessor1_Model25
 7 0.0240  rmse    standard    1.27     1      NA Preprocessor1_Model24
 8 0.0189  rmse    standard    1.28     1      NA Preprocessor1_Model23
 9 0.0149  rmse    standard    1.29     1      NA Preprocessor1_Model22
10 0.0117  rmse    standard    1.29     1      NA Preprocessor1_Model21
11 0.00924 rmse    standard    1.29     1      NA Preprocessor1_Model20
12 0.00728 rmse    standard    1.30     1      NA Preprocessor1_Model19
13 0.00574 rmse    standard    1.30     1      NA Preprocessor1_Model18
14 0.00452 rmse    standard    1.30     1      NA Preprocessor1_Model17
15 0.00356 rmse    standard    1.30     1      NA Preprocessor1_Model16
# Select Best Model (Following Example in Tutorial)
lr_best <- lr_res %>% collect_metrics() %>% arrange(penalty) %>% slice(12)
lr_best
# A tibble: 1 × 7
  penalty .metric .estimator  mean     n std_err .config              
    <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
1 0.00137 rmse    standard    1.31     1      NA Preprocessor1_Model12

The best LASSO model, as determined by R, has RMSE = 1.226. Again, the null model has RMSE = 1.209.

LASSO Model Evaluation

# Setup of New Dataframe for Model Evaluation
lr_res %>% select_best("rmse")
# A tibble: 1 × 2
  penalty .config              
    <dbl> <chr>                
1     0.1 Preprocessor1_Model30
LASSO_Predictions <- lr_res$.predictions[[1]] %>% filter(.config == "Preprocessor1_Model30") %>% relocate(.pred, .after = BodyTemp)
dummy_column <- c(1:nrow(LASSO_Predictions))
LASSO_Predictions <- cbind(LASSO_Predictions, dummy_column) %>%
  mutate(.resid = BodyTemp - .pred)


# Plot Residuals
ggplot(LASSO_Predictions, aes(x = BodyTemp, y = .resid)) + geom_point() + geom_jitter() + geom_hline(yintercept = 0, color = "black", linewidth = 1)

# Dataframe Modification for Predicted vs Original Values
LASSO_Predictions <- LASSO_Predictions %>% pivot_longer(BodyTemp:.pred, names_to = "Group", values_to = "Temperature")
## Graph
ggplot(LASSO_Predictions, aes(x = dummy_column, y = Temperature, group = Group, color = Group)) + geom_point() + geom_jitter() + theme(
  axis.title.x = element_blank(), 
  axis.text.x = element_blank(), 
  axis.ticks.x = element_blank())

Random Forest

# How Many Cores?
cores <- parallel::detectCores()

# Enable Parallel Processing
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  set_engine("ranger", num.threads = cores) %>%
  set_mode("regression")

# Establish Workflow
rf_workflow <- workflow() %>% add_model(rf_mod) %>% add_recipe(linear_recipe)

extract_parameter_set_dials(rf_mod)
Collection of 2 parameters for tuning

 identifier  type    object
       mtry  mtry nparam[?]
      min_n min_n nparam[+]

Model parameters needing finalization:
   # Randomly Selected Predictors ('mtry')

See `?dials::finalize` or `?dials::update.parameters` for more information.
# Generate 25 Candidate Models
rf_res <- rf_workflow %>% tune_grid(val_set, grid = 25, 
                                    control = control_grid(save_pred =  TRUE),
                                    metrics = metric_set(rmse))
i Creating pre-processing data to finalize unknown parameter: mtry
rf_res %>% show_best(metric = "rmse")
# A tibble: 5 × 8
   mtry min_n .metric .estimator  mean     n std_err .config              
  <int> <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>                
1     2    18 rmse    standard    1.24     1      NA Preprocessor1_Model14
2     3    26 rmse    standard    1.24     1      NA Preprocessor1_Model24
3     7    31 rmse    standard    1.26     1      NA Preprocessor1_Model08
4     3    13 rmse    standard    1.26     1      NA Preprocessor1_Model01
5     4    20 rmse    standard    1.26     1      NA Preprocessor1_Model22
autoplot(rf_res)

# Select Best Model
rf_best <- rf_res %>% select_best(metric = "rmse")

rf_rmse <- rf_res %>% 
  collect_predictions() %>% 
  rmse(BodyTemp, .pred) %>% 
  mutate(model = "Random Forest")

# Final Model
last_rf_mod <- rand_forest(mtry = 8, min_n = 7, trees = 1000) %>%
  set_engine("ranger", num.cores = cores, importance = "impurity") %>%
  set_mode("regression")

last_rf_workflow <- rf_workflow %>% update_model(last_rf_mod)

last_rf_fit <- last_rf_workflow %>% last_fit(data_division)
! train/test split: preprocessor 1/1, model 1/1: Unused arguments: num.cores
last_rf_fit %>% collect_metrics()
# A tibble: 2 × 4
  .metric .estimator .estimate .config             
  <chr>   <chr>          <dbl> <chr>               
1 rmse    standard     1.23    Preprocessor1_Model1
2 rsq     standard     0.00466 Preprocessor1_Model1
last_rf_fit %>% extract_fit_parsnip() %>% vip(num_features = 20)

The random forest model, according to R, has RMSE = 1.226. The null model has RMSE = 1.209.

Model Evaluation for Random Forest

# Model Evaluation Dataframe Setup
last_rf_fit %>% select_best("rmse")
# A tibble: 1 × 2
  .workflow  .config             
  <list>     <chr>               
1 <workflow> Preprocessor1_Model1
random_forest_eval <- last_rf_fit$.predictions[[1]] %>% 
  filter(.config == "Preprocessor1_Model1") %>% 
  relocate(.pred, .after = BodyTemp)
dummy_column <- c(1:nrow(random_forest_eval))
random_forest_eval <- cbind(random_forest_eval, dummy_column) %>% 
  mutate(.resid = BodyTemp - .pred)

# Plot Residuals
ggplot(random_forest_eval, aes(x = BodyTemp, y = .resid)) + 
  geom_point() + geom_hline(yintercept = 0, color = "black", linewidth = 1)

# Model Predictions from Tuned Model vs Actual Outcomes
random_forest_eval <- random_forest_eval %>% 
  pivot_longer(BodyTemp:.pred, names_to = "Group", values_to = "Temperature")
## Graph
ggplot(random_forest_eval, aes(x = dummy_column, y = Temperature, group = Group, color = Group)) + geom_point() + geom_jitter() + theme(
  axis.title.x = element_blank(), 
  axis.text.x = element_blank(), 
  axis.ticks.x = element_blank())

Model Selection & Final Evaluation

Based on the results, I have chosen the tree model as the best model of the 3 since it is the only one that has a lower RMSE than the null model (meaning that it outperforms the null). The LASSO and random forest models, in contrast, have higher RMSEs than the null (and underperform). However, it is noteworthy that

# Check Performance on Test Data
selected_model <- final_workflow %>% fit(testing_data)
selected_model %>% extract_fit_parsnip()
parsnip model object

n= 222 

node), split, n, deviance, yval
      * denotes terminal node

1) root 222 300.44290 98.93198  
  2) SubjectiveFever_Yes< 0.5 64  46.65938 98.59688 *
  3) SubjectiveFever_Yes>=0.5 158 243.68540 99.06772 *
## RMSE
selected_augment <- augment(selected_model, testing_data) %>% 
  select(BodyTemp, .pred)
selected_augment %>% rmse(BodyTemp, .pred)
# A tibble: 1 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 rmse    standard        1.14
# Set Up Final Model Dataframe for Evaluation
dummy_column <- c(1:nrow(selected_augment))
selected_augment <- selected_augment %>% 
  cbind(dummy_column) %>% 
  mutate(.resid = BodyTemp - .pred)

# Plot Residuals
ggplot(selected_augment, aes(x = BodyTemp, y = .resid)) + geom_point() + geom_hline(yintercept = 0, color = "black", linewidth = 1)

# Model Predictions from Tuned Model vs Actual Outcomes
selected_augment <- selected_augment %>% pivot_longer(BodyTemp:.pred, names_to = "Group", values_to = "Temperature")
## Graph
ggplot(selected_augment, aes(x = dummy_column, y = Temperature, group = Group, color = Group)) + geom_point() + geom_jitter() + theme(
  axis.title.x = element_blank(), 
  axis.text.x = element_blank(), 
  axis.ticks.x = element_blank())

As the RMSE from the model is 1.143, it performed better than the null model, but only just (RMSE = 1.163).