AE 19: Version your housing model

Suggested answers

Application exercise
Answers
Modified

November 18, 2024

Load the data

library(tidyverse)

# Read the geocoded Tompkins County home sales and preview every column
housing <- read_csv("data/tompkins-home-sales-geocoded.csv")
housing |> glimpse()
Rows: 1,270
Columns: 12
$ sold_date    <date> 2022-09-12, 2022-09-12, 2022-09-12, 2022-09-13, 2022-07-…
$ price        <dbl> 340000, 390000, 625500, 246600, 172000, 205000, 230000, 2…
$ beds         <dbl> 2, 4, 2, 2, NA, 2, 5, 5, 3, 5, 3, 2, 2, 4, 3, 5, 4, 3, 4,…
$ baths        <dbl> 3.0, 3.0, 3.0, 1.5, NA, 1.0, 2.0, 2.0, 2.5, 4.0, 1.0, 1.5…
$ area         <dbl> 1864, 3252, 1704, 1264, 2644, 820, 2900, 2364, 2016, 2882…
$ lot_size     <dbl> 4.50000000, 0.33999082, 65.00000000, 0.21000918, 0.130004…
$ year_built   <dbl> 1999, 1988, 1988, 1953, 1870, 1932, 1850, 1985, 1984, 200…
$ hoa_month    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ town         <chr> "Newfield", "Ithaca", "Dryden", "Ithaca", "Dryden", "Itha…
$ municipality <chr> "Unincorporated", "Unincorporated", "Unincorporated", "It…
$ long         <dbl> -76.59488, -76.45546, -76.35953, -76.52435, -76.29872, -7…
$ lat          <dbl> 42.38609, 42.47046, 42.43971, 42.45208, 42.49046, 42.4273…

Build a model

  • Log transform the price variable
  • Split into training/test set
library(tidymodels)

# Fix the RNG seed so the random partition below is reproducible
set.seed(123)

# Put price on the log10 scale, then keep 80% of rows for training
housing_split <- initial_split(
  mutate(housing, price = log10(price)),
  prop = 0.8
)

housing_test <- testing(housing_split)
housing_train <- training(housing_split)

Train a linear regression model:

# Linear regression of log10 price on four home characteristics,
# estimated on the training split only
housing_fit <- workflow() |>
  add_formula(price ~ beds + baths + area + year_built) |>
  add_model(linear_reg()) |>
  fit(data = housing_train)

Create a deployable model object

library(vetiver)
# Wrap the fitted workflow as a deployable vetiver model and print its summary
v <- vetiver_model(housing_fit, "tompkins-housing")
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
A lm regression modeling workflow using 4 features
# Rebuild the deployable object, this time supplying our own description
# in place of the auto-generated one
v <- vetiver_model(
  housing_fit,
  "tompkins-housing",
  description = "A linear regression model to predict logged sale price of homes in Tompkins County"
)
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
A linear regression model to predict logged sale price of homes in Tompkins
County using 4 features

Pin your model

library(pins)

# Create a temporary board and pin the vetiver model to it
board <- board_temp()
vetiver_pin_write(board, v)

# retrieve your model metadata
pin_meta(board, "tompkins-housing")
List of 13
 $ file       : chr "tompkins-housing.rds"
 $ file_size  : 'fs_bytes' int 55.2K
 $ pin_hash   : chr "02bc535b5fd3f6c8"
 $ type       : chr "rds"
 $ title      : chr "tompkins-housing: a pinned list"
 $ description: chr "A linear regression model to predict logged sale price of homes in Tompkins County"
 $ tags       : NULL
 $ urls       : NULL
 $ created    : POSIXct[1:1], format: "2024-11-19 12:40:07"
 $ api_version: int 1
 $ user       :List of 2
  ..$ required_pkgs: chr [1:3] "parsnip" "stats" "workflows"
  ..$ renv_lock    : NULL
 $ name       : chr "tompkins-housing"
 $ local      :List of 3
  ..$ dir    : 'fs_path' chr "/var/folders/qq/43tglwzs4kzcmwhp0tzcrnx00000gp/T/RtmpEu5itE/pins-55754cbeb53e/tompkins-housing/20241119T174007Z-02bc5"
  ..$ url    : NULL
  ..$ version: chr "20241119T174007Z-02bc5"

Store a new version

Train a new version of your model (here, the same linear regression with `town` added as a predictor):

# Refit the linear regression with `town` added to the predictors
housing_fit <- workflow() |>
  add_formula(price ~ beds + baths + area + year_built + town) |>
  add_model(linear_reg()) |>
  fit(data = housing_train)

Store this new model as a new version of the same pin:

# Reuse the existing pin name so the write is stored as another version
# of the same pin
v <- vetiver_model(
  model = housing_fit,
  model_name = "tompkins-housing",
  versioned = TRUE
)
vetiver_pin_write(board, v)

What versions do you have?

# List every stored version of the pin
pin_versions(board, "tompkins-housing")
# A tibble: 2 × 3
  version                created             hash 
  <chr>                  <dttm>              <chr>
1 20241119T174007Z-02bc5 2024-11-19 12:40:07 02bc5
2 20241119T174007Z-b6f09 2024-11-19 12:40:07 b6f09

Create a new {vetiver} model

Fit a random forest model

# Preprocessing: impute missing numeric predictors with the mean and
# missing nominal predictors with the mode
rf_rec <- recipe(
  price ~ beds + baths + area + year_built + town,
  data = housing_train
) |>
  step_impute_mean(all_numeric_predictors()) |>
  step_impute_mode(all_nominal_predictors())

# Random forest regression (200 trees) on the preprocessed training data
housing_fit <- workflow(
  preprocessor = rf_rec,
  spec = rand_forest(mode = "regression", trees = 200)
) |>
  fit(data = housing_train)

Store your model:

# Wrap the random forest under the existing pin name and write it to the board
v <- vetiver_model(
  model = housing_fit,
  model_name = "tompkins-housing",
  versioned = TRUE
)
vetiver_pin_write(board, v)

Model Card

Open the Model Card template in RStudio by choosing “File” ➡️ “New File” ➡️ “R Markdown” ➡️ “From Template” ➡️ “Vetiver Model Card”.

Create a vetiver REST API

Call your new API endpoints

Run your API in the background

Create a new plumber API R script:

# Generate the plumber API script for the pinned model
vetiver_write_plumber(board = board, name = "tompkins-housing")

Then run plumber-run.R as a background job. This will allow you to run the API locally and still access the console.

Return predictions from your model API:

# Point a vetiver endpoint at the locally running /predict route
url <- "http://127.0.0.1:5331/predict"
endpoint <- vetiver_endpoint(url)

# Send ten randomly sampled test-set homes to the API for prediction
housing_test |>
  slice_sample(n = 10) |>
  predict(endpoint, new_data = _)

Optional: try /metadata or /ping here:

library(httr2)

# Hit the /ping route and parse the JSON body of the response
url <- "http://127.0.0.1:5331/ping"

resp_body_json(req_perform(request(url)))

Acknowledgments

# Record the R version, platform settings, and package versions for this render
sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.1 (2024-06-14)
 os       macOS Sonoma 14.6.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/New_York
 date     2024-11-19
 pandoc   3.4 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package      * version    date (UTC) lib source
 archive        1.1.9      2024-09-12 [1] CRAN (R 4.4.1)
 backports      1.5.0      2024-05-23 [1] CRAN (R 4.4.0)
 bit            4.0.5      2022-11-15 [1] CRAN (R 4.3.0)
 bit64          4.0.5      2020-08-30 [1] CRAN (R 4.3.0)
 broom        * 1.0.6      2024-05-17 [1] CRAN (R 4.4.0)
 bundle         0.1.1      2023-09-09 [1] CRAN (R 4.4.0)
 butcher        0.3.4      2024-04-11 [1] CRAN (R 4.4.0)
 class          7.3-22     2023-05-03 [1] CRAN (R 4.4.0)
 cli            3.6.3      2024-06-21 [1] CRAN (R 4.4.0)
 codetools      0.2-20     2024-03-31 [1] CRAN (R 4.4.1)
 crayon         1.5.3      2024-06-20 [1] CRAN (R 4.4.0)
 data.table     1.15.4     2024-03-30 [1] CRAN (R 4.3.1)
 dials        * 1.3.0      2024-07-30 [1] CRAN (R 4.4.0)
 DiceDesign     1.10       2023-12-07 [1] CRAN (R 4.3.1)
 dichromat      2.0-0.1    2022-05-02 [1] CRAN (R 4.3.0)
 digest         0.6.35     2024-03-11 [1] CRAN (R 4.3.1)
 dplyr        * 1.1.4      2023-11-17 [1] CRAN (R 4.3.1)
 ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.3.0)
 evaluate       0.24.0     2024-06-10 [1] CRAN (R 4.4.0)
 fansi          1.0.6      2023-12-08 [1] CRAN (R 4.3.1)
 farver         2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
 fastmap        1.2.0      2024-05-15 [1] CRAN (R 4.4.0)
 forcats      * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
 foreach        1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
 fs             1.6.4      2024-04-25 [1] CRAN (R 4.4.0)
 furrr          0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
 future         1.33.2     2024-03-26 [1] CRAN (R 4.3.1)
 future.apply   1.11.2     2024-03-28 [1] CRAN (R 4.3.1)
 generics       0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
 ggplot2      * 3.5.1      2024-04-23 [1] CRAN (R 4.3.1)
 globals        0.16.3     2024-03-08 [1] CRAN (R 4.3.1)
 glue           1.8.0      2024-09-30 [1] CRAN (R 4.4.1)
 gower          1.0.1      2022-12-22 [1] CRAN (R 4.3.0)
 GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.3.0)
 gtable         0.3.5      2024-04-22 [1] CRAN (R 4.3.1)
 hardhat        1.4.0      2024-06-02 [1] CRAN (R 4.4.0)
 here           1.0.1      2020-12-13 [1] CRAN (R 4.3.0)
 hms            1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
 htmltools      0.5.8.1    2024-04-04 [1] CRAN (R 4.3.1)
 htmlwidgets    1.6.4      2023-12-06 [1] CRAN (R 4.3.1)
 infer        * 1.0.7      2024-03-25 [1] CRAN (R 4.3.1)
 ipred          0.9-14     2023-03-09 [1] CRAN (R 4.3.0)
 iterators      1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
 jsonlite       1.8.9      2024-09-20 [1] CRAN (R 4.4.1)
 knitr          1.47       2024-05-29 [1] CRAN (R 4.4.0)
 lattice        0.22-6     2024-03-20 [1] CRAN (R 4.4.0)
 lava           1.8.0      2024-03-05 [1] CRAN (R 4.3.1)
 lhs            1.1.6      2022-12-17 [1] CRAN (R 4.3.0)
 lifecycle      1.0.4      2023-11-07 [1] CRAN (R 4.3.1)
 listenv        0.9.1      2024-01-29 [1] CRAN (R 4.3.1)
 lubridate    * 1.9.3      2023-09-27 [1] CRAN (R 4.3.1)
 magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
 MASS           7.3-61     2024-06-13 [1] CRAN (R 4.4.0)
 Matrix         1.7-0      2024-03-22 [1] CRAN (R 4.4.0)
 modeldata    * 1.4.0      2024-06-19 [1] CRAN (R 4.4.0)
 modelenv       0.1.1      2023-03-08 [1] CRAN (R 4.3.0)
 nnet           7.3-19     2023-05-03 [1] CRAN (R 4.4.0)
 parallelly     1.37.1     2024-02-29 [1] CRAN (R 4.3.1)
 parsnip      * 1.2.1      2024-03-22 [1] CRAN (R 4.3.1)
 pillar         1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
 pins         * 1.3.0      2023-11-09 [1] CRAN (R 4.4.0)
 pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
 prodlim        2023.08.28 2023-08-28 [1] CRAN (R 4.3.0)
 purrr        * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
 R6             2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
 ranger         0.16.0     2023-11-12 [1] CRAN (R 4.3.1)
 rappdirs       0.3.3      2021-01-31 [1] CRAN (R 4.3.0)
 RColorBrewer   1.1-3      2022-04-03 [1] CRAN (R 4.3.0)
 Rcpp           1.0.13     2024-07-17 [1] CRAN (R 4.4.0)
 readr        * 2.1.5      2024-01-10 [1] CRAN (R 4.3.1)
 recipes      * 1.0.10     2024-02-18 [1] CRAN (R 4.3.1)
 rlang          1.1.4      2024-06-04 [1] CRAN (R 4.3.3)
 rmarkdown      2.27       2024-05-17 [1] CRAN (R 4.4.0)
 rpart          4.1.23     2023-12-05 [1] CRAN (R 4.4.0)
 rprojroot      2.0.4      2023-11-05 [1] CRAN (R 4.3.1)
 rsample      * 1.2.1      2024-03-25 [1] CRAN (R 4.3.1)
 rstudioapi     0.17.0     2024-10-16 [1] CRAN (R 4.4.1)
 scales       * 1.3.0.9000 2024-11-14 [1] Github (r-lib/scales@ee03582)
 sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
 stringi        1.8.4      2024-05-06 [1] CRAN (R 4.3.1)
 stringr      * 1.5.1      2023-11-14 [1] CRAN (R 4.3.1)
 survival       3.7-0      2024-06-05 [1] CRAN (R 4.4.0)
 tibble       * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
 tidymodels   * 1.2.0      2024-03-25 [1] CRAN (R 4.3.1)
 tidyr        * 1.3.1      2024-01-24 [1] CRAN (R 4.3.1)
 tidyselect     1.2.1      2024-03-11 [1] CRAN (R 4.3.1)
 tidyverse    * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
 timechange     0.3.0      2024-01-18 [1] CRAN (R 4.3.1)
 timeDate       4032.109   2023-12-14 [1] CRAN (R 4.3.1)
 tune         * 1.2.1      2024-04-18 [1] CRAN (R 4.3.1)
 tzdb           0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
 utf8           1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
 vctrs          0.6.5      2023-12-01 [1] CRAN (R 4.3.1)
 vetiver      * 0.2.5      2023-11-16 [1] CRAN (R 4.4.0)
 vroom          1.6.5      2023-12-05 [1] CRAN (R 4.3.1)
 withr          3.0.2      2024-10-28 [1] CRAN (R 4.4.1)
 workflows    * 1.1.4      2024-02-19 [1] CRAN (R 4.4.0)
 workflowsets * 1.1.0      2024-03-21 [1] CRAN (R 4.3.1)
 xfun           0.45       2024-06-16 [1] CRAN (R 4.4.0)
 yaml           2.3.10     2024-07-26 [1] CRAN (R 4.4.0)
 yardstick    * 1.3.1      2024-03-21 [1] CRAN (R 4.3.1)

 [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library

──────────────────────────────────────────────────────────────────────────────