AE 20: More about APIs and Docker

Suggested answers

Application exercise
Answers
Modified

November 18, 2024

Load the data

library(tidyverse)
library(pins)
library(vetiver)

# Read the geocoded Tompkins County home sales and preview the columns
housing <- read_csv("data/tompkins-home-sales-geocoded.csv")
glimpse(housing)
Rows: 1,270
Columns: 12
$ sold_date    <date> 2022-09-12, 2022-09-12, 2022-09-12, 2022-09-13, 2022-07-…
$ price        <dbl> 340000, 390000, 625500, 246600, 172000, 205000, 230000, 2…
$ beds         <dbl> 2, 4, 2, 2, NA, 2, 5, 5, 3, 5, 3, 2, 2, 4, 3, 5, 4, 3, 4,…
$ baths        <dbl> 3.0, 3.0, 3.0, 1.5, NA, 1.0, 2.0, 2.0, 2.5, 4.0, 1.0, 1.5…
$ area         <dbl> 1864, 3252, 1704, 1264, 2644, 820, 2900, 2364, 2016, 2882…
$ lot_size     <dbl> 4.50000000, 0.33999082, 65.00000000, 0.21000918, 0.130004…
$ year_built   <dbl> 1999, 1988, 1988, 1953, 1870, 1932, 1850, 1985, 1984, 200…
$ hoa_month    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ town         <chr> "Newfield", "Ithaca", "Dryden", "Ithaca", "Dryden", "Itha…
$ municipality <chr> "Unincorporated", "Unincorporated", "Unincorporated", "It…
$ long         <dbl> -76.59488, -76.45546, -76.35953, -76.52435, -76.29872, -7…
$ lat          <dbl> 42.38609, 42.47046, 42.43971, 42.45208, 42.49046, 42.4273…

Build a model

  • Log transform the price variable
  • Split into training/test set
library(tidymodels)

# Seed the RNG so the random 80/20 split is reproducible
set.seed(123)

# Put price on the log10 scale before splitting into training and test sets
housing_split <- initial_split(
  mutate(housing, price = log10(price)),
  prop = 0.8
)

housing_train <- training(housing_split)
housing_test <- testing(housing_split)

Train a random forest model:

# Recipe for the random forest: price is modeled from five predictors, with
# missing numeric values imputed by the mean and missing categorical values
# imputed by the mode
rf_rec <- recipe(
  price ~ beds + baths + area + year_built + town,
  data = housing_train
) |>
  step_impute_mean(all_numeric_predictors()) |>
  step_impute_mode(all_nominal_predictors())

# Fit the random forest workflow (200 trees, regression mode) on the training
# set; housing_fit is the model that gets deployed in the sections below
housing_fit <- workflow() |>
  add_recipe(rf_rec) |>
  add_model(rand_forest(trees = 200, mode = "regression")) |>
  fit(data = housing_train)

Create a Docker container using a local board

Pin model to a local board

# Wrap the fitted workflow as a deployable vetiver model and print its summary
v <- vetiver_model(housing_fit, "tompkins-housing")
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
A ranger regression modeling workflow using 5 features
# Use a versioned board on the local machine
board <- board_local(versioned = TRUE)

# Write the vetiver model to the board, then inspect the pin's metadata
vetiver_pin_write(board, v)

pin_meta(board, "tompkins-housing")
List of 13
 $ file       : chr "tompkins-housing.rds"
 $ file_size  : 'fs_bytes' int 762K
 $ pin_hash   : chr "48c642342a237df7"
 $ type       : chr "rds"
 $ title      : chr "tompkins-housing: a pinned list"
 $ description: chr "A ranger regression modeling workflow"
 $ tags       : NULL
 $ urls       : NULL
 $ created    : POSIXct[1:1], format: "2024-11-25 13:15:13"
 $ api_version: int 1
 $ user       :List of 2
  ..$ required_pkgs: chr [1:4] "parsnip" "ranger" "recipes" "workflows"
  ..$ renv_lock    : NULL
 $ name       : chr "tompkins-housing"
 $ local      :List of 3
  ..$ dir    : 'fs_path' chr "~/Library/Application Support/pins/tompkins-housing/20241125T181513Z-48c64"
  ..$ url    : NULL
  ..$ version: chr "20241125T181513Z-48c64"

Create Docker artifacts

# Generate the Docker artifacts for the model pinned on the local board;
# port 8080 is passed through as a Docker argument
vetiver_prepare_docker(
  board = board,
  name = "tompkins-housing",
  docker_args = list(port = 8080)
)

Build and test Docker container

# Build the image from the Dockerfile in the current directory, tagged "housing"
docker build --tag housing .
# Run the container, publishing container port 8080 on host port 8080
docker run --publish 8080:8080 housing

Create a Docker container using a cloud board

Pin to Google Cloud

In .Renviron:

GCS_AUTH_FILE="service-auth.json"
library(googleCloudStorageR)

# Switch to a board backed by a Google Cloud Storage bucket, using a prefix
# inside that bucket
board <- board_gcs("info-4940-models", prefix = "bcs88/")

# Write the same vetiver model to the cloud board and inspect its metadata
vetiver_pin_write(board, v)

pin_meta(board, "tompkins-housing")
List of 13
 $ file       : chr "tompkins-housing.rds"
 $ file_size  : 'fs_bytes' int 762K
 $ pin_hash   : chr "48c642342a237df7"
 $ type       : chr "rds"
 $ title      : chr "tompkins-housing: a pinned list"
 $ description: chr "A ranger regression modeling workflow"
 $ tags       : NULL
 $ urls       : NULL
 $ created    : POSIXct[1:1], format: "2024-11-25 13:15:15"
 $ api_version: int 1
 $ user       :List of 2
  ..$ required_pkgs: chr [1:4] "parsnip" "ranger" "recipes" "workflows"
  ..$ renv_lock    : NULL
 $ name       : chr "tompkins-housing"
 $ local      :List of 3
  ..$ dir    : 'fs_path' chr "~/Library/Caches/pins/gcs-info-4940-models/tompkins-housing/20241125T181515Z-48c64"
  ..$ url    : NULL
  ..$ version: chr "20241125T181515Z-48c64"

Create Docker artifacts

# Regenerate the Docker artifacts, this time for the model pinned on the
# cloud board; port 8080 is passed through as a Docker argument
vetiver_prepare_docker(
  board = board,
  name = "tompkins-housing",
  docker_args = list(port = 8080)
)

Build and test Docker container

# Build the image from the Dockerfile in the current directory, tagged "housing"
docker build --tag housing .
# Run the container, publishing container port 8080 on host port 8080
docker run --publish 8080:8080 housing

Modify Dockerfile to include GCS authentication

  • Modify plumber.R to include GCS authentication

  • Rebuild the Dockerfile using the vetiver_write_docker() function to add the {googleCloudStorageR} dependency

    # vetiver_write_docker() takes the vetiver model object as its first
    # argument (not a board and pin name); required_pkgs(board) adds the
    # packages needed to reach the board to the image
    vetiver_write_docker(
      v,
      port = 8080,
      additional_pkgs = required_pkgs(board)
    )
  • Copy service-auth.json to same directory as Dockerfile

  • Modify the Dockerfile to correctly incorporate service-auth.json. After the RUN apt-get step, add the following lines:

    # Copy the service account key into the image and point GCS_AUTH_FILE at
    # the copied path inside the container
    # NOTE(review): this bakes credentials into the image; confirm the image is
    # never pushed to a public registry
    COPY service-auth.json /opt/ml/service-auth.json
    ENV GCS_AUTH_FILE="/opt/ml/service-auth.json"

Build and test Docker container

# Rebuild the image so it includes the authentication changes, tagged "housing"
docker build --tag housing .
# Run the container, publishing container port 8080 on host port 8080
docker run --publish 8080:8080 housing

Test the API

# Point a vetiver endpoint at the running container's /predict route and
# request predictions for the held-out test set
endpoint <- vetiver_endpoint(url = "http://0.0.0.0:8080/predict")
predict(endpoint, housing_test)

Compute model metrics and store in pin

# Score the test set with the fitted workflow and compute regression metrics
# comparing the observed (log10) price to the prediction
housing_test_metrics <- metrics(
  augment(housing_fit, housing_test),
  truth = price,
  estimate = .pred
)

# Rebuild the vetiver model, this time attaching the metrics as metadata
v <- vetiver_model(
  housing_fit,
  "tompkins-housing",
  metadata = list(metrics = housing_test_metrics)
)
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
A ranger regression modeling workflow using 5 features
# Write the model, now carrying its metrics metadata, back to the board
vetiver_pin_write(board, v)

Retrieve model metrics

# Pull the stored metrics out of the pin's user metadata and return them as a
# tibble
extracted_metrics <- as_tibble(
  pluck(pin_meta(board, "tompkins-housing"), "user", "metrics")
)

extracted_metrics
# A tibble: 3 × 3
  .metric .estimator .estimate
  <chr>   <chr>          <dbl>
1 rmse    standard       0.174
2 rsq     standard       0.519
3 mae     standard       0.125

What else might you want to store as model metadata? How or when might you use model metadata?

Add response here.

Add a new endpoint

Use the {DALEX} package with {vetiver} to create a new endpoint that returns the Shapley values for a given observation.

Create explainer object

library(DALEX)
library(DALEXtra)

# Build a DALEX explainer around the fitted workflow, supplying the training
# data and the training-set price (already on the log10 scale) as the outcome
explainer_tidymodels <- explain(
  housing_fit,
  data = housing_train,
  y = housing_train$price
)
Preparation of a new explainer is initiated
  -> model label       :  workflow  (  default  )
  -> data              :  1016  rows  12  cols 
  -> data              :  tibble converted into a data.frame 
  -> target variable   :  1016  values 
  -> predict function  :  yhat.workflow  will be used (  default  )
  -> predicted values  :  No value for predict function target column. (  default  )
  -> model_info        :  package tidymodels , ver. 1.2.0 , task regression (  default  ) 
  -> predicted values  :  numerical, min =  4.628737 , mean =  5.497674 , max =  6.302674  
  -> residual function :  difference between y and yhat (  default  )
  -> residuals         :  numerical, min =  -0.6623709 , mean =  -0.0003453197 , max =  0.3718951  
  A new explainer has been created!  
# Store the explainer on the board under its own pin name
pin_write(board, explainer_tidymodels, name = "tompkins-housing-explainer")

plumber.R

# Generated by the vetiver package; edit with care

library(pins)
library(plumber)
library(rapidoc)
library(vetiver)
library(googleCloudStorageR)
library(DALEXtra)
library(dplyr)

# Packages needed to generate model predictions
if (FALSE) {
  library(parsnip)
  library(ranger)
  library(recipes)
  library(workflows)
}
# Connect to the GCS board, then load the model (at a fixed pin version) and
# the DALEX explainer used by the /explain endpoint
b <- board_gcs(bucket = "info-4940-models", prefix = "bcs88/")
v <- vetiver_pin_read(
  b,
  "tompkins-housing",
  version = "20241118T160824Z-48c64"
)
explainer <- pin_read(b, "tompkins-housing-explainer")

# Handler for the /explain route.
#
# Takes the parsed request body, converts it to the model's input prototype,
# computes SHAP attributions with the pinned explainer (B = 25), and returns
# the mean contribution for each variable.
handler_explain <- function(req) {
  observation <- vetiver_type_convert(req$body, v$prototype)
  shap_values <- predict_parts(explainer, observation, type = "shap", B = 25)

  # Collapse the per-ordering contributions to one average per variable
  summarize(
    group_by(shap_values, variable),
    contribution = mean(contribution)
  )
}

#* @plumber
function(pr) {
  # Start from the standard vetiver API for the model
  api <- vetiver_api(pr, v)

  # Add the custom POST route served by handler_explain
  pr_post(api, path = "/explain", handler = handler_explain)
}

Acknowledgments

sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.4.1 (2024-06-14)
 os       macOS Sonoma 14.6.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/New_York
 date     2024-11-25
 pandoc   3.4 @ /usr/local/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package             * version    date (UTC) lib source
 archive               1.1.9      2024-09-12 [1] CRAN (R 4.4.1)
 askpass               1.2.1      2024-10-04 [1] CRAN (R 4.4.1)
 assertthat            0.2.1      2019-03-21 [1] CRAN (R 4.3.0)
 backports             1.5.0      2024-05-23 [1] CRAN (R 4.4.0)
 bit                   4.0.5      2022-11-15 [1] CRAN (R 4.3.0)
 bit64                 4.0.5      2020-08-30 [1] CRAN (R 4.3.0)
 broom               * 1.0.6      2024-05-17 [1] CRAN (R 4.4.0)
 bundle                0.1.1      2023-09-09 [1] CRAN (R 4.4.0)
 butcher               0.3.4      2024-04-11 [1] CRAN (R 4.4.0)
 cachem                1.1.0      2024-05-16 [1] CRAN (R 4.4.0)
 class                 7.3-22     2023-05-03 [1] CRAN (R 4.4.0)
 cli                   3.6.3      2024-06-21 [1] CRAN (R 4.4.0)
 codetools             0.2-20     2024-03-31 [1] CRAN (R 4.4.1)
 crayon                1.5.3      2024-06-20 [1] CRAN (R 4.4.0)
 curl                  5.2.3      2024-09-20 [1] CRAN (R 4.4.1)
 DALEX               * 2.4.3      2023-01-15 [1] CRAN (R 4.3.0)
 DALEXtra            * 2.3.0      2023-05-26 [1] CRAN (R 4.3.0)
 data.table            1.15.4     2024-03-30 [1] CRAN (R 4.3.1)
 dials               * 1.3.0      2024-07-30 [1] CRAN (R 4.4.0)
 DiceDesign            1.10       2023-12-07 [1] CRAN (R 4.3.1)
 dichromat             2.0-0.1    2022-05-02 [1] CRAN (R 4.3.0)
 digest                0.6.35     2024-03-11 [1] CRAN (R 4.3.1)
 dplyr               * 1.1.4      2023-11-17 [1] CRAN (R 4.3.1)
 ellipsis              0.3.2      2021-04-29 [1] CRAN (R 4.3.0)
 evaluate              0.24.0     2024-06-10 [1] CRAN (R 4.4.0)
 fansi                 1.0.6      2023-12-08 [1] CRAN (R 4.3.1)
 farver                2.1.2      2024-05-13 [1] CRAN (R 4.3.3)
 fastmap               1.2.0      2024-05-15 [1] CRAN (R 4.4.0)
 forcats             * 1.0.0      2023-01-29 [1] CRAN (R 4.3.0)
 foreach               1.5.2      2022-02-02 [1] CRAN (R 4.3.0)
 fs                    1.6.4      2024-04-25 [1] CRAN (R 4.4.0)
 furrr                 0.3.1      2022-08-15 [1] CRAN (R 4.3.0)
 future                1.33.2     2024-03-26 [1] CRAN (R 4.3.1)
 future.apply          1.11.2     2024-03-28 [1] CRAN (R 4.3.1)
 gargle                1.5.2      2023-07-20 [1] CRAN (R 4.3.0)
 generics              0.1.3      2022-07-05 [1] CRAN (R 4.3.0)
 ggplot2             * 3.5.1      2024-04-23 [1] CRAN (R 4.3.1)
 globals               0.16.3     2024-03-08 [1] CRAN (R 4.3.1)
 glue                  1.8.0      2024-09-30 [1] CRAN (R 4.4.1)
 googleAuthR           2.0.2      2024-05-22 [1] CRAN (R 4.4.0)
 googleCloudStorageR * 0.7.0      2021-12-16 [1] CRAN (R 4.4.0)
 gower                 1.0.1      2022-12-22 [1] CRAN (R 4.3.0)
 GPfit                 1.0-8      2019-02-08 [1] CRAN (R 4.3.0)
 gtable                0.3.5      2024-04-22 [1] CRAN (R 4.3.1)
 hardhat               1.4.0      2024-06-02 [1] CRAN (R 4.4.0)
 here                  1.0.1      2020-12-13 [1] CRAN (R 4.3.0)
 hms                   1.1.3      2023-03-21 [1] CRAN (R 4.3.0)
 htmltools             0.5.8.1    2024-04-04 [1] CRAN (R 4.3.1)
 htmlwidgets           1.6.4      2023-12-06 [1] CRAN (R 4.3.1)
 httr                  1.4.7      2023-08-15 [1] CRAN (R 4.3.0)
 infer               * 1.0.7      2024-03-25 [1] CRAN (R 4.3.1)
 ipred                 0.9-14     2023-03-09 [1] CRAN (R 4.3.0)
 iterators             1.0.14     2022-02-05 [1] CRAN (R 4.3.0)
 jsonlite              1.8.9      2024-09-20 [1] CRAN (R 4.4.1)
 knitr                 1.47       2024-05-29 [1] CRAN (R 4.4.0)
 later                 1.3.2      2023-12-06 [1] CRAN (R 4.3.1)
 lattice               0.22-6     2024-03-20 [1] CRAN (R 4.4.0)
 lava                  1.8.0      2024-03-05 [1] CRAN (R 4.3.1)
 lhs                   1.1.6      2022-12-17 [1] CRAN (R 4.3.0)
 lifecycle             1.0.4      2023-11-07 [1] CRAN (R 4.3.1)
 listenv               0.9.1      2024-01-29 [1] CRAN (R 4.3.1)
 lubridate           * 1.9.3      2023-09-27 [1] CRAN (R 4.3.1)
 magrittr              2.0.3      2022-03-30 [1] CRAN (R 4.3.0)
 MASS                  7.3-61     2024-06-13 [1] CRAN (R 4.4.0)
 Matrix                1.7-0      2024-03-22 [1] CRAN (R 4.4.0)
 memoise               2.0.1      2021-11-26 [1] CRAN (R 4.3.0)
 mime                  0.12       2021-09-28 [1] CRAN (R 4.3.0)
 modeldata           * 1.4.0      2024-06-19 [1] CRAN (R 4.4.0)
 modelenv              0.1.1      2023-03-08 [1] CRAN (R 4.3.0)
 nnet                  7.3-19     2023-05-03 [1] CRAN (R 4.4.0)
 openssl               2.2.2      2024-09-20 [1] CRAN (R 4.4.1)
 parallelly            1.37.1     2024-02-29 [1] CRAN (R 4.3.1)
 parsnip             * 1.2.1      2024-03-22 [1] CRAN (R 4.3.1)
 pillar                1.9.0      2023-03-22 [1] CRAN (R 4.3.0)
 pins                * 1.3.0      2023-11-09 [1] CRAN (R 4.4.0)
 pkgconfig             2.0.3      2019-09-22 [1] CRAN (R 4.3.0)
 plumber               1.2.2      2024-03-26 [1] CRAN (R 4.4.0)
 prodlim               2023.08.28 2023-08-28 [1] CRAN (R 4.3.0)
 promises              1.3.0      2024-04-05 [1] CRAN (R 4.3.1)
 purrr               * 1.0.2      2023-08-10 [1] CRAN (R 4.3.0)
 R6                    2.5.1      2021-08-19 [1] CRAN (R 4.3.0)
 ranger                0.16.0     2023-11-12 [1] CRAN (R 4.3.1)
 rappdirs              0.3.3      2021-01-31 [1] CRAN (R 4.3.0)
 RColorBrewer          1.1-3      2022-04-03 [1] CRAN (R 4.3.0)
 Rcpp                  1.0.13     2024-07-17 [1] CRAN (R 4.4.0)
 readr               * 2.1.5      2024-01-10 [1] CRAN (R 4.3.1)
 recipes             * 1.0.10     2024-02-18 [1] CRAN (R 4.3.1)
 rlang                 1.1.4      2024-06-04 [1] CRAN (R 4.3.3)
 rmarkdown             2.27       2024-05-17 [1] CRAN (R 4.4.0)
 rpart                 4.1.23     2023-12-05 [1] CRAN (R 4.4.0)
 rprojroot             2.0.4      2023-11-05 [1] CRAN (R 4.3.1)
 rsample             * 1.2.1      2024-03-25 [1] CRAN (R 4.3.1)
 rstudioapi            0.17.0     2024-10-16 [1] CRAN (R 4.4.1)
 scales              * 1.3.0.9000 2024-11-14 [1] Github (r-lib/scales@ee03582)
 sessioninfo           1.2.2      2021-12-06 [1] CRAN (R 4.3.0)
 stringi               1.8.4      2024-05-06 [1] CRAN (R 4.3.1)
 stringr             * 1.5.1      2023-11-14 [1] CRAN (R 4.3.1)
 survival              3.7-0      2024-06-05 [1] CRAN (R 4.4.0)
 swagger               5.17.14.1  2024-06-28 [1] CRAN (R 4.4.0)
 tibble              * 3.2.1      2023-03-20 [1] CRAN (R 4.3.0)
 tidymodels          * 1.2.0      2024-03-25 [1] CRAN (R 4.3.1)
 tidyr               * 1.3.1      2024-01-24 [1] CRAN (R 4.3.1)
 tidyselect            1.2.1      2024-03-11 [1] CRAN (R 4.3.1)
 tidyverse           * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
 timechange            0.3.0      2024-01-18 [1] CRAN (R 4.3.1)
 timeDate              4032.109   2023-12-14 [1] CRAN (R 4.3.1)
 tune                * 1.2.1      2024-04-18 [1] CRAN (R 4.3.1)
 tzdb                  0.4.0      2023-05-12 [1] CRAN (R 4.3.0)
 utf8                  1.2.4      2023-10-22 [1] CRAN (R 4.3.1)
 vctrs                 0.6.5      2023-12-01 [1] CRAN (R 4.3.1)
 vetiver             * 0.2.5      2023-11-16 [1] CRAN (R 4.4.0)
 vroom                 1.6.5      2023-12-05 [1] CRAN (R 4.3.1)
 webutils              1.2.0      2023-11-24 [1] CRAN (R 4.4.0)
 withr                 3.0.2      2024-10-28 [1] CRAN (R 4.4.1)
 workflows           * 1.1.4      2024-02-19 [1] CRAN (R 4.4.0)
 workflowsets        * 1.1.0      2024-03-21 [1] CRAN (R 4.3.1)
 xfun                  0.45       2024-06-16 [1] CRAN (R 4.4.0)
 yaml                  2.3.10     2024-07-26 [1] CRAN (R 4.4.0)
 yardstick           * 1.3.1      2024-03-21 [1] CRAN (R 4.3.1)
 zip                   2.3.1      2024-01-27 [1] CRAN (R 4.4.0)

 [1] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library

──────────────────────────────────────────────────────────────────────────────