AE 20: More about APIs and Docker

Application exercise
Modified

November 19, 2024

Load the data

library(tidyverse)
library(pins)
library(vetiver)
library(googleCloudStorageR)

# Read the geocoded home sales CSV into a tibble
housing <- "data/tompkins-home-sales-geocoded.csv" |>
  read_csv()

# Show a compact, one-row-per-column preview of the imported data
housing |>
  glimpse()

Build a model

  • Log-transform the price variable (base 10)
  • Split into training/test set
library(tidymodels)

# Fix the RNG seed so the random train/test partition is reproducible
set.seed(123)

# Put price on the log10 scale, then keep 80% of rows for training
housing_split <- initial_split(
  mutate(housing, price = log10(price)),
  prop = 0.8
)

# Materialise the two partitions as separate data frames
housing_test <- testing(housing_split)
housing_train <- training(housing_split)

Train a random forest model:

# Preprocessing recipe: predict (log10) price from the listed home
# characteristics and town. Missing numeric predictors are replaced by the
# training mean, missing nominal predictors by the training mode.
rf_rec <- recipe(
  price ~ beds + baths + area + year_built + town,
  data = housing_train
) |>
  step_impute_mean(all_numeric_predictors()) |>
  step_impute_mode(all_nominal_predictors())

# Bundle the recipe with a 200-tree random forest regression model and fit
# the whole workflow on the training set. `housing_fit` is the object that
# the later sections deploy and evaluate.
housing_fit <- workflow() |>
  add_recipe(rf_rec) |>
  add_model(rand_forest(trees = 200, mode = "regression")) |>
  fit(data = housing_train)

Create a Docker container using a local board

Pin model to a local board

# Wrap the fitted model in a deployable vetiver model object.
# Fill in the blanks: the fitted model object and the name the pin should have.
v <- vetiver_model(model = ______, model_name = ______)
# Print the vetiver model to inspect what was created
v

# Fill in the blank with a {pins} board constructor; `versioned = TRUE` asks
# the board to keep every version that is written rather than overwrite it.
board <- ______(versioned = TRUE)

# Fill in the blank with the function that writes the vetiver model `v`
# to the board.
board |>
  ______(v)

Create Docker artifacts

# Generate the files needed to build a Docker image that serves the model.
# Fill in the two blanks with the first two arguments of
# vetiver_prepare_docker() (see its help page for what they identify).
# `docker_args = list(port = 8080)` sets the port to 8080, which matches the
# port used by the `docker run` command and the test URL later in the exercise.
vetiver_prepare_docker(
  ______,
  ______,
  docker_args = list(port = 8080)
)

Build and test Docker container

# Build an image tagged "housing" from the Dockerfile in the current directory
docker build -t housing .
# Start a container from that image, publishing container port 8080 on host
# port 8080 so the API can be reached locally
docker run -p 8080:8080 housing

Test the API

# Fill in the blank with the function that creates an endpoint object for the
# /predict route of the API running in the local container.
endpoint <- ______("http://0.0.0.0:8080/predict")
# Send the held-out test rows to the API and return its predictions
predict(endpoint, housing_test)

Compute model metrics and store in pin

# Append predictions to the test set, then score them against the observed
# price. Price was log10-transformed before splitting, so the metrics are on
# that scale.
housing_test_metrics <- housing_fit |>
  augment(new_data = housing_test) |>
  metrics(truth = price, estimate = .pred)

# Re-create the vetiver model object, this time attaching extra metadata.
# Fill in the blank: per the section heading, the test-set metrics are meant
# to travel with the model (check ?vetiver_model for the expected structure).
v <- vetiver_model(model = housing_fit,
                   model_name = "tompkins-housing",
                   metadata = ______)
# Print the vetiver model to inspect what was created
v

# Write the updated model object to the board
board |> vetiver_pin_write(v)

Retrieve model metrics

# Read the metadata stored with the "tompkins-housing" pin, pull out the
# nested element that holds the metrics, and convert it to a tibble.
# Fill in the blanks with the names that lead to that element; inspecting
# the result of pin_meta() on its own shows the available structure.
extracted_metrics <- board |>
  pin_meta("tompkins-housing") |>
  pluck(______, ______) |>
  as_tibble()

# Print the recovered metrics
extracted_metrics

What else might you want to store as model metadata? How or when might you use model metadata?

Add response here.

Add a new endpoint

Use the {DALEX} package with {vetiver} to create a new endpoint that returns the Shapley values for a given observation.

Create explainer object

library(DALEX)
library(DALEXtra)

# Create a DALEX explainer for the fitted workflow.
# explain_tidymodels() is the DALEXtra wrapper intended for tidymodels
# objects. `data` holds the predictors only: DALEX expects the outcome to be
# supplied separately through `y`, so `price` is dropped from `data`.
explainer_tidymodels <- explain_tidymodels(
  housing_fit,
  data = dplyr::select(housing_train, -price),
  y = housing_train$price
)

# Pin the explainer to the board under the name "tompkins-housing-explainer".
# Fill in the blank with the {pins} function that writes an R object to a board.
board |> ______(explainer_tidymodels, "tompkins-housing-explainer")

Acknowledgments