AE 14: Version your housing model

Suggested answers

Application exercise
Answers
R
Python
Modified

October 22, 2025

Load the data

library(tidyverse)
library(vetiver)

housing <- read_csv(file = "data/tompkins-home-sales.csv")
glimpse(housing)
Rows: 1,225
Columns: 12
$ sold_date    <date> 2022-09-12, 2022-09-12, 2022-09-12, 2022-09-13, 2022-03-…
$ price        <dbl> 340000, 390000, 625500, 246600, 205000, 230000, 246000, 3…
$ beds         <dbl> 2, 4, 2, 2, 2, 5, 5, 3, 5, 3, 2, 2, 4, 3, 5, 4, 3, 4, 3, …
$ baths        <dbl> 3.0, 3.0, 3.0, 1.5, 1.0, 2.0, 2.0, 2.5, 4.0, 1.0, 1.5, 2.…
$ area         <dbl> 1864, 3252, 1704, 1264, 820, 2900, 2364, 2016, 2882, 1246…
$ lot_size     <dbl> 4.50000000, 0.33999082, 65.00000000, 0.21000918, 0.239990…
$ year_built   <dbl> 1999, 1988, 1988, 1953, 1932, 1850, 1985, 1984, 2002, 196…
$ hoa_month    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ town         <chr> "Newfield", "Ithaca", "Dryden", "Ithaca", "Ithaca", "Lans…
$ municipality <chr> "Unincorporated", "Unincorporated", "Unincorporated", "It…
$ long         <dbl> -76.59488, -76.45546, -76.35953, -76.52435, -76.48761, -7…
$ lat          <dbl> 42.38609, 42.47046, 42.43971, 42.45208, 42.42739, 42.6182…
import pandas as pd
housing = pd.read_csv('data/tompkins-home-sales.csv')

Build a model

  • Log transform the price variable
  • Split into training/test set
library(tidymodels)
# Fix the random seed so the random train/test split below is reproducible.
set.seed(123)

# Put price on the log10 scale, then hold out a test set:
# prop = 0.8 assigns 80% of the rows to training and the rest to testing.
housing_split <- housing |>
  mutate(price = log10(price)) |>
  initial_split(prop = 0.8)

# Extract the two partitions; both carry the log10-transformed price.
housing_train <- training(housing_split)
housing_test <- testing(housing_split)
from sklearn import model_selection
import numpy as np
# Seed NumPy's global random number generator. No random_state is passed to
# train_test_split below, so it presumably draws from this global state --
# confirm against the scikit-learn documentation.
np.random.seed(123)
# Predictors are four numeric home characteristics; the target is log10(price).
X, y = housing[["beds", "baths", "area", "year_built"]], np.log10(housing["price"])
# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y,
    test_size = 0.2
)

Train a linear regression model:

housing_fit <- workflow(
  price ~ beds + baths + area + year_built,
  linear_reg()
) |>
  fit(data = housing_train)
from sklearn import linear_model
housing_fit = linear_model.LinearRegression().fit(X_train, y_train)

Create a deployable model object

library(vetiver)
v <- vetiver_model(housing_fit, "tompkins-housing")
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
A lm regression modeling workflow using 4 features
from vetiver import VetiverModel
v = VetiverModel(model = housing_fit, model_name = "tompkins-housing", prototype_data = X_train)
v.description
'A scikit-learn LinearRegression model'

Customize the description

# Rebuild the deployable model object, supplying a custom description in place
# of the auto-generated one.
v <- vetiver_model(
  housing_fit,
  "tompkins-housing",
  description = "Linear regression model to predict housing prices in Tompkins County"
)
v

── tompkins-housing ─ <bundled_workflow> model for deployment 
Linear regression model to predict housing prices in Tompkins County using 4
features
v = VetiverModel(
  model = housing_fit,
  model_name = "tompkins-housing",
  prototype_data = X_train,
  description = "Linear regression model to predict housing prices in Tompkins County"
)
v.description
'Linear regression model to predict housing prices in Tompkins County'

Pin your model

library(pins)

board <- board_temp()
board |> vetiver_pin_write(v)
Creating new version '20251022T124039Z-a2472'
Writing to pin 'tompkins-housing'

Create a Model Card for your published model
• Model Cards provide a framework for transparent, responsible reporting
• Use the vetiver `.Rmd` template as a place to start
board |> pin_meta("tompkins-housing")
List of 13
 $ file       : chr "tompkins-housing.rds"
 $ file_size  : 'fs_bytes' int 51.7K
 $ pin_hash   : chr "a2472abbbb02aa87"
 $ type       : chr "rds"
 $ title      : chr "tompkins-housing: a pinned list"
 $ description: chr "Linear regression model to predict housing prices in Tompkins County"
 $ tags       : NULL
 $ urls       : NULL
 $ created    : POSIXct[1:1], format: "2025-10-22 08:40:39"
 $ api_version: int 1
 $ user       :List of 2
  ..$ required_pkgs: chr [1:3] "parsnip" "stats" "workflows"
  ..$ renv_lock    : NULL
 $ name       : chr "tompkins-housing"
 $ local      :List of 3
  ..$ dir    : 'fs_path' chr "/var/folders/y5/0kt9x9qd2f13mtkqx3w0lh9r0000gr/T/RtmpUdw1BW/pins-11a89721e7b34/tompkins-housing/20251022T124039Z-a2472"
  ..$ url    : NULL
  ..$ version: chr "20251022T124039Z-a2472"
from pins import board_temp
from vetiver import vetiver_pin_write

board = board_temp(allow_pickle_read = True)
vetiver_pin_write(board, v)
Model Cards provide a framework for transparent, responsible reporting. 
 Use the vetiver `.qmd` Quarto template as a place to start, 
 with vetiver.model_card()
Writing pin:
Name: 'tompkins-housing'
Version: 20251022T084039Z-1ee78
board.pin_meta("tompkins-housing")
Meta(title='tompkins-housing: a pinned LinearRegression object', description='Linear regression model to predict housing prices in Tompkins County', created='20251022T084039Z', pin_hash='1ee78d2011464d28', file='tompkins-housing.joblib', file_size=905, type='joblib', api_version=1, version=Version(created=datetime.datetime(2025, 10, 22, 8, 40, 39), hash='1ee78'), tags=None, name='tompkins-housing', user={'user': {}, 'vetiver_meta': {'prototype': '{"beds": 3.0, "baths": 3.0, "area": 3015.0, "year_built": 1932.0}', 'python_version': [3, 13, 6, 'final', 0], 'required_pkgs': ['scikit-learn']}}, local={})

Store a new version

Train a new version of your model (here, the same linear regression with `town` added as a predictor):

housing_fit <- workflow(
  price ~ beds + baths + area + year_built + town,
  linear_reg()
) |>
  fit(data = housing_train)
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline

# Define feature columns
numeric_features = ["beds", "baths", "area", "year_built"]
categorical_features = ["town"]

# Preprocessing for numeric and categorical data
preprocessor = ColumnTransformer(
  transformers=[
    ("num", "passthrough", numeric_features),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features)
  ]
)

# Create pipeline with preprocessor and linear regression
housing_fit = Pipeline(steps=[
  ("preprocessor", preprocessor),
  ("regressor", linear_model.LinearRegression())
])

# Prepare features and target
X = housing[numeric_features + categorical_features]
y = np.log10(housing["price"])

# Split data
X_train, X_test, y_train, y_test = model_selection.train_test_split(
  X, y,
  test_size=0.2
)

# Fit model
housing_fit.fit(X_train, y_train)
Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', 'passthrough',
                                                  ['beds', 'baths', 'area',
                                                   'year_built']),
                                                 ('cat',
                                                  OneHotEncoder(handle_unknown='ignore'),
                                                  ['town'])])),
                ('regressor', LinearRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Store this new model as a new version of the same pin:

v <- vetiver_model(
  model = housing_fit,
  model_name = "tompkins-housing",
  versioned = TRUE
)
board |> vetiver_pin_write(v)
v = VetiverModel(housing_fit, "tompkins-housing", prototype_data = X_train)
vetiver_pin_write(board, v)
Model Cards provide a framework for transparent, responsible reporting. 
 Use the vetiver `.qmd` Quarto template as a place to start, 
 with vetiver.model_card()
Writing pin:
Name: 'tompkins-housing'
Version: 20251022T084040Z-3ea51

What versions do you have?

board |> pin_versions("tompkins-housing")
# A tibble: 2 × 3
  version                created             hash 
  <chr>                  <dttm>              <chr>
1 20251022T124039Z-a2472 2025-10-22 08:40:39 a2472
2 20251022T124040Z-ec322 2025-10-22 08:40:40 ec322
board.pin_versions("tompkins-housing")
              created   hash                 version
0 2025-10-22 08:40:39  1ee78  20251022T084039Z-1ee78
1 2025-10-22 08:40:40  3ea51  20251022T084040Z-3ea51

Create a new vetiver model

Fit a random forest model

# Recipe: model (log10) price on home characteristics plus town, imputing
# missing predictor values before the model is fit.
rf_rec <- recipe(
  price ~ beds + baths + area + year_built + town,
  data = housing_train
) |>
  # fill missing numeric predictors with the column mean
  step_impute_mean(all_numeric_predictors()) |>
  # fill missing categorical predictors with the most frequent value
  step_impute_mode(all_nominal_predictors())

# Bundle the recipe with a 200-tree random forest regression model and fit the
# workflow on the training set. This overwrites the earlier `housing_fit`.
housing_fit <- workflow() |>
  add_recipe(rf_rec) |>
  add_model(rand_forest(trees = 200, mode = "regression")) |>
  fit(data = housing_train)
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Define feature columns
numeric_features = ["beds", "baths", "area", "year_built"]
categorical_features = ["town"]

# Create preprocessing steps
numeric_transformer = SimpleImputer(strategy="mean")
categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

# Combine preprocessors
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features)
    ]
)

# Create pipeline with preprocessor and model
housing_fit = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=200, random_state=123))
])

# Prepare training data with all features
X_train_full = housing.loc[X_train.index, numeric_features + categorical_features]
housing_fit.fit(X_train_full, y_train)
Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', SimpleImputer(),
                                                  ['beds', 'baths', 'area',
                                                   'year_built']),
                                                 ('cat',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('onehot',
                                                                   OneHotEncoder(handle_unknown='ignore'))]),
                                                  ['town'])])),
                ('regressor',
                 RandomForestRegressor(n_estimators=200, random_state=123))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Store your model:

library(pins)
library(vetiver)

board <- board_temp()
v <- vetiver_model(housing_fit, "tompkins-housing", versioned = TRUE)
board |> vetiver_pin_write(v)
from pins import board_temp
from vetiver import vetiver_pin_write

board = board_temp(versioned = True, allow_pickle_read = True)
v = VetiverModel(housing_fit, "tompkins-housing", prototype_data = X_train)
vetiver_pin_write(board, v)

Create a vetiver REST API

from vetiver import VetiverAPI

api = VetiverAPI(v)
api.run()
Running FastAPI from a Quarto document

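Quarto uses the Jupyter engine to run Python code blocks. The Jupyter kernel already runs its own asyncio event loop, so `api.run()` — which tries to start a new event loop — cannot be called directly from a code cell. To run a FastAPI server within a Quarto document, `await` the server inside the existing event loop instead: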

import asyncio
import uvicorn

# Take the web application object from the VetiverAPI instance created above.
app = api.app

if __name__ == "__main__":
    # Build the uvicorn server by hand instead of calling api.run().
    config = uvicorn.Config(app)
    server = uvicorn.Server(config)
    # Top-level `await` only works where the code already runs inside an event
    # loop (e.g. a Jupyter/Quarto cell); in a plain .py script it is a SyntaxError.
    await server.serve()

Call your new API endpoints

Run your API in the background

We will write a standalone script to run the API in the background.

vetiver_write_plumber(
  board = board,
  name = "tompkins-housing",
  file = "plumber.R"
)

To run the R script, switch to the Terminal tab and run the Shell command:

Rscript ae-14-run-plumber.R

This executes the runner script which starts the API in the background. Note the URL and port printed in the terminal. You will need this to execute queries against the API.

from vetiver import write_app
write_app(board = board, pin_name = "tompkins-housing", file = "app.py")

To run the Python script, switch to the Terminal tab and run the Shell command:

uvicorn app:api --port <TODO> --host 127.0.0.1

Replace <TODO> with a random four-digit number. This executes the API in the background. Note the URL and port printed in the terminal. You will need this to execute queries against the API; the example URLs below use port 5850, so substitute your own port there.

Return predictions from your model API:

library(vetiver)

url <- "http://127.0.0.1:5850/predict"
endpoint <- vetiver_endpoint(url)
predict(endpoint, slice_sample(housing_test, n = 5))
from vetiver.server import predict, vetiver_endpoint

url = "http://127.0.0.1:5850/predict"
endpoint = vetiver_endpoint(url)
predict(endpoint = endpoint, data = X_test.head(5))

Optional: try /metadata or /ping here:

library(httr2)

url <- "http://127.0.0.1:5850/metadata"

request(url) |>
  req_perform() |>
  resp_body_json()
import requests

url = "http://127.0.0.1:5850/metadata" 
print(requests.get(url).content)

Acknowledgments

sessioninfo::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.5.1 (2025-06-13)
 os       macOS Tahoe 26.0.1
 system   aarch64, darwin20
 ui       X11
 language (EN)
 collate  C.UTF-8
 ctype    C.UTF-8
 tz       America/New_York
 date     2025-10-22
 pandoc   3.6.3 @ /Applications/Positron.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
 quarto   1.7.33 @ /Users/bcs88/Projects/info-4940/course-site/.venv/bin/quarto

─ Packages ───────────────────────────────────────────────────────────────────
 ! package      * version    date (UTC) lib source
 P archive        1.1.12     2025-03-20 [?] CRAN (R 4.5.0)
 P backports      1.5.0      2024-05-23 [?] RSPM (R 4.5.0)
 P bit            4.6.0      2025-03-06 [?] RSPM (R 4.5.0)
 P bit64          4.6.0-1    2025-01-16 [?] RSPM (R 4.5.0)
 P broom        * 1.0.9      2025-07-28 [?] RSPM (R 4.5.0)
 P bundle         0.1.2      2024-11-12 [?] RSPM
 P butcher        0.3.5      2025-03-18 [?] RSPM (R 4.5.1)
 P class          7.3-23     2025-01-01 [?] CRAN (R 4.5.1)
 P cli            3.6.5      2025-04-23 [?] RSPM (R 4.5.0)
 P codetools      0.2-20     2024-03-31 [?] CRAN (R 4.5.1)
 P crayon         1.5.3      2024-06-20 [?] RSPM (R 4.5.0)
 P data.table     1.17.8     2025-07-10 [?] RSPM (R 4.5.0)
 P dials        * 1.4.1      2025-07-29 [?] RSPM
 P DiceDesign     1.10       2023-12-07 [?] RSPM (R 4.5.0)
 P digest         0.6.37     2024-08-19 [?] RSPM (R 4.5.0)
 P dplyr        * 1.1.4      2023-11-17 [?] RSPM (R 4.5.0)
 P ellipsis       0.3.2      2021-04-29 [?] RSPM
 P evaluate       1.0.4      2025-06-18 [?] RSPM (R 4.5.1)
 P farver         2.1.2      2024-05-13 [?] RSPM (R 4.5.0)
 P fastmap        1.2.0      2024-05-15 [?] RSPM (R 4.5.0)
 P forcats      * 1.0.0      2023-01-29 [?] RSPM (R 4.5.0)
 P foreach        1.5.2      2022-02-02 [?] RSPM
 P fs             1.6.6      2025-04-12 [?] RSPM (R 4.5.0)
 P furrr          0.3.1      2022-08-15 [?] RSPM
 P future         1.67.0     2025-07-29 [?] RSPM
 P future.apply   1.20.0     2025-06-06 [?] RSPM
 P generics       0.1.4      2025-05-09 [?] RSPM (R 4.5.0)
 P ggplot2      * 4.0.0      2025-09-11 [?] RSPM
 P globals        0.18.0     2025-05-08 [?] RSPM
 P glue           1.8.0      2024-09-30 [?] RSPM (R 4.5.0)
 P gower          1.0.2      2024-12-17 [?] RSPM
 P GPfit          1.0-9      2025-04-12 [?] RSPM (R 4.5.0)
 P gtable         0.3.6      2024-10-25 [?] RSPM (R 4.5.0)
 P hardhat        1.4.1      2025-01-31 [?] RSPM
 P here           1.0.1      2020-12-13 [?] RSPM (R 4.5.0)
 P hms            1.1.3      2023-03-21 [?] RSPM (R 4.5.0)
 P htmltools      0.5.8.1    2024-04-04 [?] RSPM (R 4.5.0)
 P htmlwidgets    1.6.4      2023-12-06 [?] RSPM (R 4.5.0)
 P infer        * 1.0.9      2025-06-26 [?] RSPM
 P ipred          0.9-15     2024-07-18 [?] RSPM
 P iterators      1.0.14     2022-02-05 [?] RSPM
 P jsonlite       2.0.0      2025-03-27 [?] RSPM (R 4.5.0)
 P knitr          1.50       2025-03-16 [?] RSPM (R 4.5.0)
 P lattice        0.22-7     2025-04-02 [?] CRAN (R 4.5.1)
 P lava           1.8.1      2025-01-12 [?] RSPM
 P lhs            1.2.0      2024-06-30 [?] RSPM (R 4.5.0)
 P lifecycle      1.0.4      2023-11-07 [?] RSPM (R 4.5.0)
 P listenv        0.9.1      2024-01-29 [?] RSPM
 P lubridate    * 1.9.4      2024-12-08 [?] RSPM (R 4.5.0)
 P magrittr       2.0.3      2022-03-30 [?] RSPM (R 4.5.1)
 P MASS           7.3-65     2025-02-28 [?] CRAN (R 4.5.1)
 P Matrix         1.7-3      2025-03-11 [?] CRAN (R 4.5.1)
 P modeldata    * 1.5.0      2025-07-31 [?] RSPM
 P modelenv       0.2.0      2024-10-14 [?] RSPM
 P nnet           7.3-20     2025-01-01 [?] CRAN (R 4.5.1)
 P parallelly     1.45.1     2025-07-24 [?] RSPM
 P parsnip      * 1.3.2      2025-05-28 [?] RSPM
 P pillar         1.11.0     2025-07-04 [?] RSPM (R 4.5.1)
 P pins         * 1.4.1      2025-04-30 [?] RSPM
 P pkgconfig      2.0.3      2019-09-22 [?] RSPM (R 4.5.0)
 P png            0.1-8      2022-11-29 [?] RSPM (R 4.5.0)
 P prodlim        2025.04.28 2025-04-28 [?] RSPM
 P purrr        * 1.1.0      2025-07-10 [?] RSPM (R 4.5.0)
 P R6             2.6.1      2025-02-15 [?] RSPM (R 4.5.0)
 P ranger         0.17.0     2024-11-08 [?] RSPM
 P RColorBrewer   1.1-3      2022-04-03 [?] RSPM (R 4.5.0)
 P Rcpp           1.1.0      2025-07-02 [?] RSPM (R 4.5.0)
 P readr        * 2.1.5      2024-01-10 [?] RSPM (R 4.5.0)
 P recipes      * 1.3.1      2025-05-21 [?] RSPM
   renv           1.1.5      2025-07-24 [1] RSPM (R 4.5.0)
 P reticulate     1.43.0     2025-07-21 [?] CRAN (R 4.5.0)
 P rlang          1.1.6      2025-04-11 [?] RSPM (R 4.5.0)
 P rmarkdown      2.29       2024-11-04 [?] RSPM
 P rpart          4.1.24     2025-01-07 [?] CRAN (R 4.5.1)
 P rprojroot      2.1.0      2025-07-12 [?] RSPM (R 4.5.0)
 P rsample      * 1.3.1      2025-07-29 [?] RSPM
 P rstudioapi     0.17.1     2024-10-22 [?] RSPM (R 4.5.0)
 P S7             0.2.0      2024-11-07 [?] RSPM (R 4.5.0)
 P scales       * 1.4.0      2025-04-24 [?] RSPM (R 4.5.0)
 P sessioninfo    1.2.3      2025-02-05 [?] RSPM (R 4.5.0)
 P sparsevctrs    0.3.4      2025-05-25 [?] RSPM
 P stringi        1.8.7      2025-03-27 [?] RSPM (R 4.5.0)
 P stringr      * 1.5.1      2023-11-14 [?] RSPM (R 4.5.1)
 P survival       3.8-3      2024-12-17 [?] CRAN (R 4.5.1)
 P tibble       * 3.3.0      2025-06-08 [?] RSPM (R 4.5.0)
 P tidymodels   * 1.3.0      2025-02-21 [?] RSPM
 P tidyr        * 1.3.1      2024-01-24 [?] RSPM (R 4.5.0)
 P tidyselect     1.2.1      2024-03-11 [?] RSPM (R 4.5.0)
 P tidyverse    * 2.0.0      2023-02-22 [?] RSPM (R 4.5.0)
 P timechange     0.3.0      2024-01-18 [?] RSPM (R 4.5.0)
 P timeDate       4041.110   2024-09-22 [?] RSPM
 P tune         * 1.3.0      2025-02-21 [?] RSPM
 P tzdb           0.5.0      2025-03-15 [?] RSPM (R 4.5.0)
 P utf8           1.2.6      2025-06-08 [?] RSPM (R 4.5.0)
 P vctrs          0.6.5      2023-12-01 [?] RSPM (R 4.5.0)
 P vetiver      * 0.2.5      2023-11-16 [?] RSPM
 P vroom          1.6.5      2023-12-05 [?] RSPM (R 4.5.1)
 P withr          3.0.2      2024-10-28 [?] RSPM (R 4.5.0)
 P workflows    * 1.2.0      2025-02-19 [?] RSPM
 P workflowsets * 1.1.1      2025-05-27 [?] RSPM
 P xfun           0.52       2025-04-02 [?] RSPM (R 4.5.1)
 P yaml           2.3.10     2024-07-26 [?] RSPM (R 4.5.0)
 P yardstick    * 1.3.2      2025-01-22 [?] RSPM

 [1] /Users/bcs88/Projects/info-4940/course-site/renv/library/macos/R-4.5/aarch64-apple-darwin20
 [2] /Users/bcs88/Library/Caches/org.R-project.R/R/renv/sandbox/macos/R-4.5/aarch64-apple-darwin20/4cd76b74

 * ── Packages attached to the search path.
 P ── Loaded and on-disk path mismatch.

─ Python configuration ───────────────────────────────────────────────────────
 python:         /Users/bcs88/Projects/info-4940/course-site/.venv/bin/python
 libpython:      /Users/bcs88/.local/share/uv/python/cpython-3.13.6-macos-aarch64-none/lib/libpython3.13.dylib
 pythonhome:     /Users/bcs88/Projects/info-4940/course-site/.venv:/Users/bcs88/Projects/info-4940/course-site/.venv
 virtualenv:     /Users/bcs88/Projects/info-4940/course-site/.venv/bin/activate_this.py
 version:        3.13.6 (main, Aug 14 2025, 16:07:26) [Clang 20.1.4 ]
 numpy:          /Users/bcs88/Projects/info-4940/course-site/.venv/lib/python3.13/site-packages/numpy
 numpy_version:  2.3.2
 
 NOTE: Python version was forced by VIRTUAL_ENV

──────────────────────────────────────────────────────────────────────────────