Computing body mass index

Here we compute the body mass index (BMI) formula:

body_mass_index = body_weight / (body_height * body_height)

The formula assumes weight is in kilograms, and height in meters.
Furthermore, this vignette assumes:

Setup Phea

First we connect to the server and call setup_phea(). Calling setup_phea() is optional, but it allows us to use the convenience functions slqt() and sql0().

suppressPackageStartupMessages(library(dplyr))
library(phea)

# Connect to SQL server.
dbcon <- DBI::dbConnect(
    RPostgres::Postgres(),
    host = 'localhost',
    port = 8765,
    dbname = 'fort',
    user = cred$pg$user,
    password = cred$pg$pass)

# Provide the connection to Phea so we can use the sqlt() and sql0() shorthands.
setup_phea(dbcon, 'cdm_new_york3')

Create components

make_component() receives a lazy table (dbplyr/dplyr interface) and returns a component to use in calculate_formula().
We must specify which column (in the lazy table) is the timestamp, and which column is the patient ID. Notice that we don’t use quotes ““.

# Weight component
# Loinc 29463-7 Body weight, OMOP concept ID 3025315
weight <- sqlt(measurement) |>
  filter(measurement_concept_id == 3025315) |>
  make_component(
    .ts = measurement_datetime,
    .pid = person_id)

# Height component
# Loinc 8302-2 Body height, OMOP concept ID 3036277
height <- sqlt(measurement) |>
  filter(measurement_concept_id == 3036277) |>
  make_component(
    .ts = measurement_datetime,
    .pid = person_id)

sqlt(measurement) returns a reference (lazy table) to table MEASUREMENT in cdm_new_york3. dplyr::filter() selects the desired records using a SQL WHERE statement.

Calculate the phenotype

Each component goes inside a list in the call to calculate_formula(). The names of the list items become the names available to use in the formula. For example, if you want the value_as_number column from component height, use height_value_as_number.

Notice that, because the body height records in cdm_new_york3 are in centimeters, we use a separate formula to produce height_in_meters.

# BMI phenotype
bmi <- calculate_formula(
  components = list(
    weight = weight,
    height = height),
  fml = list(
    height_in_meters = 'height_value_as_number / 100',
    bmi = 'weight_value_as_number / (height_in_meters * height_in_meters)'))

kable(head_shot(bmi, blind = TRUE))
row_id pid ts window height_value_as_number weight_value_as_number height_in_meters bmi
1 1 2005-05-13 00:00:00 189.3 98.3 1.893 27.43167
12 1 2006-05-12 00:00:00 189.3 98.3 1.893 27.43167
13 1 2008-05-16 00:00:00 189.3 98.3 1.893 27.43167
14 1 2011-05-20 00:00:00 189.3 98.3 1.893 27.43167
5 1 2014-03-07 00:00:00 189.3 98.3 1.893 27.43167
6 1 2016-03-11 00:00:00 189.3 98.3 1.893 27.43167
7 1 2018-03-16 00:00:00 189.3 98.3 1.893 27.43167
18 1 2020-03-20 00:00:00 189.3 98.3 1.893 27.43167
19 1 2022-02-11 00:00:00 189.3 98.3 1.893 27.43167
20 1 2022-03-25 00:00:00 189.3 98.3 1.893 27.43167

The conversion from centimeters to meters could also be done using mutate() before creating the component, or using parentheses in the bmi formula. Either approach would be equivalent.

head_shot() is a convenience function to peek into the first rows of the lazy table.

Plot the phenotype result

phea_plot() creates an interactive chart using the plotly library. It requires specifying the ID of one patient.

# Plot BMI phenotype for patient whose pid = 2105
bmi |>
  select(-height_value_as_number) |> # We don't need to plot column height_value_as_number
  rename(weight_in_kg = weight_value_as_number) |> # Rename column
  phea_plot(pid = 2105)
#> Collecting lazy table, done. (turn this message off with `verbose = FALSE`)

Computing aggregates from a phenotype result

The result bmi returned by calculate_formula() is a lazy table. We can manipulate it further to produce a few aggregates:

# See summary statistics
bmi |>
  group_by(pid) |>
  summarise(
    min_bmi = min(bmi, na.rm = TRUE),
    avg_bmi = mean(bmi, na.rm = TRUE),
    max_bmi = max(bmi, na.rm = TRUE),
    n_distinct_bmi = n_distinct(bmi)) |>
  arrange(desc(n_distinct_bmi), avg_bmi) |>
  head_shot(blind = TRUE) |>
  kable()
pid min_bmi avg_bmi max_bmi n_distinct_bmi
11872 34.82498 36.72123 38.41281 81
22242 27.40980 28.91259 30.29330 78
22906 27.58095 29.46034 30.35070 76
4191 27.40982 28.92541 30.28767 75
20198 27.53843 29.35831 30.34387 73
5274 27.42844 29.46449 30.36412 72
18843 27.14441 29.09229 30.09168 71
10583 27.57425 29.14332 30.36699 69
1665 27.32200 29.18407 30.09031 69
8055 27.60844 28.86689 30.37667 68

Obtain SQL query that computes the phenotype

To see the SQL query underlying the phenotype, use helper function code_shot(), or dbplyr::sql_render(), or the .clip_sql option in calculate_formula().

code_shot(bmi)
SELECT
  *,
  weight_value_as_number / (height_in_meters * height_in_meters) AS "bmi"
FROM (
  SELECT
    "row_id",
    "pid",
    "ts",
    "window",
    "height_value_as_number",
    "weight_value_as_number",
    height_value_as_number / 100 AS "height_in_meters"
  FROM (
    SELECT
      *,
      greatest(weight_ts, height_ts) - least(weight_ts, height_ts) AS "window",
      last_value(row_id) over (partition by "pid", "ts") AS "phea_ts_row"
    FROM (
      SELECT
        "row_id",
        "pid",
        "ts",
        MAX("weight_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "weight_value_as_number",
        MAX("weight_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "weight_ts",
        MAX("height_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "height_value_as_number",
        MAX("height_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "height_ts"
      FROM (
        SELECT
          *,
          SUM(CASE WHEN (("weight_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1",
          SUM(CASE WHEN (("weight_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2",
          SUM(CASE WHEN (("height_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3",
          SUM(CASE WHEN (("height_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_4"
        FROM (
          SELECT
            row_number() over () AS "row_id",
            "pid",
            "ts",
            last_value(case when "name" = 'sh6icwld' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_value_as_number",
            last_value(case when "name" = 'sh6icwld' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_ts",
            last_value(case when "name" = 'fx43o2rt' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "height_value_as_number",
            last_value(case when "name" = 'fx43o2rt' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "height_ts"
          FROM (
            (
              SELECT
                'sh6icwld' AS "name",
                "person_id" AS "pid",
                "measurement_datetime" AS "ts",
                "value_as_number"
              FROM "cdm_new_york3"."measurement"
              WHERE ("measurement_concept_id" = 3025315.0)
            )
            UNION ALL
            (
              SELECT
                'fx43o2rt' AS "name",
                "person_id" AS "pid",
                "measurement_datetime" AS "ts",
                "value_as_number"
              FROM "cdm_new_york3"."measurement"
              WHERE ("measurement_concept_id" = 3036277.0)
            )
          ) "q01"
        ) "q02"
      ) "q03"
    ) "q04"
  ) "q05"
  WHERE ("row_id" = "phea_ts_row")
) "q06"