Here we compute the body mass index (BMI) formula:
body_mass_index = body_weight / (body_height * body_height)
The formula assumes weight is in kilograms, and height in
meters.
Furthermore, this vignette assumes:
localhost. This example uses
PostgreSQL.cdm_new_york3 in that server.MEASUREMENT, with
measurement_concept_id of 3025315,
Loinc 29463-7 Body weight.MEASUREMENT, with
measurement_concept_id of 3036277,
Loinc 8302-2 Body height.First we connect to the server and call setup_phea().
Calling setup_phea() is optional, but it allows us to use
the convenience functions slqt() and
sql0().
suppressPackageStartupMessages(library(dplyr))
library(phea)
# Connect to SQL server.
dbcon <- DBI::dbConnect(
RPostgres::Postgres(),
host = 'localhost',
port = 8765,
dbname = 'fort',
user = cred$pg$user,
password = cred$pg$pass)
# Provide the connection to Phea so we can use the sqlt() and sql0() shorthands.
setup_phea(dbcon, 'cdm_new_york3')make_component() receives a lazy table
(dbplyr/dplyr interface) and returns a
component to use in calculate_formula().
We must specify which column (in the lazy table) is the timestamp, and
which column is the patient ID. Notice that we don’t use quotes ““.
# Weight component
# Loinc 29463-7 Body weight, OMOP concept ID 3025315
weight <- sqlt(measurement) |>
filter(measurement_concept_id == 3025315) |>
make_component(
.ts = measurement_datetime,
.pid = person_id)
# Height component
# Loinc 8302-2 Body height, OMOP concept ID 3036277
height <- sqlt(measurement) |>
filter(measurement_concept_id == 3036277) |>
make_component(
.ts = measurement_datetime,
.pid = person_id)sqlt(measurement) returns a reference (lazy table) to
table MEASUREMENT in cdm_new_york3.
dplyr::filter() selects the desired records using a SQL
WHERE statement.
Each component goes inside a list in the call to
calculate_formula(). The names of the list items become the
names available to use in the formula. For example, if you want the
value_as_number column from component height,
use height_value_as_number.
Notice that, because the body height records in cdm_new_york3 are in
centimeters, we use a separate formula to produce
height_in_meters.
# BMI phenotype
bmi <- calculate_formula(
components = list(
weight = weight,
height = height),
fml = list(
height_in_meters = 'height_value_as_number / 100',
bmi = 'weight_value_as_number / (height_in_meters * height_in_meters)'))
kable(head_shot(bmi, blind = TRUE))| row_id | pid | ts | window | height_value_as_number | weight_value_as_number | height_in_meters | bmi |
|---|---|---|---|---|---|---|---|
| 1 | 1 | 2005-05-13 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 12 | 1 | 2006-05-12 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 13 | 1 | 2008-05-16 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 14 | 1 | 2011-05-20 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 5 | 1 | 2014-03-07 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 6 | 1 | 2016-03-11 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 7 | 1 | 2018-03-16 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 18 | 1 | 2020-03-20 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 19 | 1 | 2022-02-11 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
| 20 | 1 | 2022-03-25 | 00:00:00 | 189.3 | 98.3 | 1.893 | 27.43167 |
The conversion from centimeters to meters could also be done using
mutate() before creating the component, or using
parentheses in the bmi formula. Either approach would be
equivalent.
head_shot() is a convenience function to peek into the
first rows of the lazy table.
phea_plot() creates an interactive chart using the
plotly library. It requires specifying the ID of one
patient.
# Plot BMI phenotype for patient whose pid = 2105
bmi |>
select(-height_value_as_number) |> # We don't need to plot column height_value_as_number
rename(weight_in_kg = weight_value_as_number) |> # Rename column
phea_plot(pid = 2105)
#> Collecting lazy table, done. (turn this message off with `verbose = FALSE`)The result bmi returned by
calculate_formula() is a lazy table. We can manipulate it
further to produce a few aggregates:
# See summary statistics
bmi |>
group_by(pid) |>
summarise(
min_bmi = min(bmi, na.rm = TRUE),
avg_bmi = mean(bmi, na.rm = TRUE),
max_bmi = max(bmi, na.rm = TRUE),
n_distinct_bmi = n_distinct(bmi)) |>
arrange(desc(n_distinct_bmi), avg_bmi) |>
head_shot(blind = TRUE) |>
kable()| pid | min_bmi | avg_bmi | max_bmi | n_distinct_bmi |
|---|---|---|---|---|
| 11872 | 34.82498 | 36.72123 | 38.41281 | 81 |
| 22242 | 27.40980 | 28.91259 | 30.29330 | 78 |
| 22906 | 27.58095 | 29.46034 | 30.35070 | 76 |
| 4191 | 27.40982 | 28.92541 | 30.28767 | 75 |
| 20198 | 27.53843 | 29.35831 | 30.34387 | 73 |
| 5274 | 27.42844 | 29.46449 | 30.36412 | 72 |
| 18843 | 27.14441 | 29.09229 | 30.09168 | 71 |
| 10583 | 27.57425 | 29.14332 | 30.36699 | 69 |
| 1665 | 27.32200 | 29.18407 | 30.09031 | 69 |
| 8055 | 27.60844 | 28.86689 | 30.37667 | 68 |
To see the SQL query underlying the phenotype, use helper function
code_shot(), or dbplyr::sql_render(), or the
.clip_sql option in calculate_formula().
code_shot(bmi)SELECT
*,
weight_value_as_number / (height_in_meters * height_in_meters) AS "bmi"
FROM (
SELECT
"row_id",
"pid",
"ts",
"window",
"height_value_as_number",
"weight_value_as_number",
height_value_as_number / 100 AS "height_in_meters"
FROM (
SELECT
*,
greatest(weight_ts, height_ts) - least(weight_ts, height_ts) AS "window",
last_value(row_id) over (partition by "pid", "ts") AS "phea_ts_row"
FROM (
SELECT
"row_id",
"pid",
"ts",
MAX("weight_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_1") AS "weight_value_as_number",
MAX("weight_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_2") AS "weight_ts",
MAX("height_value_as_number") OVER (PARTITION BY "pid", "..dbplyr_partion_3") AS "height_value_as_number",
MAX("height_ts") OVER (PARTITION BY "pid", "..dbplyr_partion_4") AS "height_ts"
FROM (
SELECT
*,
SUM(CASE WHEN (("weight_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_1",
SUM(CASE WHEN (("weight_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_2",
SUM(CASE WHEN (("height_value_as_number" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_3",
SUM(CASE WHEN (("height_ts" IS NULL)) THEN 0 ELSE 1 END) OVER (PARTITION BY "pid" ORDER BY "pid", "ts" ROWS UNBOUNDED PRECEDING) AS "..dbplyr_partion_4"
FROM (
SELECT
row_number() over () AS "row_id",
"pid",
"ts",
last_value(case when "name" = 'sh6icwld' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_value_as_number",
last_value(case when "name" = 'sh6icwld' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "weight_ts",
last_value(case when "name" = 'fx43o2rt' then "value_as_number" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "height_value_as_number",
last_value(case when "name" = 'fx43o2rt' then "ts" else null end) over (partition by "pid", "name" order by "ts" rows between unbounded preceding and current row) AS "height_ts"
FROM (
(
SELECT
'sh6icwld' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3025315.0)
)
UNION ALL
(
SELECT
'fx43o2rt' AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3036277.0)
)
) "q01"
) "q02"
) "q03"
) "q04"
) "q05"
WHERE ("row_id" = "phea_ts_row")
) "q06"