This vignette assumes a SQL server at localhost
(we
use PostgreSQL), with an OMOP Common Data Model v5.4 in schema
cdm_new_york3
containing patient records. The patient
records shown in this example are synthetic data from Synthea Patient
Generator.
library(knitr)
library(credx) # local package, contains dbConnectFort
# library(phea)
::load_all()
devtools#> ℹ Loading phea
suppressPackageStartupMessages(library(dplyr))
<- dbConnectFort()
dbcon
# Provide the connection to Phea so we can use the sqlt() and sql0() shorthands.
setup_phea(dbcon, 'cdm_new_york3')
Say we want to find patients whose weight has increased by more than 50% in the last 2 years to 3 years. The formula for the number we want is very simple:
weight_drop = (weight_current - weight_previous) / weight_previous
If weight_drop
is ever ≥0.5, that means an increase of
≥50%. But unlike in other formulas, here the component
weight_previous
is not the most recently available weight
record for the given patient, at the given point in time.
While the record source is the same as
weight_current
, the component is different. When
we call make_component()
, we need to specify the time frame
using the delay
option (and optionally the
window
as well).
# Weight records
# Loinc 29463-7 Body weight, concept ID 3025315
= sqlt(measurement) |>
weight_record_source filter(measurement_concept_id == 3025315) |>
make_record_source(
ts = 'measurement_datetime',
pid = 'person_id')
# Most recent weight count record
= make_component(weight_record_source)
weight_current
# weight count record at least 24 hours older than phenotype date, up to limit of 48 hours
= make_component(weight_record_source,
weight_previous delay = "'2 years'::interval",
window = "'3 years'::interval")
delay
picks the most recent record that is
at least as old as the specified amount of
time.
window
limits how old the record is
allowed to be.
Both are expressed in SQL language, including the automatic parsing,
by the server, of terms such as '2 years'
,
'2.5 hours'
, '1 week'
, etc.
If no such record exists for the patient, at the given point in time,
the value is NULL
.
To facilitate interpretation of our phenotype, let us add the age of
the patient in the results. We create a component from table
PERSON
using birth_datetime
as the
timestamp.
<- sqlt(person) |>
age_component make_component(
ts = 'birth_datetime',
pid = 'person_id')
When you supply a lazy table to make_component()
, it
silently creates a record source from it, then uses that record source
to create a component. You must provide varibales .ts
e
.pid
for that.
Armed with the components, we call
calculate_formula()
:
<- calculate_formula(
phen components = list(
person = age_component,
weight_current = weight_current,
weight_previous = weight_previous),
fml = list(
patient_age = 'extract(year from age(ts, person_birth_datetime))',
weight_change = '(weight_current_value_as_number - weight_previous_value_as_number) /
weight_previous_value_as_number'),
export = list(
'weight_current_measurement_datetime',
'weight_previous_measurement_datetime'))
The phen
object is a lazy table, which means it
is a SQL query. We can manipulate it to obtain only the patients that
had a ≥50% increase in body weight at least once in their history.
<- phen |>
summary_per_patient filter(weight_change >= 0.5) |> # Only rows where increase is ≥50%
group_by(pid) |> # Group by patient ID
summarise(
# Keep timestamp of earliest increase of ≥50% for the patient
date_at_first_50p_incr = min(ts, na.rm = TRUE),
# Keep patient age at earliest increase of ≥50%
age_at_first_50p_incr = min(patient_age, na.rm = TRUE)) |>
collect() # Execute query and download data from the server to local memory
kable(head(summary_per_patient, 6))
pid | date_at_first_50p_incr | age_at_first_50p_incr |
---|---|---|
5 | 2014-03-09 | 2 |
7 | 2009-03-07 | 2 |
8 | 2015-02-18 | 2 |
9 | 2018-09-20 | 2 |
15 | 2018-06-29 | 2 |
23 | 2007-05-30 | 2 |
As you can see, very young patients (kids) exhibit pairs of body weight measurements 2 to 3 years apart showing an increase of 50% or more.
Let us pick a patient at random and plot their results:
# Sample one patient at random, from among those with ≥50% weight increase at least once in their history.
<- sample(summary_per_patient$pid, 1)
sample_patient message('Sampled patient: ', sample_patient)
#> Sampled patient: 12810
# Plot the phenotype for the sampled patient.
|>
phen select(
-weight_current_measurement_datetime,
-weight_previous_measurement_datetime,
-person_birth_datetime) |>
phea_plot(pid = sample_patient,
titles_font_size = 8)
#> Collecting lazy table, done. (turn this message off with `verbose` or `.verbose` in setup_phea())
Notice that the chart for
weight_previous_value_as_number
tracks
weight_current_measurement_datetime
, but 2-3 years
behind.
To see the SQL query underlying the phenotype, use helper function
code_shot()
, or dbplyr::sql_render()
, or the
.clip_sql
option in calculate_formula()
.
code_shot(phen)
SELECT
*,
- weight_previous_value_as_number) /
(weight_current_value_as_number AS "weight_change"
weight_previous_value_as_number FROM (
SELECT *, extract(year from age(ts, person_birth_datetime)) AS "patient_age"
FROM (
SELECT
"phea_row_id",
"pid",
"ts",
greatest("person_ts", "weight_current_ts", "weight_previous_ts") - least("person_ts", "weight_current_ts", "weight_previous_ts") AS "window",
"person_birth_datetime",
"weight_current_value_as_number",
"weight_previous_value_as_number",
"weight_current_measurement_datetime",
"weight_previous_measurement_datetime"
FROM (
SELECT
row_number() OVER (ORDER BY "pid", "ts") AS "phea_row_id",
"pid",
"ts",
case when "name" = 389647 then "birth_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "person_birth_datetime",
phea_last_value_ignore_nulls(case when "name" = 389647 then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "person_ts",
phea_last_value_ignore_nulls(case when "name" = 980332 then "measurement_datetime" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_measurement_datetime",
phea_last_value_ignore_nulls(case when "name" = 980332 then "value_as_number" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_value_as_number",
phea_last_value_ignore_nulls(case when "name" = 980332 then "ts" else null end) OVER (PARTITION BY "pid" ORDER BY "ts" ROWS UNBOUNDED PRECEDING) AS "weight_current_ts",
phea_last_value_ignore_nulls(case when "name" = 980332 then "measurement_datetime" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_measurement_datetime",
phea_last_value_ignore_nulls(case when "name" = 980332 then "value_as_number" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_value_as_number",
phea_last_value_ignore_nulls(case when "name" = 980332 then "ts" else null end) over (partition by "pid" order by "ts" range between '3 years'::interval preceding and '2 years'::interval preceding) AS "weight_previous_ts"
phea_last_value_ignore_nulls(FROM (
(SELECT *, NULL AS "measurement_datetime", NULL AS "value_as_number"
FROM (
SELECT
389647 AS "name",
"person_id" AS "pid",
"birth_datetime" AS "ts",
"birth_datetime"
FROM "cdm_new_york3"."person"
"q01"
)
)UNION ALL
(SELECT
"name",
"pid",
"ts",
NULL AS "birth_datetime",
"measurement_datetime",
"value_as_number"
FROM (
SELECT
980332 AS "name",
"person_id" AS "pid",
"measurement_datetime" AS "ts",
"measurement_datetime",
"value_as_number"
FROM "cdm_new_york3"."measurement"
WHERE ("measurement_concept_id" = 3025315.0)
"q02"
)
)"q03"
) "q04"
) "q05"
) "q06" )