BioSense Introduction for CSTE Members

Download Report

Transcript BioSense Introduction for CSTE Members

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Creating Syndrome Definitions Using RStudio

Tim Hopper

Data Scientist RTI International

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Code Is Available Online

https://gist.github.com/tdhopper/d5939aaf74886143224e/raw/3ae883a25ef078 a5edd2fcced0f0268b34be3d6b/Custom+Syndromes

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Setup

# Connect to TarrantCounty_FP database # Credentials USERNAME <- 'username' PASSWORD <- 'password' HOSTNAME <- 'data3.biosen.se' DBNAME <- 'TarrantCounty_FP' TABLE <- 'TC_Meaningful_Use_Base' # Create database connection con <- dbConnect(dbDriver('MySQL'), user=USERNAME, password=PASSWORD, host=HOSTNAME, dbname=DBNAME)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Example: Co-morbid Syndrome

We want to see the co-occurrence of influenza (influenza-like illness) and asthma.

   Data source: Texas region 2/3 Location: Tarrant County Time: February 1 –October 31, 2013

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Query for Asthma

SELECT Facility_City, Facility_State, Diagnosis_Code, Diagnosis_Text, Chief_Complaint, Age, Gender, Visit_Date_Time, Row_Number FROM TC_Meaningful_Use_Base WHERE Visit_Date_Time BETWEEN '2013-02-01 00:00:00' AND '2013-10-31 23:59:59' AND (Diagnosis_Code LIKE '%493%')

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Query for Influenza-Like Illness

SELECT Facility_City, Facility_State, Diagnosis_Code, Diagnosis_Text, Chief_Complaint, Age, Gender, Visit_Date_Time, Row_Number FROM TC_Meaningful_Use_Base WHERE Visit_Date_Time BETWEEN '2013-02-01 00:00:00' AND '2013-10-31 23:59:59' AND (Diagnosis_Code LIKE '%487%' OR Diagnosis_Code LIKE '%488%' OR Diagnosis_Code LIKE '%V04.8%' OR Diagnosis_Code LIKE '%V0481%' OR Diagnosis_Code LIKE '%V06.6%' OR Diagnosis_Code LIKE '%V066%')

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Run Query and Process Data

# Run Query df.asthma <- dbGetQuery(con, query.asthma) df.ili <- dbGetQuery(con, query.ili) # Add column naming each as a syndrome df.asthma$Syndrome <- 'ASTHMA' df.ili$Syndrome <- 'ILI' # Combine these two data sets into one data.frame

df <- rbind(df.asthma, df.ili) # Format dates and add date column (without time) df$Visit_Date_Time <- ymd_hms(df$Visit_Date_Time) df$Visit_Date <- as.Date(df$Visit_Date_Time)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Create Summary Data Set

events.per.day.split <- ddply(df, .(Visit_Date, Syndrome), summarize, Number_of_Visits=length(Visit_Date)) ############################################ # Visit_Date Syndrome Number_of_Visits # 1 2013-02-01 ASTHMA 49 # 2 2013-02-01 ASTHMA & ILI 2 # 3 2013-02-01 ILI 5 # 4 2013-02-02 ASTHMA 60 # 5 2013-02-02 ILI 21 # 6 2013-02-03 ASTHMA 89 ############################################

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Visits Per Day by Syndrome

ggplot(events.per.day.split) + aes(Visit_Date, Number_of_Visits, color=Syndrome) + geom_line()

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Create Summary Data Set

events.per.day <- ddply(df, .(Visit_Date), summarize, Number_of_Visits=length(Visit_Date)) ############################### # Visit_Date Number_of_Visits # 1 2013-02-01 513 # 2 2013-02-02 396 # 3 2013-02-03 428 # 4 2013-02-04 409 # 5 2013-02-05 580 # 6 2013-02-06 391 ###############################

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Visits Per Day by Syndrome

ggplot(events.per.day) + aes(Visit_Date, Number_of_Visits) + geom_line()

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Example: New Syndrome

We want to see create a new syndrome to identify visits during which the patient had cough AND dizziness AND headache.

 Data source: Texas region 2/3   Location: Tarrant County Time: February 1 –October 31, 2013

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Query

SELECT Facility_City, Facility_State, Diagnosis_Code, Diagnosis_Text, Chief_Complaint, Age, Gender, Visit_Date_Time, Row_Number FROM TC_Meaningful_Use_Base WHERE Visit_Date_Time BETWEEN '2013-02-01 00:00:00' AND '2013-10-31 23:59:59' AND (Diagnosis_Code LIKE '%786.2%' OR Diagnosis_Code LIKE '%7862%') AND (Diagnosis_Code LIKE '%780.4%' OR Diagnosis_Code LIKE '%7804%') AND (Diagnosis_Code LIKE '%784.0%' OR Diagnosis_Code LIKE '%7840%');

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Run Query

# Run Query df.sick <- dbGetQuery(con, query) # Fix dates using lubridate df.sick$Visit_Date_Time <- ymd_hms(df.sick$Visit_Date_Time) # Create a month column df.sick$Month <- month(df.sick$Visit_Date, label=T)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Run Query

ggplot(df.sick) + aes(Month) + geom_histogram()

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Create Line Listing

write.csv(df.sick, 'sick.csv', quote=F, row.names=F) # sick.csv: # # Row_Number,Facility_City,Facility_State,Diagnosis_Code,D...

# 1374852,Houston,TX,473.9:780.4:300.00:786.2:784.0:305.1:...

# 1536525,Houston,TX,486:786.2:780.4:784.0:794.00:789.00:7...

# 2100347,Rowlett,TX,780.4:784.0:786.2,NA,SCREENING - HA -...

# 2189305,Rowlett,TX,780.4:784.0:786.2:V76.12,NA,SCREENING...

# 3108090,Rowlett,TX,780.4:784.0:786.2:V76.12,NA,SCREENING...

# 5887191,Rowlett,TX,786.2:780.1:780.4:784.0,NA,786.2:SEP:...

# 7968958,Houston,TX,493.90:780.4:780.60:784.0:786.2:787.0...

# 9197758,Houston,TX,493.90:780.4:780.60:784.0:786.2:787.0...

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Example: Refined Age Groups

We want to see motor vehicle traffic accidents involving young people. We recombine the ages to the following groups: 0 –15, 16–20, 21–25, 26–30, and 31 –35 years.

   Data source: Texas region 2/3 Location: Tarrant County Time: February 1 –October 31, 2013

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Query

SELECT Facility_City, Facility_State, Diagnosis_Code, Diagnosis_Text, Chief_Complaint, Age, Gender, Visit_Date_Time, Row_Number FROM TC_Meaningful_Use_Base WHERE Visit_Date_Time BETWEEN '2013-02-01 00:00:00' AND '2013-10-31 23:59:59' AND (Diagnosis_Code LIKE '%E81_%') AND Age <= 35;

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Run Query

# Run Query df.auto <- dbGetQuery(con, query) # Fix dates using lubridate df.auto$Visit_Date_Time <- ymd_hms(df.auto$Visit_Date_Time) # Create a date column df.auto$Visit_Date <- as.Date(df.auto$Visit_Date_Time)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Bin Ages

# Drop all rows where age is greater than 35 years or is undefined df.auto <- df.auto[!is.na(df.auto$Age),] df.auto <- df.auto[df.auto$Age <= 35,] # Bin ages df.auto$Age_binned <- cut(df.auto$Age, breaks=c(0, 15, 20, 25, 30, 35), include.lowest=T)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Histogram of Visits by Age Group

ggplot(df.auto) + aes(Age_binned) + geom_histogram()

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Create Summary Data Set

df.auto.daily.counts <- ddply(df.auto, .(Visit_Date, Age_binned), summarize, count=length(Chief_Complaint)) ################################ # Visit_Date Age_binned count # 1 2013-02-01 [0,15] 3 # 2 2013-02-01 (15,20] 25 # 3 2013-02-01 (20,25] 16 # 4 2013-02-01 (25,30] 16 # 5 2013-02-02 [0,15] 13 # 6 2013-02-02 (15,20] 6 ###############################

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Visits per Week by Age

ggplot(df.auto.daily.counts) + aes(x = Visit_Date, y = count, color=Age_binned) + geom_line(size=2, alpha=.7)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Visits per Week by Age

ggplot(df.auto.daily.counts) + aes(x = Visit_Date, y = count, color=Age_binned) + geom_smooth(size=3, alpha=.7)

BioSense

Public Health Surveillance Through Collaboration https://biosen.se

Questions

?