diff --git a/README.Rmd b/README.Rmd index 4f552ee..060ba5a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -17,26 +17,31 @@ library(tidyverse) library(lubridate) library(coarseDataTools) library(gridExtra) +library(ggpubr) +# devtools::install_github("salauer/activemonitr") +library(activeMonitr) cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7") set.seed(1) ## read in coronavirus data -ncov <- read_csv("data/nCoV-IDD-traveler-data.csv") %>% - rename(EL_date=EL, ER_date=ER, SL_date=SL, SR_date=SR) %>% - ## change dates to restrict exposure to after December 1 - ## add times where missing - # if EL is missing or before 1 Dec 2019, use 1 Dec 2019 - mutate(EL_date=ifelse(is.na(EL_date),"2019-12-01 00:00:00", EL_date) %>% - ymd_hms() %>% - if_else(. < ymd_hms("2019-12-01 00:00:00"), - ymd_hms("2019-12-01 00:00:00"), .), - # if SR is missing, use PR - SR_date=ifelse(ymd_hms(SR_date) %>% is.na, - PR, SR_date) %>% - ymd_hms(), +ncov_raw <- read_csv("data/nCoV-IDD-traveler-data.csv") %>% + rename(EL_date=EL, ER_date=ER, SL_date=SL, SR_date=SR) + +## change dates to restrict exposure to after 1 December 2019 +## add other times where missing +ncov_ELSR <- ncov_raw %>% + # if EL is missing or before 1 Dec 2019, use 1 Dec 2019 + mutate(EL_date=ifelse(is.na(EL_date),"2019-12-01 00:00:00", EL_date) %>% + ymd_hms() %>% + if_else(. 
< ymd_hms("2019-12-01 00:00:00"), ymd_hms("2019-12-01 00:00:00"), .), + # if SR is missing, use PR + SR_date=ifelse(ymd_hms(SR_date) %>% is.na, PR, SR_date) %>% + ymd_hms(), # SR_fever is only for cases with confirmed fever dates - SR_fever=ymd_hms(SR_fever)) %>% + SR_fever=ymd_hms(SR_fever)) + +ncov <- ncov_ELSR %>% # if ER is missing, use SR; if SL is missing, use EL mutate(ER_date=if_else(is.na(ER_date), SR_date, ymd_hms(ER_date)), ER_date=if_else(ER_date>SR_date, SR_date, ER_date), @@ -77,16 +82,64 @@ ncov <- read_csv("data/nCoV-IDD-traveler-data.csv") %>% ## Now lets divide data sets by observation type ## only fevers -ncov_fever <- ncov %>% - filter(!is.na(SL_fever) | !is.na(SR_fever)) +ncov_fever <- ncov %>% filter(!is.na(SL_fever) | !is.na(SR_fever)) +ncov_mild <- ncov %>% filter(is.na(SL_fever) & is.na(SR_fever)) ## only travel outside of China -ncov_foreign <- ncov %>% - filter(COUNTRY.DEST != "China" | PROVINCE.DEST %in% c("HongKong", "Macau")) +ncov_foreign <- ncov %>% filter(COUNTRY.DEST != "China" | PROVINCE.DEST %in% c("HongKong", "Macau")) ## only fevers outside of China -ncov_foreign_fever <- ncov_foreign %>% - filter(!is.na(SL_fever) | !is.na(SR_fever)) +ncov_foreign_fever <- ncov_foreign %>% filter(!is.na(SL_fever) | !is.na(SR_fever)) + +## only cases within mainland China +ncov_mainland <- ncov %>% filter(COUNTRY.DEST == "China" & !(PROVINCE.DEST %in% c("HongKong", "Macau"))) + +## only cases with a defined EL +ncov_EL <- ncov_raw %>% + filter(!is.na(EL_date)) %>% + # EL is always present here (filtered above), so parse it as reported with no 1 Dec 2019 fallback + mutate(EL_date=ymd_hms(EL_date), + # if SR is missing, use PR + SR_date=ifelse(ymd_hms(SR_date) %>% is.na, PR, SR_date) %>% + ymd_hms(), + # SR_fever is only for cases with confirmed fever dates + SR_fever=ymd_hms(SR_fever)) %>% + # if ER is missing, use SR; if SL is missing, use EL + mutate(ER_date=if_else(is.na(ER_date), SR_date, ymd_hms(ER_date)), + SL_date=if_else(is.na(SL_date), EL_date, ymd_hms(SL_date)), + 
SL_fever=if_else(is.na(SL_fever) & !is.na(SR_fever), + SL_date, ymd_hms(SL_fever))) %>% + # calculate days since 1 Dec 2019 + mutate(EL=difftime(EL_date, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + ER=difftime(ER_date, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + SL=difftime(SL_date, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + SR=difftime(SR_date, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + SL_fever=difftime(SL_fever, ymd_hms("2019-12-01 00:00:00"), + units="days") %>% + as.numeric(), + SR_fever=difftime(SR_fever, ymd_hms("2019-12-01 00:00:00"), + units="days") %>% + as.numeric(), + PL=difftime(PL, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + PR=difftime(PR, ymd_hms("2019-12-01 00:00:00"), units="days") %>% + as.numeric(), + E_int=ER-EL, + S_int=SR-SL, + S_fever_int=SR_fever-SL_fever, + max_inc_int=SR-EL, + min_inc_int=SL-ER) %>% + # remove any entries missing EL, ER, SL, or SR + filter(!is.na(EL_date), !is.na(ER_date), !is.na(SL_date), !is.na(SR_date)) %>% + # remove entries that haven't been reviewed by two people + filter(!is.na(REVIEWER2)) %>% + # remove entries with exposure/onset intervals of zero or less + # remove entries where ER greater than SR or EL greater than SL; keep only EL after 1 Dec 2019 (EL>0) + filter(E_int > 0, S_int > 0, ER<=SR, SL>=EL, EL>0) backer_params <- read_csv("data/backer-params.csv") ``` @@ -277,9 +330,9 @@ Updated: `r date()` [Read the medRxiv preprint](https://www.medrxiv.org/content/10.1101/2020.02.02.20020016v1) -Our lab has been collecting data (freely available at [`data/nCoV-IDD-traveler-data.csv`](https://github.com/HopkinsIDD/ncov_incubation/blob/master/data/nCoV-IDD-traveler-data.csv)) on the exposure and symptom onset for novel coronavirus (nCoV-2019) cases that have been confirmed outside of the Hubei province. 
+Our lab has been collecting data (freely available at [`data/nCoV-IDD-traveler-data.csv`](https://github.com/HopkinsIDD/ncov_incubation/blob/master/data/nCoV-IDD-traveler-data.csv)) on the exposure and symptom onset for novel coronavirus (COVID-19) cases that have been confirmed outside of the Hubei province. These cases have been confirmed either in other countries or in regions of China with no known local transmission. -We search for news articles and reports in both English and Chinese and abstract the data necessary to estimate the incubation period of nCoV-2019. +We search for news articles and reports in both English and Chinese and abstract the data necessary to estimate the incubation period of COVID-19. Two team members independently review the full text of each case report to ensure that data is correctly input. Discrepancies are resolved by discussion and consensus. @@ -310,6 +363,9 @@ dat_sum <- ncov %>% SLnew = SL-ER, SRnew = SR-ER, Smid = (SLnew + SRnew)/2, + PLnew = PL-ER, + PRnew = PR-ER, + Pmid = (PLnew + PRnew)/2, UID=reorder(UID, SR-EL)) ggplot(dat_sum, aes(y=factor(UID))) + @@ -317,8 +373,11 @@ ggplot(dat_sum, aes(y=factor(UID))) + color="#0072B2", size=2, alpha=.25) + geom_segment(aes(x=SLnew, xend=SRnew, yend=factor(UID)), size=2, color="#CC0000", alpha=.25) + + geom_segment(aes(x=PLnew, xend=PRnew, yend=factor(UID)), + size=2, color="#00a841", alpha=.25) + geom_point(aes(x=Emid, y=factor(UID)), size=0.5, color="#0072B2") + geom_point(aes(x=Smid, y=factor(UID)), size=0.5, color="#CC0000") + + geom_point(aes(x=Pmid, y=factor(UID)), size=0.5, color="#00a841") + geom_segment(aes(x=Emid, xend=Smid, yend=factor(UID)), size=0.33, color="#999999") + #ggtitle("Exposure and symptom onset windows") + scale_x_continuous("Days from last possible exposure") + @@ -327,6 +386,7 @@ ggplot(dat_sum, aes(y=factor(UID))) + theme(axis.text.y = element_blank(), axis.ticks.y= element_blank(), axis.text.x=element_text(color="black")) + ``` The bars where the 
exposure and symptom onset windows completely overlap are frequently travelers from Wuhan who were symptomatic on arrival to another country, that did not release further details. @@ -338,7 +398,7 @@ The necessary components for estimating the incubation period are left and right We use explicit dates and times when they are reported in the source documents, however when they are not available, we make the following assumptions: - For cases without a reported right-bound on symptom onset time (SR), we use the time that the case is first presented to a hospital or, lacking that, the time that the source document was published -- For cases without an EL, we use 2019 December 1, which was the onset date for the first reported nCoV-2019 case; though we will test this assumption later +- For cases without an EL, we use 2019 December 1, which was the onset date for the first reported COVID-19 case; though we will test this assumption later - For cases without an ER, we use the SR - For cases without an SL, we use the EL @@ -483,7 +543,7 @@ ggplot(data=all_sens_plot, aes(y=est, ymin=CIlow, ymax=CIhigh, x=as.factor(qtile), color=type)) + geom_errorbar(height=0.2, position=position_dodge(0.9)) + geom_point(position=position_dodge(0.9)) + - scale_y_continuous("Incubation time, in days (with 95% CIs)", limits=c(0,21)) + + scale_y_continuous("Incubation time, in days (with 95% CIs)") + scale_x_discrete("Estimate quantile") + scale_color_manual("Est\ntype", values=cbbPalette[c(1,6,4,7)]) + diff --git a/README.md b/README.md index aca82c8..0dd6e75 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Real-time estimation of the novel coronavirus incubation time ============================================================= -Updated: Thu Feb 27 16:26:31 2020 +Updated: Thu Feb 27 19:18:17 2020 [Read the medRxiv preprint](https://www.medrxiv.org/content/10.1101/2020.02.02.20020016v1) @@ -36,11 +36,11 @@ Quick links: Data summary ------------ -There are 175 cases from 48 countries 
and provinces outside of Hubei, -China. Of those 64 are known to be female (37%) and 107 are male (61%). -The median age is about 44.5 years (IQR: 34-55). 76 cases are from -Mainland China (43%), while 99 are from the rest of the world (57%). 98 -cases presented with a fever (56%). +There are 181 cases from 49 countries and provinces outside of Hubei, +China. Of those 69 are known to be female (38%) and 108 are male (60%). +The median age is about 44.5 years (IQR: 34-55.5). 81 cases are from +Mainland China (45%), while 100 are from the rest of the world (55%). 99 +cases presented with a fever (55%).