Hi there,
I’m an Epidemiologist working on producing an epidemiological situation report on MERS outbreak. However, at the data cleaning stage, I got an error message while left-joining two datasets. I can’t seem to figure what’s wrong, even after consulting the help function in R and the Epi R Handbook.
Would be grateful for your assistance with this. Please see sample code below:
# Load packages
pacman::p_load(rio, # import and export of dataset
here, # defining relative filepath
janitor, # data cleaning
skimr, # review imported dataset
outbreaks,# to access dataset
apyramid, # age-sex pyramid
epikit, # creating age categories
datapasta, #recreating dataset
reprex, # creating reproducible examples
lubridate,# cleaning of dates
tidyverse # data manipulation, presentation
)
# Load/import data
data(package = "outbreaks")
mers_cases <- data.frame(
stringsAsFactors = FALSE,
id = c("SK_122", "SK_3", "SK_34", "SK_71", "SK_53"),
dt_report = c("2015-06-10","2015-05-20",
"2015-06-03","2015-06-07","2015-06-06"),
dt_onset = c("2015-06-02","2015-05-20",
"2015-05-20","2015-06-04","2015-05-29"),
dt_death = c(NA, "2015-06-04", NA, NA, NA),
dt_diag = c("2015-06-10","2015-05-21",
"2015-06-04","2015-06-07","2015-06-06")
)
mers_contacts <- data.frame(
diff_dt_onset = c(15L, 22L, 3L, 14L, 14L),
exposure = as.factor(c("Hospital room",
"Hospital room","Hospital room","Hospital room",
"Emergency room"))
)
# Data cleaning
mers_clean <- data.frame(
stringsAsFactors = FALSE,
id = c("SK_122", "SK_3", "SK_34", "SK_71", "SK_53"),
dt_report = c("2015-06-10","2015-05-20",
"2015-06-03","2015-06-07","2015-06-06"),
dt_onset = c("2015-06-02","2015-05-20",
"2015-05-20","2015-06-04","2015-05-29"),
dt_death = c(NA, "2015-06-04", NA, NA, NA),
dt_diag = c("2015-06-10","2015-05-21",
"2015-06-04","2015-06-07","2015-06-06")
) %>%
#renaming colnames
rename(
date_onset = dt_onset,
date_report = dt_report,
date_diag = dt_diag,
date_death = dt_death
) %>%
#creating new variables with important dates
mutate(diff_onset = date_report - date_onset,
delay_rep = date_report - date_onset,
delay_diag = date_diag - date_onset,
time_to_death = date_death - date_onset)
#> Error in `mutate()`:
#> ℹ In argument: `diff_onset = date_report - date_onset`.
#> Caused by error in `date_report - date_onset`:
#> ! non-numeric argument to binary operator
#> Backtrace:
#> ▆
#> 1. ├─... %>% ...
#> 2. ├─dplyr::mutate(...)
#> 3. ├─dplyr:::mutate.data.frame(...)
#> 4. │ └─dplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#> 5. │ ├─base::withCallingHandlers(...)
#> 6. │ └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#> 7. │ └─mask$eval_all_mutate(quo)
#> 8. │ └─dplyr (local) eval()
#> 9. └─base::.handleSimpleError(...)
#> 10. └─dplyr (local) h(simpleError(msg, call))
#> 11. └─rlang::abort(message, class = error_class, parent = parent, call = error_call)
#Joining contacts dataset
joined <-mers_clean %>% #baseline dataset
inner_join(data.frame(
diff_dt_onset = c(15L, 22L, 3L, 14L, 14L),
exposure = as.factor(c("Hospital room",
"Hospital room","Hospital room","Hospital room",
"Emergency room"))
), # dataset to be joined
by = c(diff_onset = "diff_dt_onset"))
#> Error in eval(expr, envir, enclos): object 'mers_clean' not found
F.I