Error in my R code

Hello,

I am an epidemiologist who has just completed the Intro to R training course. As part of the reprex exercise, I have created an error in my code and am copying it below to request guidance in troubleshooting it. Any help will be greatly appreciated. The reprex is below.

pacman::p_load(
  rio,          # for importing data
  here,         # for locating files
  skimr,        # for reviewing the data
  janitor,      # for data cleaning  
  epikit,       # creating age categories
  gtsummary,    # creating tables  
  RColorBrewer, # for colour palettes
  viridis,      # for more colour palettes
  scales,       # percents in tables  
  flextable,    # for making pretty tables
  gghighlight,  # highlighting plot parts  
  ggExtra,      # special plotting functions
  datapasta,
  reprex,
  styler,
  naniar,        # replace values with NA
  tidyverse     # for data management and visualization
)


# Import data -------------------------------------------------------------

# Build the path to the CSV file with here(). The path given is an absolute
# Windows path, not one relative to a project folder; the file was then meant
# to be read with read.csv() (that call is not part of this reprex).
# NOTE(review): file_path is not used anywhere in the code shown below -- the
# example data frame is typed in directly instead.

file_path <- here ("C:/Users/vxe9/Desktop/intro_course/learning_materials/extra_datasets/H7N9_china_2013_EN.csv")

# Five-row example line list typed in by hand. case_id is integer; every other
# column (including the dates and age) is stored as character.
h1n1 <- data.frame(
  case_id                 = 1:5,
  date_of_symptoms        = c("2/19/2013", "2/27/2013", "3/9/2013",
                              "3/19/2013", "3/19/2013"),
  date_of_hospitalisation = c(NA_character_, "3/3/2013", "3/19/2013",
                              "3/27/2013", "3/30/2013"),
  date_of_result          = c("3/4/2013", "3/10/2013", "4/9/2013",
                              NA_character_, "5/15/2013"),
  sex                     = c("m", "m", "f", "f", "f"),
  age                     = c("87", "27", "35", "45", "48"),
  province                = c("Shanghai", "Shanghai", "Anhui",
                              "Jiangsu", "Jiangsu"),
  stringsAsFactors        = FALSE
)


# clean the imported data
#
# Pipeline applied to h1n1: standardise column names, drop duplicate rows,
# rename the three date columns, parse the dates, recode sex, derive an age
# category, then remove the Anhui rows and any row without an age category.
# The code is left exactly as run so that it reproduces the warnings printed
# underneath it.

h1n1_cl <- h1n1 %>% 
  clean_names() %>% 
  distinct () %>% 
  # rename variables
  # new name = old name
  rename(
    symp_date   = date_of_symptoms,
    hosp_date   = date_of_hospitalisation,
    result_date = date_of_result
  ) %>% 
  # transform variables 
  mutate (
    symp_date   = mdy (symp_date),
    hosp_date   = mdy (hosp_date),
    # NOTE: the date_of_result values are month/day/year strings such as
    # "3/4/2013", but ymd() is used here; this is the argument named in the
    # "All formats failed to parse" warning below.
    result_date = ymd (result_date),
    sex    = recode (sex,
                     "m" = "male",
                     "f" = "female"),
    age_cat   = age_categories (
      age,
      breakers = c(0, 10, 20, 30, 40, 50, 60, 70))) %>% 
  # replace_with_na() from the naniar package replaces specific values with NA.
  # NOTE: the data has no column called `result`, which is what the
  # "Missing from data: `result`" warning below refers to.
  replace_with_na(replace = list(result = c("", "D"))) %>%
  filter (province != "Anhui") %>% 
  drop_na(age_cat)
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `result_date = ymd(result_date)`.
#> Caused by warning:
#> ! All formats failed to parse. No formats found.
#> Warning: Missing from data: `result`

Created on 2024-03-09 with reprex v2.0.2

Session info
sessionInfo()
#> R version 4.2.2 (2022-10-31 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 19045)
#> 
#> Matrix products: default
#> 
#> locale:
#> [1] LC_COLLATE=English_United States.utf8 
#> [2] LC_CTYPE=English_United States.utf8   
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C                          
#> [5] LC_TIME=English_United States.utf8    
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.9.3    forcats_1.0.0      stringr_1.5.0      dplyr_1.1.3       
#>  [5] purrr_1.0.2        readr_2.1.4        tidyr_1.3.0        tibble_3.2.1      
#>  [9] tidyverse_2.0.0    naniar_1.1.0       styler_1.10.2      reprex_2.0.2      
#> [13] datapasta_3.1.0    ggExtra_0.10.1     gghighlight_0.4.1  ggplot2_3.4.4     
#> [17] flextable_0.9.4    scales_1.2.1       viridis_0.6.5      viridisLite_0.4.2 
#> [21] RColorBrewer_1.1-3 gtsummary_1.7.2    epikit_0.1.6       janitor_2.2.0     
#> [25] skimr_2.1.5        here_1.0.1         rio_1.0.1         
#> 
#> loaded via a namespace (and not attached):
#>  [1] colorspace_2.1-0        ellipsis_0.3.2          class_7.3-20           
#>  [4] visdat_0.6.0            rprojroot_2.0.4         snakecase_0.11.1       
#>  [7] base64enc_0.1-3         fs_1.6.3                httpcode_0.3.0         
#> [10] rstudioapi_0.15.0       proxy_0.4-27            fansi_1.0.4            
#> [13] xml2_1.3.5              R.methodsS3_1.8.2       knitr_1.44             
#> [16] jsonlite_1.8.7          gt_0.10.1               R.oo_1.25.0            
#> [19] shiny_1.8.0             compiler_4.2.2          fastmap_1.1.1          
#> [22] cli_3.6.1               later_1.3.2             htmltools_0.5.6.1      
#> [25] tools_4.2.2             gtable_0.3.4            glue_1.6.2             
#> [28] Rcpp_1.0.11             fontquiver_0.2.1        vctrs_0.6.3            
#> [31] crul_1.4.0              broom.helpers_1.14.0    xfun_0.40              
#> [34] timechange_0.2.0        mime_0.12               miniUI_0.1.1.1         
#> [37] lifecycle_1.0.3         pacman_0.5.1            ragg_1.2.6             
#> [40] hms_1.1.3               promises_1.2.1          fontLiberation_0.1.0   
#> [43] yaml_2.3.7              curl_5.1.0              gridExtra_2.3          
#> [46] gdtools_0.3.5           stringi_1.7.12          fontBitstreamVera_0.1.1
#> [49] e1071_1.7-14            zip_2.3.0               repr_1.1.6             
#> [52] rlang_1.1.1             pkgconfig_2.0.3         systemfonts_1.0.5      
#> [55] evaluate_0.22           sf_1.0-15               tidyselect_1.2.0       
#> [58] magrittr_2.0.3          R6_2.5.1                generics_0.1.3         
#> [61] DBI_1.1.3               pillar_1.9.0            withr_2.5.0            
#> [64] units_0.8-5             crayon_1.5.2            gfonts_0.2.0           
#> [67] uuid_1.1-1              KernSmooth_2.23-20      utf8_1.2.3             
#> [70] tzdb_0.4.0              rmarkdown_2.25          officer_0.6.3          
#> [73] grid_4.2.2              data.table_1.14.8       digest_0.6.33          
#> [76] classInt_0.4-10         xtable_1.8-4            R.cache_0.16.0         
#> [79] httpuv_1.6.14           R.utils_2.12.3          textshaping_0.3.7      
#> [82] openssl_2.1.1           munsell_0.5.0           askpass_1.2.0

Describe your issue

  • What specifically do you need help with
  • Relevant context (e.g. public health trends or associated data infrastructure)
  • Timeline/urgency
  • (No sensitive or identifiable information)

What steps have you already taken to find an answer?

Provide an example of your R code

  • Watch this video: https://www.youtube.com/watch?v=XIc-VHFeUl8
    This document is also helpful: Reprex do's and don'ts • reprex

  • If pasting your code, put it in backticks so that others can easily copy/paste:

    • Use single backticks to make text appear as code, like mutate() and filter()
    • Use 3 backticks on lines above and below a large block of text to make it appear as code
    male_cases <- linelist %>%
       filter(gender == "male")
    

Follow-up

  • Thank the volunteers who try to help you
  • Mark one reply as the “Solution” if appropriate
1 Like

Hello,

There are two issues with your code. The first is that you use the ymd() function for result_date even though its values are formatted as month/day/year (e.g. 3/4/2013), so you should use mdy() here, as you did for the other two date columns.

Second, when you used the replace_with_na() function, you specified a variable called result, but this does not exist in your data. Perhaps you intended to include this variable or you meant result_date, which is what I assumed.

Please see the code below:

# loading packages
library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test
library(epikit)
library(naniar)

# Fake five-row line list for the example. case_id is integer; all other
# columns, including the dates and age, are character strings.
h1n1 <- data.frame(
  case_id                 = 1:5,
  date_of_symptoms        = c("2/19/2013", "2/27/2013", "3/9/2013",
                              "3/19/2013", "3/19/2013"),
  date_of_hospitalisation = c(NA_character_, "3/3/2013", "3/19/2013",
                              "3/27/2013", "3/30/2013"),
  date_of_result          = c("3/4/2013", "3/10/2013", "4/9/2013",
                              NA_character_, "5/15/2013"),
  sex                     = c("m", "m", "f", "f", "f"),
  age                     = c("87", "27", "35", "45", "48"),
  province                = c("Shanghai", "Shanghai", "Anhui",
                              "Jiangsu", "Jiangsu"),
  stringsAsFactors        = FALSE
)

# cleaning the data
#
# Takes the raw h1n1 line list and returns h1n1_clean:
#   1. standardise the column names and shorten the three date column names,
#   2. turn placeholder strings in result_date into NA,
#   3. parse the month/day/year date strings, recode sex, add an age category,
#   4. drop the Anhui rows and any row that has no age category.
h1n1_clean <- h1n1 |>
  clean_names() |>
  # new name = old name
  rename(
    symp_date = date_of_symptoms,
    hosp_date = date_of_hospitalisation,
    result_date = date_of_result
  ) |>
  # Blank out the placeholder strings while result_date is still character.
  # Doing this after mdy() would have no effect: the column is a Date by then,
  # so "" and "D" could never match, and an unparseable value such as "D"
  # would already have gone through the date parser.
  replace_with_na(replace = list(result_date = c("", "D"))) |>
  mutate(
    # all three date columns hold month/day/year strings, so mdy() throughout
    symp_date = mdy(symp_date),
    hosp_date = mdy(hosp_date),
    result_date = mdy(result_date),
    sex = recode(
      sex,
      "m" = "male",
      "f" = "female"
    ),
    age_cat = age_categories(
      age,
      breakers = c(0, 10, 20, 30, 40, 50, 60, 70)
    )
  ) |>
  # dplyr:: prefix keeps this from resolving to stats::filter()
  dplyr::filter(province != "Anhui") |>
  drop_na(age_cat)

Created on 2024-03-10 with reprex v2.1.0

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.1 (2023-06-16)
#>  os       macOS Ventura 13.6.3
#>  system   x86_64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/Toronto
#>  date     2024-03-10
#>  pandoc   3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  class         7.3-22  2023-05-03 [2] CRAN (R 4.3.1)
#>  classInt      0.4-10  2023-09-05 [1] CRAN (R 4.3.0)
#>  cli           3.6.2   2023-12-11 [1] CRAN (R 4.3.0)
#>  colorspace    2.1-0   2023-01-23 [1] CRAN (R 4.3.0)
#>  DBI           1.2.2   2024-02-16 [1] RSPM (R 4.3.0)
#>  digest        0.6.34  2024-01-11 [1] RSPM (R 4.3.0)
#>  dplyr       * 1.1.4   2023-11-17 [1] CRAN (R 4.3.0)
#>  e1071         1.7-14  2023-12-06 [1] CRAN (R 4.3.0)
#>  epikit      * 0.1.6   2024-01-23 [1] RSPM (R 4.3.0)
#>  evaluate      0.23    2023-11-01 [1] CRAN (R 4.3.0)
#>  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.3.0)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.3.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.3.0)
#>  fs            1.6.3   2023-07-20 [1] CRAN (R 4.3.0)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.3.0)
#>  ggplot2     * 3.5.0   2024-02-23 [1] RSPM (R 4.3.0)
#>  glue          1.7.0   2024-01-09 [1] RSPM (R 4.3.0)
#>  gtable        0.3.4   2023-08-21 [1] CRAN (R 4.3.0)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.3.0)
#>  htmltools     0.5.7   2023-11-03 [1] CRAN (R 4.3.0)
#>  janitor     * 2.2.0   2023-02-02 [1] CRAN (R 4.3.0)
#>  KernSmooth    2.23-22 2023-07-10 [2] CRAN (R 4.3.0)
#>  knitr         1.45    2023-10-30 [1] CRAN (R 4.3.0)
#>  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.3.0)
#>  lubridate   * 1.9.3   2023-09-27 [1] CRAN (R 4.3.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.3.0)
#>  munsell       0.5.0   2018-06-12 [1] CRAN (R 4.3.0)
#>  naniar      * 1.0.0   2023-02-02 [1] CRAN (R 4.3.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.3.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.3.0)
#>  proxy         0.4-27  2022-06-09 [1] CRAN (R 4.3.0)
#>  purrr       * 1.0.2   2023-08-10 [1] CRAN (R 4.3.0)
#>  R.cache       0.16.0  2022-07-21 [1] CRAN (R 4.3.0)
#>  R.methodsS3   1.8.2   2022-06-13 [1] CRAN (R 4.3.0)
#>  R.oo          1.26.0  2024-01-24 [1] RSPM (R 4.3.0)
#>  R.utils       2.12.3  2023-11-18 [1] CRAN (R 4.3.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.3.0)
#>  Rcpp          1.0.12  2024-01-09 [1] RSPM (R 4.3.0)
#>  readr       * 2.1.5   2024-01-10 [1] RSPM (R 4.3.0)
#>  reprex        2.1.0   2024-01-11 [1] RSPM (R 4.3.0)
#>  rlang         1.1.3   2024-01-10 [1] RSPM (R 4.3.0)
#>  rmarkdown     2.25    2023-09-18 [1] CRAN (R 4.3.0)
#>  rstudioapi    0.15.0  2023-07-07 [1] CRAN (R 4.3.0)
#>  scales        1.3.0   2023-11-28 [1] CRAN (R 4.3.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.3.0)
#>  sf            1.0-15  2023-12-18 [1] RSPM (R 4.3.0)
#>  snakecase     0.11.1  2023-08-27 [1] CRAN (R 4.3.0)
#>  stringi       1.8.3   2023-12-11 [1] CRAN (R 4.3.0)
#>  stringr     * 1.5.1   2023-11-14 [1] CRAN (R 4.3.0)
#>  styler        1.10.2  2023-08-29 [1] CRAN (R 4.3.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.3.0)
#>  tidyr       * 1.3.1   2024-01-24 [1] RSPM (R 4.3.0)
#>  tidyselect    1.2.0   2022-10-10 [1] CRAN (R 4.3.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.3.0)
#>  timechange    0.3.0   2024-01-18 [1] RSPM (R 4.3.0)
#>  tzdb          0.4.0   2023-05-12 [1] CRAN (R 4.3.0)
#>  units         0.8-5   2023-11-28 [1] CRAN (R 4.3.0)
#>  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.3.0)
#>  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.3.0)
#>  visdat        0.6.0   2023-02-02 [1] CRAN (R 4.3.0)
#>  withr         3.0.0   2024-01-16 [1] RSPM (R 4.3.0)
#>  xfun          0.42    2024-02-08 [1] RSPM (R 4.3.0)
#>  yaml          2.3.8   2023-12-11 [1] CRAN (R 4.3.0)
#> 
#>  [1] /Users/timothychisamore/Library/R/x86_64/4.3/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

All the best,

Tim