Facet_wrap not reflecting the correct data by sub-groups


* The problem I encountered is the following:
# The resulting line graph does not correctly show the number of cases per year
# (parts of the lines are exactly the same in both panels, which does not correctly reflect the number of cases).

# Example data for the question: 50 rows with columns sex, cases_per_year
# (integer literals, hence the `L` suffix), pdato_year (plain numbers) and
# tb_incidence ("HIC" or "MIC").
demo_data <- tibble::tribble(
  ~sex,     ~cases_per_year, ~pdato_year, ~tb_incidence,
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Female", 469L, 2019, "HIC",
  "Female", 745L, 2016, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Female", 765L, 2013, "HIC",
  "Female", 674L, 2020, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   795L, 2012, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   765L, 2013, "MIC",
  "Male",   644L, 2009, "HIC",
  "Female", 489L, 2018, "HIC",
  "Male",   510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 644L, 2009, "MIC",
  "Female", 674L, 2020, "MIC",
  "Female", 510L, 2021, "MIC",
  "Female", 510L, 2021, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "MIC",
  "Male",   745L, 2016, "MIC",
  "Female", 784L, 2011, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 674L, 2020, "HIC",
  "Female", 674L, 2020, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Male",   510L, 2021, "MIC",
  "Male",   469L, 2019, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC"
)

# The cases_per_year values are shown with an "L" at the end of each integer, which I did not expect; I am not sure whether this is a problem. I created the variable with the following script:
# Add `cases_per_year`: for every row, the number of rows that share its
# `pdato_year`. The count is per year only; it is not split by sex or
# tb_incidence. The grouping is removed again before the result is stored.
data <- data %>%
  dplyr::group_by(pdato_year) %>%
  dplyr::mutate(cases_per_year = dplyr::n()) %>%
  dplyr::ungroup()

# The "L" does not appear when I print this variable on its own:
# Print the column on its own to inspect its values.
demo_data$cases_per_year

# Report the column's class. The console output is kept below as a
# reprex-style `#>` comment so the snippet can be re-run without evaluating a
# stray string literal.
class(demo_data$cases_per_year)
#> [1] "integer"

# creating a plot for cases per year by wrapping
## The problem I encountered is the following:
# The resulting line graph does not correctly show the number of cases per year
# (parts of the lines are exactly the same in both panels, which does not correctly reflect the number of cases).
# Line chart with point markers: cases_per_year against pdato_year, one colour
# per tb_incidence level and one panel per sex.
ggplot(
  data = demo_data,
  mapping = aes(x = pdato_year, y = cases_per_year, colour = tb_incidence)
) +
  geom_line() +
  geom_point() +
  facet_wrap(vars(sex))


Created on 2025-04-06 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.1 (2023-06-16 ucrt)
#>  os       Windows 10 x64 (build 19045)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  Norwegian Bokmål_Norway.utf8
#>  ctype    Norwegian Bokmål_Norway.utf8
#>  tz       Europe/Paris
#>  date     2025-04-06
#>  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.1   2023-03-23 [1] CRAN (R 4.3.1)
#>  digest        0.6.33  2023-07-07 [1] CRAN (R 4.3.1)
#>  evaluate      0.22    2023-09-29 [1] CRAN (R 4.3.1)
#>  fansi         1.0.4   2023-01-22 [1] CRAN (R 4.3.1)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.3.1)
#>  fs            1.6.3   2023-07-20 [1] CRAN (R 4.3.1)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.3.1)
#>  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.3.1)
#>  knitr         1.43    2023-05-25 [1] CRAN (R 4.3.1)
#>  lifecycle     1.0.3   2022-10-07 [1] CRAN (R 4.3.1)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.3.1)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.3.1)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.3.1)
#>  reprex        2.0.2   2022-08-17 [1] CRAN (R 4.3.1)
#>  rlang         1.1.1   2023-04-28 [1] CRAN (R 4.3.1)
#>  rmarkdown     2.27    2024-05-17 [1] CRAN (R 4.3.3)
#>  rstudioapi    0.15.0  2023-07-07 [1] CRAN (R 4.3.1)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.3.1)
#>  tibble        3.2.1   2023-03-20 [1] CRAN (R 4.3.1)
#>  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.3.1)
#>  vctrs         0.6.3   2023-06-14 [1] CRAN (R 4.3.1)
#>  withr         2.5.1   2023-09-26 [1] CRAN (R 4.3.1)
#>  xfun          0.39    2023-04-20 [1] CRAN (R 4.3.1)
#>  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.3.0)
#> 
#>  [1] C:/Program Files/R/library
#>  [2] C:/Program Files/R/R-4.3.1/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────

Hello,

In R, the L after a number just indicates that it’s an integer rather than a double (R’s default numeric type) - R will automatically produce integers when you use the count or n functions. Note that the L is not actually attached to the value; it’s just how an integer literal is written in R code, which is why you see it in the tribble() call but not when you print the column.

With respect to your plot, I think the issue may be that some combinations of the variables have no rows at all, so their counts are missing rather than zero. See below for how I would approach this problem:

# loading packages
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

# Copy of the example data from the question: 50 rows with columns sex,
# cases_per_year (integer), pdato_year (plain numbers) and tb_incidence
# ("HIC" or "MIC").
fake_data <- tribble(
  ~sex,     ~cases_per_year, ~pdato_year, ~tb_incidence,
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Female", 469L, 2019, "HIC",
  "Female", 745L, 2016, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Female", 765L, 2013, "HIC",
  "Female", 674L, 2020, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   795L, 2012, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   765L, 2013, "MIC",
  "Male",   644L, 2009, "HIC",
  "Female", 489L, 2018, "HIC",
  "Male",   510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 644L, 2009, "MIC",
  "Female", 674L, 2020, "MIC",
  "Female", 510L, 2021, "MIC",
  "Female", 510L, 2021, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "MIC",
  "Male",   745L, 2016, "MIC",
  "Female", 784L, 2011, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 674L, 2020, "HIC",
  "Female", 674L, 2020, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Male",   510L, 2021, "MIC",
  "Male",   469L, 2019, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC"
)

# Counting cases by sex, year, and incidence and filling in missing values with
# 0 counts
# Tally the rows for each observed sex / year / incidence combination into
# column `n`, then add a row with n = 0 for every combination of the listed
# sexes, the years 2009 to 2021 and the listed incidence groups that has no
# rows in the data.
count_data <- fake_data |>
  count(sex, pdato_year, tb_incidence, name = "n") |>
  complete(
    sex = c("Female", "Male"),
    pdato_year = 2009:2021,
    tb_incidence = c("HIC", "MIC"),
    fill = list(n = 0)
  )

# Plotting data
# Plot the yearly case counts as lines with point markers, coloured by
# incidence group, with one panel per sex stacked in a single column.
count_data |>
  ggplot(aes(x = pdato_year, y = n, colour = tb_incidence)) +
  geom_line() +
  geom_point() +
  # pdato_year holds plain year numbers (e.g. 2021), not Date values, so a
  # number labeller is used instead of a date labeller. big.mark = "" stops
  # the year from being printed with a thousands separator.
  scale_x_continuous(
    breaks = scales::breaks_extended(),
    labels = scales::label_number(accuracy = 1, big.mark = "")
  ) +
  scale_y_continuous(
    breaks = scales::breaks_extended(),
    labels = scales::label_comma()
  ) +
  # "qual" is the documented `type` value for qualitative palettes.
  scale_colour_brewer(type = "qual", palette = "Dark2") +
  labs(
    x = "\nYear",
    y = "Number of cases\n",
    colour = "Incidence"
  ) +
  facet_wrap(~sex, ncol = 1) +
  theme_minimal() +
  theme(legend.position = "bottom")

Created on 2025-04-06 with reprex v2.1.1

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.3 (2025-02-28)
#>  os       macOS Sequoia 15.3.2
#>  system   x86_64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/Toronto
#>  date     2025-04-06
#>  pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
#>  quarto   1.3.353 @ /usr/local/bin/quarto
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package      * version date (UTC) lib source
#>  cli            3.6.4   2025-02-13 [1] RSPM (R 4.4.0)
#>  colorspace     2.1-1   2024-07-26 [1] RSPM (R 4.4.0)
#>  curl           6.2.1   2025-02-19 [1] RSPM (R 4.4.0)
#>  digest         0.6.37  2024-08-19 [1] RSPM (R 4.4.0)
#>  dplyr        * 1.1.4   2023-11-17 [1] RSPM (R 4.4.0)
#>  evaluate       1.0.3   2025-01-10 [1] RSPM (R 4.4.0)
#>  farver         2.1.2   2024-05-13 [1] RSPM (R 4.4.0)
#>  fastmap        1.2.0   2024-05-15 [1] RSPM (R 4.4.0)
#>  fs             1.6.5   2024-10-30 [1] RSPM (R 4.4.1)
#>  generics       0.1.3   2022-07-05 [1] RSPM (R 4.4.0)
#>  ggplot2      * 3.5.1   2024-04-23 [1] RSPM (R 4.4.0)
#>  glue           1.8.0   2024-09-30 [1] RSPM (R 4.4.0)
#>  gtable         0.3.6   2024-10-25 [1] RSPM (R 4.4.0)
#>  htmltools      0.5.8.1 2024-04-04 [1] RSPM (R 4.4.0)
#>  knitr          1.49    2024-11-08 [1] RSPM (R 4.4.0)
#>  labeling       0.4.3   2023-08-29 [1] RSPM (R 4.4.0)
#>  lifecycle      1.0.4   2023-11-07 [1] RSPM (R 4.4.0)
#>  magrittr       2.0.3   2022-03-30 [1] RSPM (R 4.4.0)
#>  munsell        0.5.1   2024-04-01 [1] RSPM (R 4.4.0)
#>  pillar         1.10.1  2025-01-07 [1] RSPM (R 4.4.0)
#>  pkgconfig      2.0.3   2019-09-22 [1] RSPM (R 4.4.0)
#>  purrr          1.0.4   2025-02-05 [1] RSPM (R 4.4.0)
#>  R6             2.6.1   2025-02-15 [1] RSPM (R 4.4.0)
#>  RColorBrewer   1.1-3   2022-04-03 [1] RSPM (R 4.4.0)
#>  reprex         2.1.1   2024-07-06 [1] RSPM (R 4.4.0)
#>  rlang          1.1.5   2025-01-17 [1] RSPM (R 4.4.0)
#>  rmarkdown      2.29    2024-11-04 [1] RSPM (R 4.4.1)
#>  rstudioapi     0.17.1  2024-10-22 [1] RSPM (R 4.4.0)
#>  scales         1.3.0   2023-11-28 [1] RSPM (R 4.4.0)
#>  sessioninfo    1.2.3   2025-02-05 [1] RSPM (R 4.4.0)
#>  tibble       * 3.2.1   2023-03-20 [1] RSPM (R 4.4.0)
#>  tidyr        * 1.3.1   2024-01-24 [1] RSPM (R 4.4.0)
#>  tidyselect     1.2.1   2024-03-11 [1] RSPM (R 4.4.0)
#>  vctrs          0.6.5   2023-12-01 [1] RSPM (R 4.4.0)
#>  withr          3.0.2   2024-10-28 [1] RSPM (R 4.4.0)
#>  xfun           0.51    2025-02-19 [1] RSPM (R 4.4.0)
#>  xml2           1.3.7   2025-02-28 [1] RSPM (R 4.4.0)
#>  yaml           2.3.10  2024-07-26 [1] RSPM (R 4.4.0)
#> 
#>  [1] /Users/timothychisamore/Library/R/x86_64/4.4/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library
#>  * ── Packages attached to the search path.
#> 
#> ──────────────────────────────────────────────────────────────────────────────
1 Like

# Thank you, this is very helpful. I am also trying to add a new variable/column, but it is not working.
# Is it related to the previous error? The column clearly exists in my data.

# percentage of cases based on grouping variables 
# For each year, compute every incidence group's share (in percent) of that
# year's summed `cases_per_year`.
demo_data |>
  group_by(pdato_year, tb_incidence) |>
  # .groups = "drop" returns an ungrouped summary, so no separate ungroup()
  # step is needed and summarise() does not fall back on implicit regrouping.
  summarise(total_cases = sum(cases_per_year), .groups = "drop") |>
  group_by(pdato_year) |>
  mutate(percentage_cases = total_cases / sum(total_cases) * 100) |>
  # Drop the grouping so later steps are not silently computed per year.
  ungroup()

# Error prompt 
Error in `group_by()`:
! Must group by variables found in `.data`.
✖ Column `pdato_year` is not found.

# Example data for the follow-up question: 50 rows with columns sex,
# cases_per_year (integer), pdato_year (plain numbers) and tb_incidence
# ("HIC" or "MIC").
demo_data <- tibble::tribble(
  ~sex,     ~cases_per_year, ~pdato_year, ~tb_incidence,
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Female", 469L, 2019, "HIC",
  "Female", 745L, 2016, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Female", 765L, 2013, "HIC",
  "Female", 674L, 2020, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   795L, 2012, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   765L, 2013, "MIC",
  "Male",   644L, 2009, "HIC",
  "Female", 489L, 2018, "HIC",
  "Male",   510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 644L, 2009, "MIC",
  "Female", 674L, 2020, "MIC",
  "Female", 510L, 2021, "MIC",
  "Female", 510L, 2021, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "MIC",
  "Male",   745L, 2016, "MIC",
  "Female", 784L, 2011, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 674L, 2020, "HIC",
  "Female", 674L, 2020, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Male",   510L, 2021, "MIC",
  "Male",   469L, 2019, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC"
)


Created on 2025-04-07 with reprex v2.0.2

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.3.1 (2023-06-16 ucrt)
#>  os       Windows 10 x64 (build 19045)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  Norwegian Bokmål_Norway.utf8
#>  ctype    Norwegian Bokmål_Norway.utf8
#>  tz       Europe/Paris
#>  date     2025-04-07
#>  pandoc   3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.1   2023-03-23 [1] CRAN (R 4.3.1)
#>  digest        0.6.33  2023-07-07 [1] CRAN (R 4.3.1)
#>  evaluate      0.22    2023-09-29 [1] CRAN (R 4.3.1)
#>  fansi         1.0.4   2023-01-22 [1] CRAN (R 4.3.1)
#>  fastmap       1.1.1   2023-02-24 [1] CRAN (R 4.3.1)
#>  fs            1.6.3   2023-07-20 [1] CRAN (R 4.3.1)
#>  glue          1.6.2   2022-02-24 [1] CRAN (R 4.3.1)
#>  htmltools     0.5.5   2023-03-23 [1] CRAN (R 4.3.1)
#>  knitr         1.43    2023-05-25 [1] CRAN (R 4.3.1)
#>  lifecycle     1.0.3   2022-10-07 [1] CRAN (R 4.3.1)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.3.1)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.3.1)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.3.1)
#>  reprex        2.0.2   2022-08-17 [1] CRAN (R 4.3.1)
#>  rlang         1.1.1   2023-04-28 [1] CRAN (R 4.3.1)
#>  rmarkdown     2.27    2024-05-17 [1] CRAN (R 4.3.3)
#>  rstudioapi    0.15.0  2023-07-07 [1] CRAN (R 4.3.1)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.3.1)
#>  tibble        3.2.1   2023-03-20 [1] CRAN (R 4.3.1)
#>  utf8          1.2.3   2023-01-31 [1] CRAN (R 4.3.1)
#>  vctrs         0.6.3   2023-06-14 [1] CRAN (R 4.3.1)
#>  withr         2.5.1   2023-09-26 [1] CRAN (R 4.3.1)
#>  xfun          0.39    2023-04-20 [1] CRAN (R 4.3.1)
#>  yaml          2.3.7   2023-01-23 [1] CRAN (R 4.3.0)
#> 
#>  [1] C:/Program Files/R/library
#>  [2] C:/Program Files/R/R-4.3.1/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
1 Like

@machupovirus thank you again! A gentle follow-up: could you please review the error message I posted earlier, which I get when calculating the percentage-of-cases column? Thanks

1 Like

Hello,

My apologies, I didn’t see your ask about percentages, I would calculate proportions in this way:

# loading packages
library(tibble)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

# Copy of the example data from the question: 50 rows with columns sex,
# cases_per_year (integer), pdato_year (plain numbers) and tb_incidence
# ("HIC" or "MIC").
fake_data <- tribble(
  ~sex,     ~cases_per_year, ~pdato_year, ~tb_incidence,
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Female", 469L, 2019, "HIC",
  "Female", 745L, 2016, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Female", 765L, 2013, "HIC",
  "Female", 674L, 2020, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   795L, 2012, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   765L, 2013, "MIC",
  "Male",   644L, 2009, "HIC",
  "Female", 489L, 2018, "HIC",
  "Male",   510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 644L, 2009, "MIC",
  "Female", 674L, 2020, "MIC",
  "Female", 510L, 2021, "MIC",
  "Female", 510L, 2021, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "MIC",
  "Male",   745L, 2016, "MIC",
  "Female", 784L, 2011, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   469L, 2019, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 674L, 2020, "HIC",
  "Female", 674L, 2020, "MIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Female", 469L, 2019, "MIC",
  "Male",   510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Male",   489L, 2018, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 674L, 2020, "HIC",
  "Female", 510L, 2021, "MIC",
  "Female", 674L, 2020, "HIC",
  "Male",   510L, 2021, "HIC",
  "Male",   674L, 2020, "HIC",
  "Male",   510L, 2021, "MIC",
  "Male",   469L, 2019, "HIC",
  "Male",   489L, 2018, "MIC",
  "Female", 510L, 2021, "MIC",
  "Male",   674L, 2020, "HIC"
)

# Counting cases by sex, year, and incidence and filling in missing values with
# 0 counts
# Tally the rows for each observed sex / year / incidence combination into
# column `n`, add a row with n = 0 for every listed combination that has no
# rows, then compute `prop`: each row's share of the total `n` within its sex
# and year.
# NOTE: a sex-year with no cases at all has sum(n) == 0, so its `prop` is
# 0 / 0, which R evaluates to NaN.
count_data <- fake_data |>
  count(sex, pdato_year, tb_incidence, name = "n") |>
  complete(
    sex = c("Female", "Male"),
    pdato_year = 2009:2021,
    tb_incidence = c("HIC", "MIC"),
    fill = list(n = 0)
  ) |>
  mutate(prop = n / sum(n), .by = c(sex, pdato_year))

Created on 2025-04-08 with reprex v2.1.1

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.3 (2025-02-28)
#>  os       macOS Sequoia 15.3.2
#>  system   x86_64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       America/Toronto
#>  date     2025-04-08
#>  pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
#>  quarto   1.3.353 @ /usr/local/bin/quarto
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.4   2025-02-13 [1] RSPM (R 4.4.0)
#>  colorspace    2.1-1   2024-07-26 [1] RSPM (R 4.4.0)
#>  digest        0.6.37  2024-08-19 [1] RSPM (R 4.4.0)
#>  dplyr       * 1.1.4   2023-11-17 [1] RSPM (R 4.4.0)
#>  evaluate      1.0.3   2025-01-10 [1] RSPM (R 4.4.0)
#>  fastmap       1.2.0   2024-05-15 [1] RSPM (R 4.4.0)
#>  fs            1.6.5   2024-10-30 [1] RSPM (R 4.4.1)
#>  generics      0.1.3   2022-07-05 [1] RSPM (R 4.4.0)
#>  ggplot2     * 3.5.1   2024-04-23 [1] RSPM (R 4.4.0)
#>  glue          1.8.0   2024-09-30 [1] RSPM (R 4.4.0)
#>  gtable        0.3.6   2024-10-25 [1] RSPM (R 4.4.0)
#>  htmltools     0.5.8.1 2024-04-04 [1] RSPM (R 4.4.0)
#>  knitr         1.49    2024-11-08 [1] RSPM (R 4.4.0)
#>  lifecycle     1.0.4   2023-11-07 [1] RSPM (R 4.4.0)
#>  magrittr      2.0.3   2022-03-30 [1] RSPM (R 4.4.0)
#>  munsell       0.5.1   2024-04-01 [1] RSPM (R 4.4.0)
#>  pillar        1.10.1  2025-01-07 [1] RSPM (R 4.4.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] RSPM (R 4.4.0)
#>  purrr         1.0.4   2025-02-05 [1] RSPM (R 4.4.0)
#>  R6            2.6.1   2025-02-15 [1] RSPM (R 4.4.0)
#>  reprex        2.1.1   2024-07-06 [1] RSPM (R 4.4.0)
#>  rlang         1.1.5   2025-01-17 [1] RSPM (R 4.4.0)
#>  rmarkdown     2.29    2024-11-04 [1] RSPM (R 4.4.1)
#>  rstudioapi    0.17.1  2024-10-22 [1] RSPM (R 4.4.0)
#>  scales        1.3.0   2023-11-28 [1] RSPM (R 4.4.0)
#>  sessioninfo   1.2.3   2025-02-05 [1] RSPM (R 4.4.0)
#>  tibble      * 3.2.1   2023-03-20 [1] RSPM (R 4.4.0)
#>  tidyr       * 1.3.1   2024-01-24 [1] RSPM (R 4.4.0)
#>  tidyselect    1.2.1   2024-03-11 [1] RSPM (R 4.4.0)
#>  vctrs         0.6.5   2023-12-01 [1] RSPM (R 4.4.0)
#>  withr         3.0.2   2024-10-28 [1] RSPM (R 4.4.0)
#>  xfun          0.51    2025-02-19 [1] RSPM (R 4.4.0)
#>  yaml          2.3.10  2024-07-26 [1] RSPM (R 4.4.0)
#> 
#>  [1] /Users/timothychisamore/Library/R/x86_64/4.4/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library
#>  * ── Packages attached to the search path.
#> 
#> ──────────────────────────────────────────────────────────────────────────────

All the best,

Tim

1 Like

Thank you so much for your awesome support, grateful! :slight_smile: Many best,

1 Like