Hi, I would like to ask for advice on using str_remove(). Ideally, I would like to remove multiple patterns, e.g. c("ale", "emale") in the example below, in one go so I can code more efficiently.
Any help is appreciated. Thanks.
# load package
pacman::p_load(rio, janitor, datapasta, reprex, tidyverse)
# sample data: 20 rows pairing a participant id with a recorded gender label
# (some ids appear on more than one row)
sample_case <- data.frame(
  pid = c(
    "9174d755abce7fb7",
    "469fd41756126126",
    "9fb6d7c3a1be6987",
    "6b2c6f9d5980edb8",
    "4c3ebec8e4794bee",
    "4c3ebec8e4794bee",
    "53cce1871a2312a3",
    "2b6134374e2562cf",
    "2b6134374e2562cf",
    "dc1f54eccf6a0588",
    "31efca4ba6f5a9c3",
    "fce6873a45ec882c",
    "921adccf06d6655a",
    "921adccf06d6655a",
    "0a7c27f26b2cc7e4",
    "dc8c3242bb6feb18",
    "dc8c3242bb6feb18",
    "1f85240bbee6f7ca",
    "20f66ccc3c631640",
    "8c8b8aa37887e5c9"
  ),
  case_gender = c(
    "Male",
    "Female",
    "Female",
    "Male",
    "Female",
    "Female",
    "Male",
    "Female",
    "Female",
    "Female",
    "Male",
    "Female",
    "Female",
    "Female",
    "Male",
    "Male",
    "Male",
    "Male",
    "Female",
    "Female"
  ),
  # keep both columns as plain character vectors
  stringsAsFactors = FALSE
)
# data cleaning
# Derive a one-letter gender code by stripping the "emale" / "ale" suffix.
# str_remove() takes a regular expression, so several patterns can be handled
# in a single call by joining them with `|` (alternation):
# "Female" -> "F", "Male" -> "M".
sample_case <- sample_case %>%
  mutate(gender_short = str_remove(case_gender, "emale|ale"))
sample_case
#> pid case_gender gender_short
#> 1 9174d755abce7fb7 Male M
#> 2 469fd41756126126 Female F
#> 3 9fb6d7c3a1be6987 Female F
#> 4 6b2c6f9d5980edb8 Male M
#> 5 4c3ebec8e4794bee Female F
#> 6 4c3ebec8e4794bee Female F
#> 7 53cce1871a2312a3 Male M
#> 8 2b6134374e2562cf Female F
#> 9 2b6134374e2562cf Female F
#> 10 dc1f54eccf6a0588 Female F
#> 11 31efca4ba6f5a9c3 Male M
#> 12 fce6873a45ec882c Female F
#> 13 921adccf06d6655a Female F
#> 14 921adccf06d6655a Female F
#> 15 0a7c27f26b2cc7e4 Male M
#> 16 dc8c3242bb6feb18 Male M
#> 17 dc8c3242bb6feb18 Male M
#> 18 1f85240bbee6f7ca Male M
#> 19 20f66ccc3c631640 Female F
#> 20 8c8b8aa37887e5c9 Female F