Hi @shoaibraee, thanks for the reproducible example - it is very helpful. Below are two things that may help you:
1) I inserted a line using `as.numeric()` to convert your column to class numeric, right before the `case_when()` command. The command now runs without error. However, there was no data in the `Amnr_test_unit` column in this example (every value is an empty string), so none of the `case_when()` conditions match here and it is not obvious what the result will be on your full data.
pacman::p_load(rio, lubridate, datapasta, reprex, tidyverse)
# Demo data: 20 rows, two character columns.
# Built column-wise; row 19 is the only one with a unit ("2 weeks").
demo_data <- tibble::tibble(
  table2_dur_of_amenorrhea_female = c(
    "0", NA, NA, "0", NA, NA, "0", "0", NA, NA,
    NA, NA, NA, NA, "0", NA, NA, NA, NA, "0"
  ),
  table2_vomiting3 = c(rep("0", 18), "2 weeks", "0")
)
# Confirm that both raw columns are stored as character
class(demo_data[["table2_dur_of_amenorrhea_female"]])
#> [1] "character"
class(demo_data[["table2_vomiting3"]])
#> [1] "character"
# Splitting column into two and converting to days.
# str_split_fixed() returns a two-column character matrix: the text before the
# first space and everything after it.
# QUESTION: can we also split the other variable "table2_vomiting3" in the same line below
Amnr_test <- str_split_fixed(demo_data$table2_dur_of_amenorrhea_female, " ", 2)
# Binding demo data and newly created variables.
# The matrix columns are unnamed, so bind_cols() gives them placeholder names
# (see the "New names" message below); they are renamed in the next step.
demo_data <- bind_cols(demo_data, Amnr_test)
#> New names:
#> • `` -> `...3`
#> • `` -> `...4`
names(demo_data)
#> [1] "table2_dur_of_amenorrhea_female" "table2_vomiting3"
#> [3] "...3" "...4"
# Renaming the newly created split columns (use `<-`, not `=`, for assignment)
names(demo_data)[3:4] <- c("Amnr_test_numeric", "Amnr_test_unit")
# Cross checking that newly created naming went well
names(demo_data)
#> [1] "table2_dur_of_amenorrhea_female" "table2_vomiting3"
#> [3] "Amnr_test_numeric" "Amnr_test_unit"
# Question: do you recommend the code below to change the class? I found it
# online, but it cannot force character to numeric if there are characters
# entered among the numeric values.
# demo_data <- type.convert(demo_data, as.is = TRUE)
# Inspect the distinct values and the class of the two new columns
demo_data %>% pull(Amnr_test_numeric) %>% unique()
#> [1] "0" NA
demo_data %>% pull(Amnr_test_unit) %>% unique()
#> [1] ""
demo_data %>% pull(Amnr_test_numeric) %>% class()
#> [1] "character"
# !!! Add this step to convert the column class from character to numeric,
# so that the arithmetic inside case_when() below works
demo_data <- demo_data %>%
  mutate(across(Amnr_test_numeric, as.numeric))
# Convert each duration to days according to its unit.
# Rows whose unit matches none of the rules (including the empty string "")
# are left as NA. The result is printed, not assigned back to demo_data.
demo_data %>%
  mutate(
    Amnr_test_new = case_when(
      # all three month spellings share the same 30-day conversion
      Amnr_test_unit %in% c("month", "Month", "Month(menopause)") ~
        Amnr_test_numeric * 30,
      Amnr_test_unit == "(post menopausal)" ~ Amnr_test_numeric,
      Amnr_test_unit == "Year" ~ Amnr_test_numeric * 365
      # Amnr_test_numeric == "NO" ~ "NA",
      # Amnr_test_numeric == NA ~ NA ,
      # Amnr_test_numeric == "na" ~ "NA",
      # Amnr_test_unit == "" ~ Amnr_test_numeric,
      # is.na(Amnr_test_unit) ~ Amnr_test_numeric
    )
  )
#> # A tibble: 20 × 5
#> table2_dur_of_amenorrhea_…¹ table2_vomiting3 Amnr_test_numeric Amnr_test_unit
#> <chr> <chr> <dbl> <chr>
#> 1 0 0 0 ""
#> 2 <NA> 0 NA ""
#> 3 <NA> 0 NA ""
#> 4 0 0 0 ""
#> 5 <NA> 0 NA ""
#> 6 <NA> 0 NA ""
#> 7 0 0 0 ""
#> 8 0 0 0 ""
#> 9 <NA> 0 NA ""
#> 10 <NA> 0 NA ""
#> 11 <NA> 0 NA ""
#> 12 <NA> 0 NA ""
#> 13 <NA> 0 NA ""
#> 14 <NA> 0 NA ""
#> 15 0 0 0 ""
#> 16 <NA> 0 NA ""
#> 17 <NA> 0 NA ""
#> 18 <NA> 0 NA ""
#> 19 <NA> 2 weeks NA ""
#> 20 0 0 0 ""
#> # ℹ abbreviated name: ¹table2_dur_of_amenorrhea_female
#> # ℹ 1 more variable: Amnr_test_new <dbl>
攀x
#> Error in eval(expr, envir, enclos): object '攀x' not found
Created on 2023-10-14 with reprex v2.0.2
2) I tried to understand what you want to do, and provide some alternative code here that splits a column using `separate()` and then converts the result to days using your `case_when()` code. I hope it is helpful and perhaps more efficient.
pacman::p_load(rio, lubridate, datapasta, reprex, tidyverse)
# Demo data: 20 rows of the vomiting column, all "0" except row 19
demo_data <- tibble::tibble(
  table2_vomiting3 = c(rep("0", 18), "2 weeks", "0")
)
# Split a column into two other columns: a numeric duration and a text unit
demo_data <- demo_data %>%               # re-define your dataset
  separate(
    col = table2_vomiting3,              # the column to split
    into = c("vom_duration", "vom_unit"), # create two new columns
    sep = " ",                           # split the value at space
    extra = "merge",                     # if more pieces than expected, keep them
                                         # together in the last column
    fill = "right",                      # values with no unit (e.g. "0") get NA in
                                         # vom_unit, without raising a warning
    remove = FALSE                       # keep the original column for records
  ) %>%
  mutate(vom_duration = as.numeric(vom_duration)) # convert duration column to numeric
# Apply your case_when() logic to result in days.
# Rules are tried top to bottom; a row whose unit matches none of them
# (including a missing unit) gets NA.
# NOTE(review): rows such as "0" have no unit and therefore become NA - if a
# bare 0 should mean zero days, add a rule like `vom_duration == 0 ~ 0`.
demo_data <- demo_data %>%
  mutate(
    vom_duration_days = case_when(
      # I added this about week - please check if you want to keep
      vom_unit %in% c("week", "weeks") ~ vom_duration * 7,
      # Any unit containing "month", in any letter case (month, Months,
      # Month(menopause), ...). Be careful using 30 as a proxy for month!
      # Not exact
      str_detect(vom_unit, regex("month", ignore_case = TRUE)) ~
        vom_duration * 30,
      # I am not sure what you are doing here
      vom_unit == "(post menopausal)" ~ vom_duration,
      # Any unit containing "year", in any letter case (Year, years, ...),
      # so plural and lower-case spellings are handled like weeks and months
      str_detect(vom_unit, regex("year", ignore_case = TRUE)) ~
        vom_duration * 365
    )
  )
demo_data
#> # A tibble: 20 × 4
#> table2_vomiting3 vom_duration vom_unit vom_duration_days
#> <chr> <dbl> <chr> <dbl>
#> 1 0 0 <NA> NA
#> 2 0 0 <NA> NA
#> 3 0 0 <NA> NA
#> 4 0 0 <NA> NA
#> 5 0 0 <NA> NA
#> 6 0 0 <NA> NA
#> 7 0 0 <NA> NA
#> 8 0 0 <NA> NA
#> 9 0 0 <NA> NA
#> 10 0 0 <NA> NA
#> 11 0 0 <NA> NA
#> 12 0 0 <NA> NA
#> 13 0 0 <NA> NA
#> 14 0 0 <NA> NA
#> 15 0 0 <NA> NA
#> 16 0 0 <NA> NA
#> 17 0 0 <NA> NA
#> 18 0 0 <NA> NA
#> 19 2 weeks 2 weeks 14
#> 20 0 0 <NA> NA
Created on 2023-10-14 with reprex v2.0.2