R/remove_columns_based_on_NA.R
remove_columns_based_on_NA.Rd
Remove columns based on NA values
remove_columns_based_on_NA(data, threshold = 0.5)
Value: a data frame with columns removed if their proportion of NA values is more than the specified threshold.
# Create sample data frame with NA values
df <- data.frame(
a = c(1, 2, NA, 4, 5),
b = c(NA, NA, NA, 4, 5),
c = c(1, 2, 3, NA, 5)
)
# Remove columns with more than 50% NA values
remove_columns_based_on_NA(df)
#>    a  c
#> 1  1  1
#> 2  2  2
#> 3 NA  3
#> 4  4 NA
#> 5  5  5
# Use stricter threshold of 10% NA values
remove_columns_based_on_NA(df, threshold = 0.1)
#> data frame with 0 columns and 5 rows