cat_tab <- function(x, var, digits = 2, na.rm = TRUE, drop_empty = TRUE, trim = TRUE){ x <- as.character(x) if (trim) x <- trimws(x) if (na.rm) x <- x[!is.na(x)] if (drop_empty) x <- x[x != ""] tab <- as.data.frame(table(x), stringsAsFactors = FALSE) names(tab) <- c(var, "n") tab$n <- as.integer(tab$n) tab <- tab[order(-tab$n, tab[[var]]), , drop = FALSE] tab$prc <- round(tab$n / sum(tab$n) * 100, digits) tab } make_key <- function(z){ z2 <- iconv(z, from = "", to = "ASCII//TRANSLIT") if (is.na(z2)) z2 <- z z2 <- tolower(z2) z2 <- gsub("[^a-z0-9]+", "_", z2) z2 <- gsub("^_+|_+$", "", z2) z2 } cat_add <- function(lista, x, var, type = c("levels", "topk"), levels = NULL, k = Inf, digits = 2, na.rm = TRUE, drop_empty = TRUE, trim = TRUE, sep = "_", prefix_label = "var", add_nvalid = TRUE){ type <- match.arg(type) tab <- cat_tab(x, var = var, digits = digits, na.rm = na.rm, drop_empty = drop_empty, trim = trim) if (add_nvalid) { lista[[paste0("nvalid", sep, var)]] <- sum(tab$n) } if (type == "levels") { if (is.null(levels)) levels <- tab[[var]] for (lv in levels) { i <- match(lv, tab[[var]]) key <- make_key(lv) lista[[paste0(var, sep, key, sep, "n")]] <- if (is.na(i)) NA_integer_ else tab$n[i] lista[[paste0(var, sep, key, sep, "prc")]] <- if (is.na(i)) NA_real_ else tab$prc[i] } } else { # topk if (is.infinite(k)) k <- nrow(tab) k <- min(as.integer(k), nrow(tab)) suffix <- paste0(sep, var) for (j in seq_len(k)) { lista[[paste0(prefix_label, j, suffix)]] <- tab[[var]][j] lista[[paste0("n", j, suffix)]] <- tab$n[j] lista[[paste0("prc", j, suffix)]] <- tab$prc[j] } } lista }