Transform a data set into an n x m table, e.g. to be used in certestats::confusion_matrix().
crosstab(
df,
identifier,
compare,
outcome,
positive = "^pos.*",
negative = "^neg.*",
...,
na.rm = TRUE,
ignore_case = TRUE
)
identifier: a column name to use as identifier, such as a patient ID or an order ID
compare: a column name whose labels form the two axes of the table and between which the outcomes are compared
outcome: a column name containing the outcome values to compare
positive: a regex to match the values in outcome that must be considered as the Positive class; use FALSE to not use a Positive class
negative: a regex to match the values in outcome that must be considered as the Negative class; use FALSE to not use a Negative class
...: manual regexes for classes if not using positive and negative, such as Class1 = "c1", Class2 = "c2", Class3 = "c3"
na.rm: a logical to indicate whether empty values must be removed before forming the table
ignore_case: a logical to indicate whether the case in the values of positive, negative and ... must be ignored
df <- data.frame(
order_nr = sort(rep(LETTERS[1:20], 2)),
test_type = rep(c("Culture", "PCR"), 20),
result = sample(c("pos", "neg"),
size = 40,
replace = TRUE,
prob = c(0.3, 0.9))
)
head(df)
#> order_nr test_type result
#> 1 A Culture neg
#> 2 A PCR pos
#> 3 B Culture neg
#> 4 B PCR neg
#> 5 C Culture neg
#> 6 C PCR neg
out <- df |> crosstab(order_nr, test_type, result)
out
#> PCR
#> Culture Positive Negative
#> Positive 0 2
#> Negative 4 14
df$result <- gsub("pos", "#p", df$result)
df$result <- gsub("neg", "#n", df$result)
head(df)
#> order_nr test_type result
#> 1 A Culture #n
#> 2 A PCR #p
#> 3 B Culture #n
#> 4 B PCR #n
#> 5 C Culture #n
#> 6 C PCR #n
# gives a warning that pattern matching failed:
df |> crosstab(order_nr, test_type, result)
#> Warning: Check the regular expressions in the 'positive' and 'negative' arguments - they are not both matched
#> PCR
#> Culture Positive Negative
#> Positive 0 0
#> Negative 0 0
# define the pattern yourself in such a case:
df |> crosstab(order_nr, test_type, result,
positive = "#p",
negative = "#n")
#> PCR
#> Culture Positive Negative
#> Positive 0 2
#> Negative 4 14
# defining classes manually, can be more than 2:
df |> crosstab(order_nr, test_type, result,
ClassA = "#p", Hello = "#n")
#> PCR
#> Culture ClassA Hello
#> ClassA 0 2
#> Hello 4 14
if ("certestats" %in% rownames(utils::installed.packages())) {
certestats::confusion_matrix(out)
}
#>
#> ── Confusion Matrix ────────────────────────────────────────────────────────────
#>
#> Positive Negative
#> Positive 0 2
#> Negative 4 14
#>
#>
#> ── Model Metrics ───────────────────────────────────────────────────────────────
#>
#> Accuracy 0.700
#> Area under the Precision Recall Curve (APRC) 0.125
#> Area under the Receiver Operator Curve (AROC) 0.611
#> Balanced Accuracy 0.389
#> Brier Score for Classification Models (BSCM) 3.100
#> Costs Function for Poor Classification (CFPC) 1.500
#> F Measure 0.000
#> Gain Capture 0.222
#> J-Index -0.222
#> Kappa -0.154
#> Matthews Correlation Coefficient (MCC) -0.167
#> Mean log Loss for Multinomial Data (MLMD) 32.439
#> Negative Predictive Value (NPV) 0.875
#> Positive Predictive Value (PPV) 0.000
#> Precision 0.000
#> Prevalence 0.200
#> Recall 0.000
#> Sensitivity 0.000
#> Specificity 0.778