The plot2() function is a convenient wrapper around many ggplot2 functions. By design, the ggplot2 package requires users to use a lot of functions and manual settings, while the plot2() function does all the heavy lifting automatically and only requires users to define some arguments in one single function, greatly increasing convenience.

Moreover, plot2() allows for in-place calculation of y, all axes, and all axis labels, often preventing the need to use group_by(), count(), mutate(), or summarise().

See plot2-methods for all implemented methods for different object classes.

plot2(
  .data,
  x = NULL,
  y = NULL,
  category = NULL,
  facet = NULL,
  type = NULL,
  x.title = TRUE,
  y.title = TRUE,
  category.title = NULL,
  title = NULL,
  subtitle = NULL,
  caption = NULL,
  tag = NULL,
  title.linelength = 60,
  title.colour = getOption("plot2.colour_font_primary", "black"),
  subtitle.linelength = 60,
  subtitle.colour = getOption("plot2.colour_font_secondary", "grey35"),
  na.replace = "",
  na.rm = FALSE,
  facet.position = "top",
  facet.fill = NULL,
  facet.bold = TRUE,
  facet.italic = FALSE,
  facet.size = 10,
  facet.margin = 8,
  facet.repeat_lbls_x = TRUE,
  facet.repeat_lbls_y = TRUE,
  facet.fixed_y = NULL,
  facet.fixed_x = TRUE,
  facet.drop = FALSE,
  facet.nrow = NULL,
  facet.relative = FALSE,
  x.date_breaks = NULL,
  x.date_labels = NULL,
  x.date_remove_years = NULL,
  category.focus = NULL,
  colour = getOption("plot2.colour", "ggplot2"),
  colour_fill = NULL,
  colour_opacity = 0,
  x.lbl_angle = 0,
  x.lbl_align = NULL,
  x.lbl_italic = FALSE,
  x.lbl_taxonomy = FALSE,
  x.remove = FALSE,
  x.position = "bottom",
  x.max_items = Inf,
  x.max_txt = "(rest, x%n)",
  category.max_items = Inf,
  category.max_txt = "(rest, x%n)",
  facet.max_items = Inf,
  facet.max_txt = "(rest, x%n)",
  x.breaks = NULL,
  x.n_breaks = NULL,
  x.trans = "identity",
  x.expand = NULL,
  x.limits = NULL,
  x.labels = NULL,
  x.character = NULL,
  x.drop = FALSE,
  x.mic = FALSE,
  x.zoom = FALSE,
  y.remove = FALSE,
  y.24h = FALSE,
  y.age = FALSE,
  y.scientific = NULL,
  y.percent = FALSE,
  y.percent_break = 0.1,
  y.breaks = NULL,
  y.n_breaks = NULL,
  y.limits = NULL,
  y.labels = NULL,
  y.expand = NULL,
  y.trans = "identity",
  y.position = "left",
  y.zoom = FALSE,
  y_secondary = NULL,
  y_secondary.type = type,
  y_secondary.title = TRUE,
  y_secondary.colour = colour,
  y_secondary.colour_fill = colour_fill,
  y_secondary.scientific = NULL,
  y_secondary.percent = FALSE,
  y_secondary.labels = NULL,
  category.labels = NULL,
  category.percent = FALSE,
  category.breaks = NULL,
  category.limits = NULL,
  category.expand = 0,
  category.midpoint = NULL,
  category.trans = "identity",
  category.date_breaks = NULL,
  category.date_labels = NULL,
  category.character = NULL,
  x.sort = NULL,
  category.sort = TRUE,
  facet.sort = TRUE,
  x.complete = NULL,
  category.complete = NULL,
  facet.complete = NULL,
  datalabels = TRUE,
  datalabels.round = ifelse(y.percent, 2, 1),
  datalabels.format = "%n",
  datalabels.colour = "grey25",
  datalabels.colour_fill = NULL,
  datalabels.size = (3 * text_factor),
  datalabels.angle = 0,
  datalabels.lineheight = 1,
  decimal.mark = dec_mark(),
  big.mark = big_mark(),
  summarise_function = base::sum,
  stacked = FALSE,
  stackedpercent = FALSE,
  horizontal = FALSE,
  reverse = horizontal,
  smooth = NULL,
  smooth.method = NULL,
  smooth.formula = NULL,
  smooth.se = TRUE,
  smooth.level = 0.95,
  smooth.alpha = 0.25,
  smooth.linewidth = 0.75,
  smooth.linetype = 3,
  smooth.colour = NULL,
  size = NULL,
  linetype = 1,
  linewidth = NULL,
  binwidth = NULL,
  width = NULL,
  jitter_seed = NA,
  violin_scale = "count",
  legend.position = NULL,
  legend.title = NULL,
  legend.reverse = FALSE,
  legend.barheight = 6,
  legend.barwidth = 1.5,
  legend.nbin = 300,
  legend.italic = FALSE,
  sankey.node_width = 0.15,
  sankey.node_whitespace = 0.03,
  sankey.alpha = 0.5,
  sankey.remove_axes = NULL,
  zoom = FALSE,
  sep = " / ",
  print = FALSE,
  text_factor = 1,
  font = getOption("plot2.font"),
  theme = getOption("plot2.theme", "theme_minimal2"),
  background = getOption("plot2.colour_background", "white"),
  markdown = TRUE,
  ...
)

Arguments

.data

data to plot

x

plotting 'direction' for the x axis. This can be:

  • A single variable from .data, such as x = column1

  • A function to calculate over one or more variables from .data, such as x = format(column1, "%Y"), or x = ifelse(column1 == "A", "Group A", "Other")

  • Multiple variables from .data, such as x = c(column1, column2, column3), or using selection helpers such as x = where(is.character) or x = starts_with("var_") (only allowed and required for Sankey plots using type = "sankey")

y

values to use for plotting along the y axis. This can be:

  • A single variable from .data, such as y = column1

  • Multiple variables from .data, such as y = c(column1, column2) or y = c(name1 = column1, "name 2" = column2), or using selection helpers such as y = where(is.double) or y = starts_with("var_") (multiple variables only allowed if category is not set)

  • A function to calculate over .data returning a single value, such as y = n() for the row count, or based on other variables such as y = n_distinct(person_id), y = max(column1), or y = median(column2) / column3

  • A function to calculate over .data returning multiple values, such as y = quantile(column1, c(0.25, 0.75)) or y = range(age) (multiple values only allowed if category is not set)

category, facet

plotting 'direction' (category is called 'fill' and 'colour' in ggplot2). This can be:

  • A single variable from .data, such as category = column1

  • A function to calculate over one or more variables from .data, such as category = median(column2) / column3, or facet = ifelse(column1 == "A", "Group A", "Other")

  • Multiple variables from .data, such as facet = c(column1, column2) (use sep to control the separator character)

  • One or more variables from .data using selection helpers, such as category = where(is.double) or facet = starts_with("var_")

The category can also be a date or date/time (class Date or POSIXt).

type, y_secondary.type

type of visualisation to use. This can be:

  • A ggplot2 geom name or their abbreviation such as "col" and "point". All geoms are supported (including geom_blank()).

    Full function names can be used (e.g., "geom_histogram"), but they can also be abbreviated (e.g., "h", "hist"). The following geoms can be abbreviated by their first character: area ("a"), boxplot ("b"), column ("c"), histogram ("h"), jitter ("j"), line ("l"), point ("p"), ribbon ("r"), and violin ("v").

    Please note: in ggplot2, 'bars' and 'columns' are equal, while many people consider 'bars' to be oriented horizontally and 'columns' to be oriented vertically, since Microsoft Excel has been using these terms this way for many years. For this reason, type = "bar" will set type = "col" and horizontal = TRUE.

  • One of these additional types:

    • "barpercent" (short: "bp"), which is effectively a shortcut to set type = "col" and horizontal = TRUE and x.max_items = 10 and x.sort = "freq-desc" and datalabels.format = "%n (%p)".

    • "linedot" (short: "ld"), which sets type = "line" and adds two point geoms using add_point(); one with large white dots and one with smaller dots using the colours set in colour. This is essentially equal to base R plot(..., type = "b") but with closed shapes.

    • "dumbbell" (short: "d"), which sets type = "point" and horizontal = TRUE, and adds a line between the points (using geom_segment()). The line colour cannot be changed. This plot type is only possible when the category has two distinct values.

    • "sankey" (short: "s") creates a Sankey plot using category for the flows and requires x to contain multiple variables from .data. By default, it also sets x.expand = c(0.05, 0.05) and y.limits = c(NA, NA) and y.expand = c(0.01, 0.01). The so-called nodes (the 'blocks' with text) are considered the datalabels, so you can set the text size and colour of the nodes using datalabels.size, datalabels.colour, and datalabels.colour_fill. The transparency of the flows can be set using sankey.alpha, and the width of the nodes can be set using sankey.node_width. Sankey plots can also be flipped using horizontal = TRUE.

  • Left blank. In this case, the type will be determined automatically: "boxplot" if there is no x axis or if the length of unique values per x axis item is at least 3, "point" if both the y and x axes are numeric, and the option "plot2.default_type" otherwise (which defaults to "col"). Use type = "blank" or type = "geom_blank" to not add a geom.

title, subtitle, caption, tag, x.title, y.title, category.title, legend.title, y_secondary.title

a title to use. This can be:

  • A character, which supports markdown by using md_to_expression() internally if markdown = TRUE (which is the default)

  • A function to calculate over .data, such as title = paste("Based on n =", n_distinct(person_id), "individuals") or subtitle = paste("Total rows:", n()), see Examples

  • An expression, e.g. using parse(text = "...")

The title will be guessed with get_plot_title() when left blank.

The category.title defaults to TRUE if the legend items are numeric.

title.linelength

maximum number of characters per line in the title, before a linebreak occurs

title.colour

text colour of the title

subtitle.linelength

maximum number of characters per line in the subtitle, before a linebreak occurs

subtitle.colour

text colour of the subtitle

na.replace

character to put in place of NA values if na.rm = FALSE

na.rm

remove NA values from showing in the plot

facet.position, facet.fill, facet.bold, facet.italic, facet.size, facet.margin, facet.repeat_lbls_x, facet.repeat_lbls_y, facet.drop, facet.nrow, facet.relative

additional settings for the plotting direction facet

facet.fixed_y

a logical to indicate whether all y scales should have the same limits. Defaults to TRUE only if the coefficient of variation (sd divided by mean) of the maximum values of y is less than 15%.

facet.fixed_x

a logical to indicate whether all x scales should have the same breaks. This acts like the inverse of x.drop.

x.date_breaks

breaks to use when the x axis contains dates, will be determined automatically if left blank. This accepts values such as "1 day" and "2 years".

x.date_labels

labels to use when the x axis contains dates, will be determined automatically if left blank. This accepts 'Excel' date-language such as "d mmmm yyyy".

x.date_remove_years

a logical to indicate whether the years of all x values must be unified. This will set the years of all x values to 1970 if the data does not contain a leap year, and to 1972 otherwise. This allows plotting years on the category while maintaining a date range on x. The default is FALSE, unless category contains all years present in x.

category.focus

a value of category that should be highlighted, meaning that all other values in category will be greyed out. This can also be a numeric value between 1 and the length of unique values of category, e.g. category.focus = 2 to focus on the second legend item.

colour

colour(s) to set, will be evaluated with colourpicker() if set. This can also be one of the viridis colours with automatic implementation for any plot: "viridis", "magma", "inferno", "plasma", "cividis", "rocket", "mako" or "turbo". This can also be a named vector to match values of category, see Examples. A named vector can also be used to manually sort the values of category.

colour_fill

colour(s) to be used for filling, will be determined automatically if left blank and will be evaluated with colourpicker()

colour_opacity

amount of opacity for colour/colour_fill (0 = solid, 1 = transparent)

x.lbl_angle

angle to use for the x axis in a counter-clockwise direction (i.e., a value of 90 will orient the axis labels from bottom to top, a value of 270 will orient the axis labels from top to bottom)

x.lbl_align

alignment for the x axis between 0 (left aligned) and 1 (right aligned)

x.lbl_italic

a logical to indicate whether the x labels should be in italics

x.lbl_taxonomy

a logical to transform all words of the x labels into italics that are in the microorganisms data set of the AMR package. This uses md_to_expression() internally and will set x.labels to parse expressions.

x.remove, y.remove

a logical to indicate whether the axis labels and title should be removed

x.position, y.position

position of the axis

x.max_items, category.max_items, facet.max_items

maximum number of items to use, defaults to infinite. All other values will be grouped and summarised using the summarise_function function. Please note: the sorting will be applied first, which allows e.g. plotting the top n most frequent values of the x axis by combining x.sort = "freq-desc" with x.max_items = n.

x.max_txt, category.max_txt, facet.max_txt

the text to use for the values that are not included in the number of *.max_items. The placeholder %n will be replaced with the outcome of the summarise_function function, the placeholder %p will be replaced with the percentage.

x.breaks, y.breaks

a breaks function or numeric vector to use for the axis

x.n_breaks, y.n_breaks

number of breaks, only useful if x.breaks or y.breaks (respectively) is NULL

x.trans, y.trans, category.trans

a transformation function to use, e.g. "log2". This can be: "asinh", "asn", "atanh", "boxcox", "compose", "date", "exp", "hms", "identity", "log", "log10", "log1p", "log2", "logit", "modulus", "probability", "probit", "pseudo_log", "reciprocal", "reverse", "sqrt", "time", "timespan", "yj".

x.expand, y.expand

expansion to use for the axis, can be length 1 or 2. x.expand defaults to 0.5 and y.expand defaults to 0.25, except for sf objects (then both default to 0).

x.limits, y.limits

limits to use for the axis, can be length 1 or 2. Use NA for the highest or lowest value in the data, e.g. y.limits = c(0, NA) to have the y scale start at zero.

x.labels, y.labels, y_secondary.labels

a labels function or character vector to use for the axis

x.character

a logical to indicate whether the values of the x axis should be forced to character. The default is FALSE, except for years (values between 2000 and 2050) and months (values from 1 to 12).

x.drop

logical to indicate whether factor levels should be dropped

x.mic

logical to indicate whether the x axis should be formatted as MIC values, by dropping all factor levels and adding missing factors of 2

x.zoom, y.zoom

a logical to indicate if the axis should be zoomed on the data, by setting x.limits = c(NA, NA) and x.expand = 0 for the x axis, or y.limits = c(NA, NA) and y.expand = 0 for the y axis

y.24h

a logical to indicate whether the y labels and breaks should be formatted as 24-hour sequences

y.age

a logical to indicate whether the y labels and breaks should be formatted as ages in years

y.scientific, y_secondary.scientific

a logical to indicate whether the y labels should be formatted in scientific notation, using format2_scientific(). Defaults to TRUE only if the range of the y values spans more than 10e5.

y.percent, y_secondary.percent

a logical to indicate whether the y labels should be formatted as percentages

y.percent_break

a value on which the y axis should have breaks

y_secondary

values to use for plotting along the secondary y axis. This functionality is poorly supported by ggplot2 and might give unexpected results. Setting the secondary y axis will set the colour to the axis titles.

y_secondary.colour, y_secondary.colour_fill

colours to set for the secondary y axis, will be evaluated with colourpicker()

category.labels, category.percent, category.breaks, category.expand, category.midpoint

settings for the plotting direction category.

category.limits

limits to use for a numeric category, can be length 1 or 2. Use NA for the highest or lowest value in the data, e.g. category.limits = c(0, NA) to have the scale start at zero.

category.date_breaks

breaks to use when the category contains dates, will be determined automatically if left blank. This will be passed on to seq.Date(by = ...) and thus can be: a number, taken to be in days, or a character string containing one of "day", "week", "month", "quarter" or "year" (optionally preceded by an integer and a space, and/or followed by "s").

category.date_labels

labels to use when the category contains dates, will be determined automatically if left blank. This accepts 'Excel' date-language such as "d mmmm yyyy".

category.character

a logical to indicate whether the values of the category should be forced to character. The default is FALSE, except for years (values between 2000 and 2050) and months (values from 1 to 12).

x.sort, category.sort, facet.sort

sorting of the plotting direction, defaults to TRUE, except for continuous values on the x axis (such as dates and numbers). Applying one of the sorting methods will transform the values to an ordered factor, which ggplot2 uses to orient the data. Valid values are:

  • A manual vector of values

  • TRUE: sort factors on their levels, otherwise sort ascending on alphabet, while maintaining numbers in the text (numeric sort)

  • FALSE: sort according to the order in the data

  • NULL: do not sort/transform at all

  • "asc" or "alpha": sort as TRUE

  • "desc": sort factors on their reversed levels, otherwise sort descending on alphabet, while maintaining numbers in the text (numeric sort)

  • "order" or "inorder": sort as FALSE

  • "freq" or "freq-desc": sort descending according to the frequencies of y computed by summarise_function (highest value first)

  • "freq-asc": sort ascending according to the frequencies of y computed by summarise_function (lowest value first)

x.complete, category.complete, facet.complete

a value to complete the data. This makes use of tidyr::full_seq() and tidyr::complete(). For example, using x.complete = 0 will apply data |> complete(full_seq(x, ...), fill = list(x = 0)). Using value TRUE (e.g., x.complete = TRUE) is identical to using value 0.

datalabels

values to show as datalabels, see also datalabels.format. This can be:

  • Left blank. This will default to the values of y in column-type plots, or when plotting spatial 'sf' data, the values of the first column. It will print a maximum of 25 labels unless datalabels = TRUE.

  • TRUE or FALSE to force or remove datalabels

  • A function to calculate over .data, such as datalabels = paste(round(column1), "\n", column2)

datalabels.round

number of digits to round the datalabels, applies to both "%n" and "%p" for replacement (see datalabels.format)

datalabels.format

format to use for datalabels. This can be a function (such as euros()) or a text. For the text, "%n" will be replaced by the count number, and "%p" will be replaced by the percentage of the total count. Use datalabels.format = NULL to not transform the datalabels.

datalabels.colour, datalabels.colour_fill, datalabels.size, datalabels.angle, datalabels.lineheight

settings for the datalabels

decimal.mark

decimal mark, defaults to dec_mark()

big.mark

thousands separator, defaults to big_mark()

summarise_function

a function to use if the data has to be summarised, see Examples. This can also be NULL, which will be converted to function(x) x.

stacked

a logical to indicate that values must be stacked

stackedpercent

a logical to indicate that values must be 100% stacked

horizontal

a logical to turn the plot 90 degrees using coord_flip(). This option also updates some theme options, so that e.g., x.lbl_italic will still apply to the original x axis.

reverse

a logical to reverse the values of category. Use legend.reverse to reverse the legend of category.

smooth

a logical to add a smooth. In histograms, this will add the density count as an overlaying line (default: TRUE). In all other cases, a smooth will be added using geom_smooth() (default: FALSE).

smooth.method, smooth.formula, smooth.se, smooth.level, smooth.alpha, smooth.linewidth, smooth.linetype, smooth.colour

settings for smooth

size

size of the geom. Defaults to 2 for geoms point and jitter, 5 for dumbbell plots (using type = "dumbbell"), and to 0.75 otherwise.

linetype

linetype of the geom, only suitable for geoms that draw lines. Defaults to 1.

linewidth

linewidth of the geom, only suitable for geoms that draw lines. Defaults to:

  • 0.5 for geoms that have no area (such as line), and for geoms boxplot/violin

  • 0.1 for sf

  • 0.25 for geoms that are continuous and have fills (such as area)

  • 1.0 for dumbbell plots (using type = "dumbbell")

  • 0.5 otherwise (such as histogram and area)

binwidth

width of bins (only useful for type = "histogram"), can be specified as a numeric value or as a function that calculates width from x, see geom_histogram(). It defaults to approx. diff(range(x)) divided by 12 to 22 based on the data.

width

width of the geom. Defaults to 0.75 for geoms boxplot, violin and jitter, and to 0.5 otherwise.

jitter_seed

seed (randomisation factor) to be set when using type = "jitter"

violin_scale

scale to be set when using type = "violin", can also be set to "area"

legend.position

position of the legend, must be "top", "right", "bottom", "left" or "none" (or NA or NULL), can be abbreviated. Defaults to "right" for numeric category values and 'sf' plots, and "top" otherwise.

legend.reverse, legend.barheight, legend.barwidth, legend.nbin, legend.italic

other settings for the legend

sankey.node_width

width of the vertical nodes in a Sankey plot (i.e., when type = "sankey")

sankey.node_whitespace

whitespace between the nodes

sankey.alpha

alpha of the flows in a Sankey plot (i.e., when type = "sankey")

sankey.remove_axes

logical to indicate whether all axes must be removed in a Sankey plot (i.e., when type = "sankey")

zoom

a logical to indicate if the plot should be scaled to the data, i.e., not having the x and y axes to start at 0. This will set x.zoom = TRUE and y.zoom = TRUE.

sep

separator character to use if multiple columns are given to either of the three directions: x, category and facet, e.g. facet = c(column1, column2)

print

a logical to indicate if the result should be printed instead of just returned

text_factor

text factor to use, which will apply to all texts shown in the plot

font

font (family) to use, can be set with options(plot2.font = "..."). Can be any installed system font or any of the > 1400 font names from Google Fonts.

theme

a valid ggplot2 theme to apply, or NULL to use the default theme_grey(). This argument accepts themes (e.g., theme_bw()), functions (e.g., theme_bw) and character strings (e.g., "theme_bw"). The default is theme_minimal2(), but can be set with options(plot2.theme = "...").

background

the background colour of the entire plot, can also be NA to remove it. Will be evaluated with colourpicker(). Only applies when theme is not NULL.

markdown

a logical to turn all labels and titles into plotmath expressions, by converting common markdown language using the md_to_expression() function (defaults to TRUE)

...

any argument to give to the geom. This will override automatically-set settings for the geom.

Value

a ggplot object

Details

The plot2() function is a convenient wrapper around many ggplot2 functions such as ggplot(), aes(), geom_col(), facet_wrap(), labs(), etc., and provides:

  • Writing as few lines of code as possible

  • Easy plotting in three 'directions': x (the regular x axis), category (replaces 'fill' and 'colour') and facet

  • Automatic setting of these 'directions' based on the input data

  • Setting in-place calculations for all plotting directions and even y

  • Easy way for sorting data in many ways (such as on alphabet, numeric value, frequency, original data order), by setting a single argument for the 'direction': x.sort, category.sort and facet.sort

  • Easy limiting values, e.g. by setting x.max_items = 5 or category.max_items = 5

  • Markdown support for any title text, with any theme

  • Integrated support for any Google Font and any installed system font

  • An extra clean, minimalistic theme with a lot of whitespace (but without unnecessary margins) that is ideal for printing: theme_minimal2()

  • Some conveniences from Microsoft Excel:

    • The y axis starts at 0 if possible

    • The y scale expands at the top to be better able to interpret all data points

    • Date breaks can be written in a human-readable format (such as "d mmm yyyy")

    • Labels with data values can easily be printed and are automatically determined

  • Support for any ggplot2 extension based on ggplot2::fortify()

The ggplot2 package in conjunction with the tidyr, forcats and cleaner packages can provide the above functionality, but the goal of the plot2() function is to generalise this into one function. The generic plot2() function currently has 149 arguments, all with a default value. Less typing, faster coding.

Examples

options(plot2.colour = NULL, plot2.colour_sf_fill = NULL)

head(iris)
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.1         3.5          1.4         0.2  setosa
#> 2          4.9         3.0          1.4         0.2  setosa
#> 3          4.7         3.2          1.3         0.2  setosa
#> 4          4.6         3.1          1.5         0.2  setosa
#> 5          5.0         3.6          1.4         0.2  setosa
#> 6          5.4         3.9          1.7         0.4  setosa

# no variables determined, so plot2() will try for itself -
# the type will be points since the first two variables are numeric
iris |>
  plot2()
#> i Using x = Sepal.Length
#> i Using y = Sepal.Width
#> i Using category = Species
#> i Using type = "point" since both axes are numeric


# if x and y are set, no additional mapping will be set:
iris |> 
  plot2(Sepal.Width, Sepal.Length)
#> i Using type = "point" since both axes are numeric

iris |> 
  plot2(Species, Sepal.Length)
#> i Using type = "boxplot" since all groups in Species contain at least three values


# the arguments are in this order: x, y, category, facet
iris |> 
  plot2(Sepal.Length, Sepal.Width, Petal.Length, Species)
#> i Using type = "point" since both axes are numeric
#> i Assuming facet.fixed_y = TRUE since the three y scales are roughly equal


iris |> 
  plot2(Sepal.Length, Sepal.Width, Petal.Length, Species,
        colour = "viridis") # set the viridis colours
#> i Using type = "point" since both axes are numeric
#> i Assuming facet.fixed_y = TRUE since the three y scales are roughly equal

      
iris |> 
  plot2(Sepal.Length, Sepal.Width, Petal.Length, Species,
        colour = c("white", "red", "black")) # set own colours
#> i Using type = "point" since both axes are numeric
#> i Using category.midpoint = 3.45 (the current category scale centre)
#> i Assuming facet.fixed_y = TRUE since the three y scales are roughly equal


# y can also be multiple (named) columns
iris |> 
  plot2(x = Sepal.Length,
        y = c(Length = Petal.Length, Width = Petal.Width),
        category.title = "Petal property")
#> i Assuming summarise_function = function(x) x
#> i Using type = "point" since both axes are numeric

iris |>
  # with included selection helpers such as where(), starts_with(), etc.:
  plot2(x = Species, y = where(is.double))
#> i Assuming summarise_function = function(x) x
#> i Using y = c(Petal.Length, Petal.Width, Sepal.Length, Sepal.Width)
#> i Using type = "boxplot" since all groups in Species and category contain at least three values

  
# support for secondary y axis
mtcars |>
  plot2(x = mpg,
        y = hp,
        y_secondary = disp ^ 2, 
        y_secondary.scientific = TRUE,
        title = "Secondary y axis sets colour to the axis titles")
#> i Using type = "point" since both axes are numeric



admitted_patients
#> # A tibble: 250 × 7
#>    date       patient_id gender   age age_group hospital ward   
#>    <date>          <dbl> <chr>  <dbl> <ord>     <fct>    <chr>  
#>  1 2002-01-14          1 M         78 75+       D        Non-ICU
#>  2 2002-03-17          2 M         78 75+       C        Non-ICU
#>  3 2002-04-08          3 M         78 75+       A        ICU    
#>  4 2002-04-14          4 M         72 55-74     C        Non-ICU
#>  5 2002-05-07          5 M         83 75+       C        Non-ICU
#>  6 2002-05-16          6 F         65 55-74     B        ICU    
#>  7 2002-05-16          7 M         47 25-54     D        Non-ICU
#>  8 2002-06-18          8 M         30 25-54     B        ICU    
#>  9 2002-06-23          9 M         82 75+       D        Non-ICU
#> 10 2002-06-23          9 M         82 75+       D        Non-ICU
#> # ℹ 240 more rows

# the arguments are in this order: x, y, category, facet
admitted_patients |>
  plot2(hospital, age)
#> i Using type = "boxplot" since all groups in hospital contain at least three values


admitted_patients |>
  plot2(hospital, age, gender)
#> i Using type = "boxplot" since all groups in hospital and gender contain at least three values

  
admitted_patients |>
  plot2(hospital, age, gender, ward)
#> i Using type = "boxplot" since all groups in hospital and gender and ward contain at least three values
#> i Assuming facet.fixed_y = TRUE since the two y scales are roughly equal

  
# or use any function for y
admitted_patients |>
  plot2(hospital, median(age), gender, ward)
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")
#> i Assuming facet.fixed_y = TRUE since the two y scales are roughly equal

admitted_patients |>
  plot2(hospital, n(), gender, ward)
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")


admitted_patients |>
  plot2(x = hospital,
        y = age,
        category = gender,
        colour = c("F" = "#3F681C", "M" = "#375E97"),
        colour_fill = "#FFBB00AA",
        linewidth = 1.25,
        y.age = TRUE)
#> i Using type = "boxplot" since all groups in hospital and gender contain at least three values


admitted_patients |>
  plot2(age, type = "hist")
#> i Using binwidth = 6.4 based on data
#> i Assuming smooth = TRUE for type = "histogram"


# even titles support calculations, including support for {glue}
admitted_patients |>
  plot2(age, type = "hist",
        title = paste("Based on n =", n_distinct(patient_id), "patients"),
        subtitle = paste("Total rows:", n()),
        caption = glue::glue("From {n_distinct(hospital)} hospitals"),
        x.title = paste("Age ranging from", paste(range(age), collapse = " to ")))
#> i Using binwidth = 6.4 based on data
#> i Assuming smooth = TRUE for type = "histogram"

 
# the default type is column, datalabels are automatically
# set in non-continuous types:
admitted_patients |> 
  plot2(hospital, n(), gender)
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")

  
admitted_patients |> 
  plot2(hospital, n(), gender,
        stacked = TRUE)
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")

        
admitted_patients |> 
  plot2(hospital, n(), gender,
        stackedpercent = TRUE)
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")


# two categories might benefit from a dumbbell plot:
admitted_patients |> 
  plot2(hospital, median(age), gender, type = "dumbbell")

 
# sort in any direction:
admitted_patients |>
  plot2(x = hospital,
        y = n(),
        category = gender,
        stacked = TRUE,
        x.sort = "freq-asc")
#> i Applying x.sort = "freq-asc" using summarise_function = sum
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")


# a manual order for x, combined with a sorted category:
admitted_patients |>
  plot2(x = hospital,
        y = n(),
        category = gender,
        stacked = TRUE,
        category.sort = "alpha-desc",
        x.sort = c("B", "D", "A")) # missing values ("C") will be added
#> i To compare single values in two categories (gender), a dumbbell plot can be used (type = "dumbbell" or type = "d")

        
# Sankey plots are supported too
Titanic |> # a table from base R
  plot2(type = "sankey",
        x = c(Age, Class, Survived),
        category = Sex)
#> ! Input class 'table' was transformed using `as.data.frame()`
#> i Using y = Freq since `as.data.table()` on a `table` results in a 'Freq' column
#> i Assuming sankey.remove_axes = TRUE


# matrices are supported as well, e.g. the result of cor()
correlation_matrix <- cor(mtcars)
class(correlation_matrix)
#> [1] "matrix" "array" 
head(correlation_matrix)
#>             mpg        cyl       disp         hp       drat         wt
#> mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684  0.6811719 -0.8676594
#> cyl  -0.8521620  1.0000000  0.9020329  0.8324475 -0.6999381  0.7824958
#> disp -0.8475514  0.9020329  1.0000000  0.7909486 -0.7102139  0.8879799
#> hp   -0.7761684  0.8324475  0.7909486  1.0000000 -0.4487591  0.6587479
#> drat  0.6811719 -0.6999381 -0.7102139 -0.4487591  1.0000000 -0.7124406
#> wt   -0.8676594  0.7824958  0.8879799  0.6587479 -0.7124406  1.0000000
#>             qsec         vs         am       gear       carb
#> mpg   0.41868403  0.6640389  0.5998324  0.4802848 -0.5509251
#> cyl  -0.59124207 -0.8108118 -0.5226070 -0.4926866  0.5269883
#> disp -0.43369788 -0.7104159 -0.5912270 -0.5555692  0.3949769
#> hp   -0.70822339 -0.7230967 -0.2432043 -0.1257043  0.7498125
#> drat  0.09120476  0.4402785  0.7127111  0.6996101 -0.0907898
#> wt   -0.17471588 -0.5549157 -0.6924953 -0.5832870  0.4276059

# without any arguments, the type is derived from the matrix itself
correlation_matrix |>
  plot2()
#> i Assuming type = "tile" since the matrix contains identical row and column names
#> ! Omitting printing of 121 datalabels - use datalabels = TRUE to force printing


# the same matrix with custom colours, datalabels and titles
correlation_matrix |>
  plot2(title = "Correlation matrix",
        category.title = "*r*-value",
        datalabels = TRUE,
        colour = c("certeblauw2", "white", "certeroze2"))
#> i Assuming type = "tile" since the matrix contains identical row and column names
#> i Using category.midpoint = 0 (the current category scale centre)



# every S3 extension available through ggplot2::fortify() works in
# plot2() too, for example a regression model:
lm(mpg ~ hp, data = mtcars) |>
  plot2(title = "Titles/captions *support* **markdown**",
        subtitle = "Axis titles contain the square notation: x^2",
        x = mpg ^ -3,
        y = hp ^ 2,
        smooth = TRUE,
        smooth.formula = "y ~ log(x)",
        smooth.method = "lm")
#> i Using type = "point" since both axes are numeric


# various other S3 implementations of plot2() exist as well:

# quality control (QC) plots, e.g. following Nelson's Quality Control Rules
if (require("certestats", warn.conflicts = FALSE)) {
  rnorm(n = 250, mean = 10, sd = 1) |>
    qc_test() |>
    plot2()
}


# geographic plots: sf objects ('simple features') can be plotted directly
if (require("sf")) {
  netherlands |>
    plot2(
      datalabels = paste0(province, "\n", round(area_km2))
    )
}
#> Loading required package: sf
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
#> i Using category = area_km2
#> i Assuming datalabels.centroid = TRUE. Set to FALSE for a point-on-surface placing of datalabels.


# Antimicrobial resistance (AMR) data analysis
# (this example only runs when the AMR package can be loaded)
if (require("AMR")) {
  # NOTE(review): this option is set for the rest of the session and is not
  # restored afterwards - presumably it switches AMR output to Dutch; confirm
  options(AMR_locale = "nl")
  
  # keep the 'mo' column plus the columns picked by penicillins() (listed in
  # the first message below), pass them to bug_drug_combinations() with
  # mo_gramstain as FUN, and plot the result
  example_isolates[, c("mo", penicillins())] |>
    bug_drug_combinations(FUN = mo_gramstain) |>
    plot2(y.percent_break = 0.25)
}
#> ℹ For penicillins() using columns 'PEN' (benzylpenicillin), 'OXA'
#>   (oxacillin), 'FLC' (flucloxacillin), 'AMX' (amoxicillin), 'AMC'
#>   (amoxicillin/clavulanic acid), 'AMP' (ampicillin), and 'TZP'
#>   (piperacillin/tazobactam)
#> ℹ Using column 'mo' as input for col_mo.
#> ! Omitting printing of 36 datalabels - use datalabels = TRUE to force printing

# sir_df() results per year, with datalabels showing the rounded percentage
# and the number of isolates.
# The condition uses the scalar `&&` instead of the elementwise `&`: it
# short-circuits, so dplyr is only loaded when AMR is available.
if (require("AMR") && require("dplyr")) {
  example_isolates |>
    select(date, NIT, FOS, AMC) |>
    group_by(year = format(date, "%Y")) |>
    sir_df() |>
    # NOTE(review): `year` is created by format() and is thus character, so
    # this is presumably a string comparison against "2015" - confirm
    filter(year >= 2015) |>
    plot2(datalabels = paste0(round(value * 100), "%\nn = ", isolates),
          y.percent_break = 0.125)
}
#> i Using x = year


# # support for any font
# mtcars |>
#   plot2(mpg, hp, font = "Rock Salt",
#         title = "This plot uses a Google Font")