Download Data from a Local or Remote Database — get_diver

Thes functions can be used to download local or remote database data, e.g. Spectre data from DiveLine on a Diver server (from Dimensional Insight). The get_diver_data() function sets up an ODBC connection (using db_connect()), which requires their quite limited DI-ODBC driver.

get_diver_data(
  date_range = this_year(),
  where = NULL,
  post_where = NULL,
  review_qry = interactive(),
  antibiogram_type = "sir",
  distinct = TRUE,
  auto_transform = TRUE,
  snake_case = TRUE,
  preset = read_secret("db.preset_default"),
  add_cols = NULL,
  date_column = NULL,
  diver_cbase = NULL,
  diver_project = read_secret("db.diver_project"),
  diver_dsn = if (diver_testserver == FALSE) read_secret("db.diver_dsn") else
    read_secret("db.diver_dsn_test"),
  diver_testserver = FALSE,
  diver_tablename = "data",
  info = interactive(),
  log_file = read_secret("db.query_log"),
  limit = Inf,
  only_real_patients = TRUE,
  only_conducted_tests = TRUE,
  only_validated = FALSE,
  only_requested = FALSE,
  convert_numeric_where = TRUE,
  in_background = FALSE,
  ...
)

certedb_query(query, where = NULL, auto_transform = TRUE, info = interactive())

get_duckdb_data(
  date_range = this_year(),
  where = NULL,
  preset = NULL,
  review_qry = interactive(),
  duckdb_path = read_secret("db.duckdb_path"),
  duckdb_table = read_secret("db.duckdb_table"),
  auto_transform = TRUE,
  info = interactive()
)

certedb_getmmb(
  dates = NULL,
  where = NULL,
  select_preset = "mmb",
  preset = "mmb",
  select = NULL,
  add_cols = NULL,
  info = interactive(),
  first_isolates = FALSE,
  eucast_rules = "all",
  keep_synonyms = getOption("AMR_keep_synonyms", FALSE),
  mic = FALSE,
  disk = FALSE,
  zipcodes = FALSE,
  ziplength = 4,
  tat_hours = FALSE,
  log_file = read_secret("db.query_log"),
  only_real_patients = TRUE,
  only_conducted_tests = TRUE,
  only_validated = FALSE,
  only_show_query = FALSE,
  review_where = interactive(),
  auto_transform = TRUE,
  query = NULL,
  ...
)

certedb_getmmb_tat(
  dates = NULL,
  where = NULL,
  add_cols = NULL,
  info = interactive(),
  only_real_patients = TRUE,
  only_conducted_tests = TRUE,
  only_validated = TRUE,
  only_show_query = FALSE,
  ...
)

get_glims10_data(
  date_range = this_year(),
  where = NULL,
  review_qry = interactive(),
  antibiogram_type = "sir",
  distinct = TRUE,
  auto_transform = TRUE,
  diver_tablename = "glims10",
  info = interactive(),
  limit = Inf,
  ...
)

Arguments

date_range: date range, can be length 1 or 2 (or more to use the min/max) to filter on the column specified in the YAML file, see presets. Defaults to this_year(). Use NULL to set no date filter. Can also be years, or functions such as last_month(). Date-time ojects will be converted to dates, so using times as input is useless. It is supported to filter on a date-time column though.
where, post_where: arguments to filter data on, will be passed on to filter(). Do not use && or || but only & or | in filtering. The post_where will be run after all downloading and post-processing, but before the auto-transform.
review_qry: a logical to indicate whether the query must be reviewed first, defaults to TRUE in interactive mode and FALSE otherwise. This will always be FALSE in Quarto / R Markdown, since the output of knitr::pandoc_to() must be NULL.
antibiogram_type: antibiotic transformation mode. Leave blank to strip antibiotic results from the data, "sir" to keep SIR values, "mic" to keep MIC values or "disk" to keep disk diffusion values. Values will be cleaned with as.sir(), as.mic() or as.disk().
distinct: logical to apply distinct() to the resulting data set
auto_transform: logical to apply auto_transform() to the resulting data set
snake_case: logical to convert column names to snake case, only when auto_transform = TRUE
preset: a preset to choose from presets(). Will be ignored if diver_cbase is set, even if it is set to NULL. Be sure to read the documentation on how to use presets, and to see in which order the YAML keys will be run.
add_cols: extra column to include in the selection. Can be named to set a new column name. Use gl or di to quickly pick from a list.
date_column: column name of data set to query. Normally this should be set in a preset, but this argument can be used to override that.
diver_cbase, diver_project, diver_dsn, diver_testserver: properties to set in db_connect(). The diver_cbase argument will be based on preset, but can also be set to blank NULL to manually select a cBase in a popup window.
diver_tablename: name of the database table to download data from. This is hard-coded by DI and should normally never be changed.
info: settings for old certedb_getmmb() function
log_file: a file path to which the query will be logged, after collection and before any transformation. Use NULL or "" to prevent logging.
limit: maximum number of rows to return.
only_real_patients: logical to include only real patients, i.e., remove test and Q&A samples
only_conducted_tests: logical to include only tests that were not stopped
only_validated: settings for old certedb_getmmb() function
only_requested: logical to include only requested tests
convert_numeric_where: logical to convert numeric vectors in the WHERE to character vectors. For example, where = PatientID %in% pat_numbers would update the WHERE clause to make the numeric vector pat_numbers a character vector. Using %in% with numeric vectors in a Diver query has been known to return insufficient rows.
in_background: run data collection in the background using callr::r_bg(). Use ...$get_result() to retrieve results, or ...$is_active() to check whether the background process still runs.
...: not used anymore, previously settings for old certetools::certedb_getmmb() function
query: a data.frame to view the query of, or a character string to run as query in certedb_getmmb() (which will ignore all other arguments, except for where, auto_transform and info).
duckdb_path: path to the local DuckDB database
duckdb_table: name of the DuckDB database to retrieve data from
dates: date range, can be length 1 or 2 (or more to use the min/max) to filter on the column Ontvangstdatum. Defaults to this_year(). Use NULL to set no date filter. Can also be years, or functions such as last_month().
select_preset: settings for old certedb_getmmb() function
select: settings for old certedb_getmmb() function
first_isolates: settings for old certedb_getmmb() function
eucast_rules: settings for old certedb_getmmb() function
keep_synonyms: A logical to indicate if old, previously valid taxonomic names must be preserved and not be corrected to currently accepted names. The default is FALSE, which will return a note if old taxonomic names were processed. The default can be set with the package option AMR_keep_synonyms, i.e. options(AMR_keep_synonyms = TRUE) or options(AMR_keep_synonyms = FALSE).
mic: settings for old certedb_getmmb() function
disk: settings for old certedb_getmmb() function
zipcodes: settings for old certedb_getmmb() function
ziplength: settings for old certedb_getmmb() function
tat_hours: settings for old certedb_getmmb() function
only_show_query: settings for old certedb_getmmb() function
review_where: a logical to indicate whether the query must be reviewed first, defaults to TRUE in interactive mode and FALSE otherwise. This will always be FALSE in Quarto / R Markdown, since the output of knitr::pandoc_to() must be NULL.

Details

These functions return a 'certedb tibble' from Diver or the certemmb MySQL database, which prints information in the tibble header about the used source and current user.

Use certedb_query() to retrieve the original query that was used to download the data.

Using certedb_query("your qry here") is identical to using certedb_getmmb(query = "your qry here").

Examples

if (FALSE) { # \dontrun{

# peek-preview of a cBase:
get_diver_data(diver_cbase = "models/MedEpi/GLIMS10_Resistenties.cbase",
               date_range = NULL, limit = 10, review_qry = FALSE, auto_transform = FALSE)

# these two work identical:
get_diver_data(date_range = 2024, where = BepalingCode == "PXNCOV")
get_diver_data(2024, BepalingCode == "PXNCOV")

# use gl$ to pull a list with column names while you type
get_diver_data(2024, gl$BepalingCode == "PXNCOV")

# gl$ can also be used for adding columns, which can also be named
get_diver_data(2024, gl$BepalingCode == "PXNCOV", add_cols = gl$AfnameJaar)
get_diver_data(2024, gl$BepalingCode == "PXNCOV", add_cols = c(jaar = gl$AfnameJaar))

# for the `where`, use `&` or `|`:
get_diver_data(last_month(),
               gl$BepalingCode == "PXNCOV" & gl$Zorglijn == "2e lijn")
get_diver_data(c(2020:2024),
               where = gl$BepalingCode == "PXNCOV" | gl$Zorglijn == "2e lijn")


# use %like%, %unlike%, %like_case% or %unlike_case% for regular expressions
get_diver_data(2024, where = gl$MateriaalNaam %like% "Bloed")
get_diver_data(2024, where = gl$MateriaalNaam %unlike% "Bloed")
get_diver_data(2024, where = gl$MateriaalNaam %like_case% "bloed")
get_diver_data(2024, where = gl$MateriaalNaam %unlike_case% "Bloed")

get_diver_data(2024,
               where = gl$BepalingNaam %like% "Noro" &
                         gl$PatientLeeftijd >= 75)
                         
# R objects will be converted
materialen <- c("A", "B", "C")
get_diver_data(2024, where = gl$MateriaalNaam %in% materialen)
leeftijden <- 65:85
get_diver_data(2024, where = gl$PatientLeeftijd %in% leeftijden)


# USING DIVER INTEGRATOR LANGUAGE --------------------------------------

# See the website for an overview of allowed functions:
# https://www.dimins.com/online-help/workbench_help/Content/ODBC/di-odbc-sql-reference.html

# Use Diver Integrator functions within EVAL():              
get_diver_data(2024, where = EVAL('regexp(value("MateriaalCode"),"^B")'))

get_diver_data(
  2024,
  where = EVAL('rolling(12, value("OntvangstDatum"), date("2024/11/27"))')
)

} # }


# USING DUCKDB DATABASE ------------------------------------------------

# create a local duckdb database and write a table
db <- db_connect(duckdb::duckdb(), "~/my_duck.db")
#> i Opening connection...
#>  OK
db |> db_write_table("iris", values = iris)
db |> db_close()
#> i Closing connection...
#> OK

df <- get_duckdb_data(date_range = NULL,
                      where = Species == "setosa" & Sepal.Width > 3,
                      # not needed in production environment:
                      duckdb_path = "~/my_duck.db",
                      duckdb_table = "iris",
                      review_qry = FALSE,
                      info = TRUE)
#> i Opening connection...
#>  OK
#> i Retrieving initial cBase...
#>  OK (0 secs; 5 columns) 
#> i Collecting data...
#>  OK (0 secs; 42 × 5 observations) 
#> i Closing connection...
#> OK
#> 
df
#> # A DuckDB tibble: 42 × 5
#> # Retrieved from:  "~/my_duck.db"
#> # Retrieved on:    8 aug 2025 08:02
#> # Retrieved by:    CERTE\runner
#>    sepal_length sepal_width petal_length petal_width species
#>  *        <dbl>       <dbl>        <dbl>       <dbl> <chr>  
#>  1          5.1         3.5          1.4         0.2 setosa 
#>  2          4.7         3.2          1.3         0.2 setosa 
#>  3          4.6         3.1          1.5         0.2 setosa 
#>  4          5           3.6          1.4         0.2 setosa 
#>  5          5.4         3.9          1.7         0.4 setosa 
#>  6          4.6         3.4          1.4         0.3 setosa 
#>  7          5           3.4          1.5         0.2 setosa 
#>  8          4.9         3.1          1.5         0.1 setosa 
#>  9          5.4         3.7          1.5         0.2 setosa 
#> 10          4.8         3.4          1.6         0.2 setosa 
#> # ℹ 32 more rows
#> # ℹ Use `certedb_query()` to get the query of this DuckDB tibble
certedb_query(df)
#> # This query was run on "~/my_duck.db" on 2025-08-08 08:02 by CERTE\runner, yielding a DuckDB tibble of 42 × 5 
#> SELECT iris.*
#> FROM iris
#> WHERE (Species = 'setosa' AND "Sepal.Width" > 3.0)

# remove the database
unlink("~/my_duck.db")