Import Data Sets

These functions can be used to import data, from local or remote paths, or from the internet. They work closely with the certeprojects package to support Microsoft Planner project numbers. To support row names and older R versions, import_*() functions return plain data.frames, not e.g. tibbles.

import(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  encoding = "UTF-8",
  key = read_secret("tools.encryption_password"),
  ...
)

import_rds(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  key = read_secret("tools.encryption_password"),
  ...
)

readRDS(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  key = read_secret("tools.encryption_password"),
  ...
)

import_xlsx(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_excel(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_csv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_csv2(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_tsv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_txt(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  sep = "\t",
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_sav(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_spss(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_feather(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  col_select = everything(),
  ...
)

decrypt_object(object, key = read_secret("tools.encryption_password"))

import_clipboard(
  sep = "\t",
  header = TRUE,
  startrow = 1,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_mail_attachment(
  search = "hasattachment:yes",
  search_subject = NULL,
  search_from = NULL,
  search_when = NULL,
  search_attachment = NULL,
  folder = certemail::get_inbox_name(account = account),
  n = 5,
  sort = "received desc",
  account = certemail::connect_outlook(),
  auto_transform = TRUE,
  sep = ",",
  ...
)

import_url(
  url,
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8",
  ...
)

import_teams(
  full_teams_path = NULL,
  account = connect_teams(),
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  encoding = "UTF-8"
)

Arguments

filename: the full path of the file to be imported, will be parsed to a character, can also be a remote location (from http/https/ftp/ssh, GitHub/GitLab)
project_number: a Microsoft Planner project number
auto_transform: transform the imported data with auto_transform()
encoding: Default encoding. This only affects how the file is read.
key: a character to decrypt the file, see export() for explanation of key. For manual decryption, run decrypt_object().
...: arguments passed on to methods
sheet: Excel sheet to import, defaults to first sheet
range: a cell range to read from, allows typical Excel ranges such as "B3:D87" and "Budget!B2:G14"
datenames: language of the date names, such as weekdays and months
dateformat: expected date format, will be coerced with format_datetime()
timeformat: expected time format, will be coerced with format_datetime()
decimal.mark: separator for decimal numbers
big.mark: separator for thousands
timezone: expected time zone
na: values to interpret as NA
skip: number of first rows to skip
sep: character to separate values in a row
col_select: columns to select, supports the tidyselect language
object: object to decrypt
header: use first row as header
startrow: first row to start importing
search: an ODATA filter, ignores sort and defaults to search only mails with attachments
search_subject: a character, equal to search = "subject:(search_subject)", case-insensitive
search_from: a character, equal to search = "from:(search_from)", case-insensitive
search_when: a Date vector of size 1 or 2, equal to search = "received:date1..date2", see Examples
search_attachment: a character to use a regular expression for attachment file names
folder: email folder name to search in, defaults to Inbox of the current user by calling get_inbox_name()
n: maximum number of emails to search
sort: initial sorting
account: a Teams account from Azure or an AzureAuth Microsoft 365 token, e.g. retrieved with certeprojects::connect_teams()
url: remote location of any data set, can also be a (non-raw) GitHub/GitLab link
full_teams_path: a full path in Teams, including the Team name and the channel name. Leave blank to use interactive mode, which allows file/folder picking from a list in the console.

Details

Importing any unlisted filetype using import() requires the rio package to be installed.

Importing an Excel file using import_xlsx() or import_excel() requires the readxl package to be installed.

Importing an SPSS file using import_sav() or import_spss() requires the haven package to be installed.

Importing a Feather file using import_feather() requires the arrow package to be installed. Apache Feather provides efficient binary columnar serialization for data sets, enabling easy sharing of data across data analysis languages (such as between Python and R). Use the col_select argument (which supports the tidyselect language) for specific data selection to improve importing speed.

Importing the clipboard using import_clipboard() requires the clipr package to be installed.

Importing mail attachments using import_mail_attachment() requires the certemail package to be installed. It calls download_mail_attachment() internally and saves the attachment to a temporary folder. For all folder names, run: sapply(certemail::connect_outlook()$list_folders(), function(x) x$properties$displayName).

The import_url() function tries to download the file first, after which it will be imported using the appropriate import_*() function.

The import_teams() function uses certeprojects::teams_download_file() to provide an interactive way to select a file in any Team, to download the file, and to import the file using the appropriate import_*() function.

Examples

export_csv(iris)
#> Exported data set (150 × 5) to '/tmp/Rtmp1G6y8P/file1e52763c45fb/reference/iris.csv' (3.9 kB).
import_csv("iris") |> head()
#> 
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.1         3.5          1.4         0.2  setosa
#> 2          4.9         3.0          1.4         0.2  setosa
#> 3          4.7         3.2          1.3         0.2  setosa
#> 4          4.6         3.1          1.5         0.2  setosa
#> 5          5.0         3.6          1.4         0.2  setosa
#> 6          5.4         3.9          1.7         0.4  setosa

# the above is equal to:
# export(iris, "iris.csv")
# import("iris.csv") |> head()


# row names are also supported
export_csv(mtcars)
#> Note: Row names added as first column 'rownames'
#> Exported data set (32 × 12) to '/tmp/Rtmp1G6y8P/file1e52763c45fb/reference/mtcars.csv' (1.7 kB).
import_csv("mtcars") |> head()
#> 
#> Row names restored from first column.
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1


# Apache's Feather format is column-based
# and allows for specific and fast file reading
library(dplyr, warn.conflicts = FALSE)
starwars |> export_feather()
#> Exported data set (87 × 14) to '/tmp/Rtmp1G6y8P/file1e52763c45fb/reference/starwars.feather' (12.2 kB).
import("starwars.feather",
       col_select = starts_with("h")) |> 
  head()
#>   height  hair_color homeworld
#> 1    172       blond  Tatooine
#> 2    167        <NA>  Tatooine
#> 3     96        <NA>     Naboo
#> 4    202        none  Tatooine
#> 5    150       brown  Alderaan
#> 6    178 brown, grey  Tatooine
  

# (cleanup)
file.remove("iris.csv")
#> [1] TRUE
file.remove("mtcars.csv")
#> [1] TRUE
file.remove("starwars.feather")
#> [1] TRUE

if (FALSE) { # \dontrun{

# ---- Microsoft Teams support -------------------------------------------

# IMPORTING

# import from Teams by picking a folder interactively from any Team
x <- import_teams()

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
x <- import_teams(full_teams_path = "MyTeam/MyChannel/MyFolder/MyFile.xlsx")


# EXPORTING

# export to Teams by picking a folder interactively from any Team
mtcars |> export_teams()

# the default is RDS, but you can set `filename` to specify the file format yourself
mtcars |> export_teams("mtcars.xlsx")

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
mtcars |> export_teams("mtcars.xlsx", full_teams_path = "MyTeam/MyChannel/MyFolder")
mtcars |> export_teams(full_teams_path = "MyTeam/MyChannel/MyFolder")

} # }

Arguments

Details

See also

Examples