These functions can be used to import data, from local or remote paths, or from the internet. They work closely with the certeprojects package to support Microsoft Planner project numbers. To support row names and older R versions, import_*() functions return plain data.frames, not e.g. tibbles.

import(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  ...
)

import_rds(filename, project_number = project_get_current_id(ask = FALSE), ...)

import_xlsx(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_excel(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  sheet = 1,
  range = NULL,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_csv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_csv2(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_tsv(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_txt(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  sep = "\t",
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ",",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_sav(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_spss(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  auto_transform = TRUE,
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_feather(
  filename,
  project_number = project_get_current_id(ask = FALSE),
  col_select = everything(),
  ...
)

import_clipboard(
  sep = "\t",
  header = TRUE,
  startrow = 1,
  auto_transform = TRUE,
  datenames = "nl",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = dec_mark(),
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  ...
)

import_mail_attachment(
  search = "hasattachment:yes",
  search_subject = NULL,
  search_from = NULL,
  search_when = NULL,
  search_attachment = NULL,
  folder = certemail::get_inbox_name(account = account),
  n = 5,
  sort = "received desc",
  account = certemail::connect_outlook(),
  auto_transform = TRUE,
  sep = ",",
  ...
)

import_url(
  url,
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0,
  ...
)

import_teams(
  full_teams_path = NULL,
  account = connect_teams(),
  auto_transform = TRUE,
  sep = ",",
  datenames = "en",
  dateformat = "yyyy-mm-dd",
  timeformat = "HH:MM",
  decimal.mark = ".",
  big.mark = "",
  timezone = "UTC",
  na = c("", "NULL", "NA", "<NA>"),
  skip = 0
)

Arguments

filename

the full path of the file to be imported, will be parsed to a character, can also be a remote location (from http/https/ftp/ssh, GitHub/GitLab)

project_number

a Microsoft Planner project number

auto_transform

transform the imported data with auto_transform()

...

arguments passed on to methods

sheet

Excel sheet to import, defaults to first sheet

range

a cell range to read from, allows typical Excel ranges such as "B3:D87" and "Budget!B2:G14"

datenames

language of the date names, such as weekdays and months

dateformat

expected date format, will be coerced with format_datetime()

timeformat

expected time format, will be coerced with format_datetime()

decimal.mark

separator for decimal numbers

big.mark

separator for thousands

timezone

expected time zone

na

values to interpret as NA

skip

number of first rows to skip

sep

character to separate values in a row

col_select

columns to select, supports the tidyselect language

header

use first row as header

startrow

first row to start importing

search

an ODATA filter, ignores sort and defaults to search only mails with attachments

search_subject

a character, equal to search = "subject:(search_subject)", case-insensitive

search_from

a character, equal to search = "from:(search_from)", case-insensitive

search_when

a Date vector of size 1 or 2, equal to search = "received:date1..date2", see Examples

search_attachment

a character to use a regular expression for attachment file names

folder

email folder name to search in, defaults to Inbox of the current user by calling get_inbox_name()

n

maximum number of emails to search

sort

initial sorting

account

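for import_teams(): a Teams account from Azure or an AzureAuth Microsoft 365 token, e.g. retrieved with certeprojects::connect_teams(). For import_mail_attachment(): an Outlook account, by default retrieved with certemail::connect_outlook()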

url

remote location of any data set, can also be a (non-raw) GitHub/GitLab link

full_teams_path

a full path in Teams, including the Team name and the channel name. Leave blank to use interactive mode, which allows file/folder picking from a list in the console.

Details

Importing any unlisted filetype using import() requires the rio package to be installed.

Importing an Excel file using import_xlsx() or import_excel() requires the readxl package to be installed.

Importing an SPSS file using import_sav() or import_spss() requires the haven package to be installed.

Importing a Feather file using import_feather() requires the arrow package to be installed. Apache Feather provides efficient binary columnar serialization for data sets, enabling easy sharing of data across data analysis languages (such as between Python and R). Use the col_select argument (which supports the tidyselect language) for specific data selection to improve importing speed.

Importing the clipboard using import_clipboard() requires the clipr package to be installed.

Importing mail attachments using import_mail_attachment() requires the certemail package to be installed. It calls download_mail_attachment() internally and saves the attachment to a temporary folder. For all folder names, run: sapply(certemail::connect_outlook()$list_folders(), function(x) x$properties$displayName).

The import_url() function tries to download the file first, after which it will be imported using the appropriate import_*() function.

The import_teams() function uses certeprojects::teams_download_file() to provide an interactive way to select a file in any Team, to download the file, and to import the file using the appropriate import_*() function.

See also

Examples

export_csv(iris)
#> Exported data set (150 × 5) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/iris.csv' (3.9 kB).
import_csv("iris") |> head()
#> 
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.1         3.5          1.4         0.2  setosa
#> 2          4.9         3.0          1.4         0.2  setosa
#> 3          4.7         3.2          1.3         0.2  setosa
#> 4          4.6         3.1          1.5         0.2  setosa
#> 5          5.0         3.6          1.4         0.2  setosa
#> 6          5.4         3.9          1.7         0.4  setosa

# the above is equal to:
# export(iris, "iris.csv")
# import("iris.csv") |> head()


# row names are also supported
export_csv(mtcars)
#> Note: Row names added as first column 'rownames'
#> Exported data set (32 × 12) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/mtcars.csv' (1.7 kB).
import_csv("mtcars") |> head()
#> 
#> Row names restored from first column.
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1


# Apache's Feather format is column-based
# and allows for specific and fast file reading
library(dplyr, warn.conflicts = FALSE)
starwars |> export_feather()
#> Exported data set (87 × 14) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/starwars.feather' (12.2 kB).
import("starwars.feather",
       col_select = starts_with("h")) |> 
  head()
#>   height  hair_color homeworld
#> 1    172       blond  Tatooine
#> 2    167        <NA>  Tatooine
#> 3     96        <NA>     Naboo
#> 4    202        none  Tatooine
#> 5    150       brown  Alderaan
#> 6    178 brown, grey  Tatooine
  

# (cleanup)
file.remove("iris.csv")
#> [1] TRUE
file.remove("mtcars.csv")
#> [1] TRUE
file.remove("starwars.feather")
#> [1] TRUE

if (FALSE) { # \dontrun{

# ---- Microsoft Teams support -------------------------------------------

# IMPORTING

# import from Teams by picking a folder interactively from any Team
x <- import_teams()

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
x <- import_teams(full_teams_path = "MyTeam/MyChannel/MyFolder/MyFile.xlsx")


# EXPORTING

# export to Teams by picking a folder interactively from any Team
mtcars |> export_teams()

# the default is RDS, but you can set `filename` to specify the file type yourself
mtcars |> export_teams("mtcars.xlsx")

# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
mtcars |> export_teams("mtcars.xlsx", full_teams_path = "MyTeam/MyChannel/MyFolder")
mtcars |> export_teams(full_teams_path = "MyTeam/MyChannel/MyFolder")

} # }