These functions can be used to import data from local or remote paths, or from the internet. They work closely with the certeprojects
package to support Microsoft Planner project numbers. To support row names and older R versions, import_*()
functions return plain data.frames, not e.g. tibbles.
import(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
...
)
import_rds(filename, project_number = project_get_current_id(ask = FALSE), ...)
import_xlsx(
filename,
project_number = project_get_current_id(ask = FALSE),
sheet = 1,
range = NULL,
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = dec_mark(),
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_excel(
filename,
project_number = project_get_current_id(ask = FALSE),
sheet = 1,
range = NULL,
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = dec_mark(),
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_csv(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_csv2(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ",",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_tsv(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_txt(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
sep = "\t",
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ",",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_sav(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
datenames = "en",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
...
)
import_spss(
filename,
project_number = project_get_current_id(ask = FALSE),
auto_transform = TRUE,
datenames = "en",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
...
)
import_feather(
filename,
project_number = project_get_current_id(ask = FALSE),
col_select = everything(),
...
)
import_clipboard(
sep = "\t",
header = TRUE,
startrow = 1,
auto_transform = TRUE,
datenames = "nl",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = dec_mark(),
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
...
)
import_mail_attachment(
search = "hasattachment:yes",
search_subject = NULL,
search_from = NULL,
search_when = NULL,
search_attachment = NULL,
folder = certemail::get_inbox_name(account = account),
n = 5,
sort = "received desc",
account = certemail::connect_outlook(),
auto_transform = TRUE,
sep = ",",
...
)
import_url(
url,
auto_transform = TRUE,
sep = ",",
datenames = "en",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0,
...
)
import_teams(
full_teams_path = NULL,
account = connect_teams(),
auto_transform = TRUE,
sep = ",",
datenames = "en",
dateformat = "yyyy-mm-dd",
timeformat = "HH:MM",
decimal.mark = ".",
big.mark = "",
timezone = "UTC",
na = c("", "NULL", "NA", "<NA>"),
skip = 0
)
the full path of the file to be imported, will be parsed to a character, can also be a remote location (from http/https/ftp/ssh, GitHub/GitLab)
a Microsoft Planner project number
transform the imported data with auto_transform()
arguments passed on to methods
Excel sheet to import, defaults to first sheet
a cell range to read from, allows typical Excel ranges such as "B3:D87" and "Budget!B2:G14"
language of the date names, such as weekdays and months
expected date format, will be coerced with format_datetime()
expected time format, will be coerced with format_datetime()
separator for decimal numbers
separator for thousands
expected time zone
values to interpret as NA
number of first rows to skip
character to separate values in a row
columns to select, supports the tidyselect language
use first row as header
first row to start importing
an OData filter, ignores sort
and defaults to search only mails with attachments
a character, equal to search = "subject:(search_subject)"
, case-insensitive
a character, equal to search = "from:(search_from)"
, case-insensitive
a Date vector of size 1 or 2, equal to search = "received:date1..date2"
, see Examples
a character to use a regular expression for attachment file names
email folder name to search in, defaults to Inbox of the current user by calling get_inbox_name()
maximum number of emails to search
initial sorting
a Teams account from Azure or an AzureAuth
Microsoft 365 token, e.g. retrieved with certeprojects::connect_teams()
remote location of any data set, can also be a (non-raw) GitHub/GitLab link
a full path in Teams, including the Team name and the channel name. Leave blank to use interactive mode, which allows file/folder picking from a list in the console.
Importing any unlisted filetype using import()
requires the rio
package to be installed.
Importing an Excel file using import_xlsx()
or import_excel()
requires the readxl
package to be installed.
Importing an SPSS file using import_sav()
or import_spss()
requires the haven
package to be installed.
Importing a Feather file using import_feather()
requires the arrow
package to be installed. Apache Feather provides efficient binary columnar serialization for data sets, enabling easy sharing of data across data analysis languages (such as between Python and R). Use the col_select
argument (which supports the tidyselect language) for specific data selection to improve importing speed.
Importing the clipboard using import_clipboard()
requires the clipr
package to be installed.
Importing mail attachments using import_mail_attachment()
requires the certemail
package to be installed. It calls download_mail_attachment()
internally and saves the attachment to a temporary folder. For all folder names, run: sapply(certemail::connect_outlook()$list_folders(), function(x) x$properties$displayName)
.
The import_url()
function tries to download the file first, after which it will be imported using the appropriate import_*()
function.
The import_teams()
function uses certeprojects::teams_download_file()
to provide an interactive way to select a file in any Team, to download the file, and to import the file using the appropriate import_*()
function.
export_csv(iris)
#> Exported data set (150 × 5) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/iris.csv' (3.9 kB).
import_csv("iris") |> head()
#>
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 5.1 3.5 1.4 0.2 setosa
#> 2 4.9 3.0 1.4 0.2 setosa
#> 3 4.7 3.2 1.3 0.2 setosa
#> 4 4.6 3.1 1.5 0.2 setosa
#> 5 5.0 3.6 1.4 0.2 setosa
#> 6 5.4 3.9 1.7 0.4 setosa
# the above is equal to:
# export(iris, "iris.csv")
# import("iris.csv") |> head()
# row names are also supported
export_csv(mtcars)
#> Note: Row names added as first column 'rownames'
#> Exported data set (32 × 12) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/mtcars.csv' (1.7 kB).
import_csv("mtcars") |> head()
#>
#> Row names restored from first column.
#> mpg cyl disp hp drat wt qsec vs am gear carb
#> Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
#> Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
#> Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
#> Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
#> Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
#> Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Apache's Feather format is column-based
# and allows for specific and fast file reading
library(dplyr, warn.conflicts = FALSE)
starwars |> export_feather()
#> Exported data set (87 × 14) to '/tmp/RtmppMroYG/file16d864c92d0b/reference/starwars.feather' (12.2 kB).
import("starwars.feather",
col_select = starts_with("h")) |>
head()
#> height hair_color homeworld
#> 1 172 blond Tatooine
#> 2 167 <NA> Tatooine
#> 3 96 <NA> Naboo
#> 4 202 none Tatooine
#> 5 150 brown Alderaan
#> 6 178 brown, grey Tatooine
# (cleanup)
file.remove("iris.csv")
#> [1] TRUE
file.remove("mtcars.csv")
#> [1] TRUE
file.remove("starwars.feather")
#> [1] TRUE
if (FALSE) { # \dontrun{
# ---- Microsoft Teams support -------------------------------------------
# IMPORTING
# import from Teams by picking a folder interactively from any Team
x <- import_teams()
# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
x <- import_teams(full_teams_path = "MyTeam/MyChannel/MyFolder/MyFile.xlsx")
# EXPORTING
# export to Teams by picking a folder interactively from any Team
mtcars |> export_teams()
# the default is RDS, but you can set `filename` to choose the file type yourself
mtcars |> export_teams("mtcars.xlsx")
# to NOT pick a Teams folder (e.g. in non-interactive mode), set `full_teams_path`
mtcars |> export_teams("mtcars.xlsx", full_teams_path = "MyTeam/MyChannel/MyFolder")
mtcars |> export_teams(full_teams_path = "MyTeam/MyChannel/MyFolder")
} # }