Available formats:
From sources:
CSV
Import from local storage:
library(data.table)
products_csv <- fread("data/products.csv")
products_csv
Import from web (this repo in github):
products_web <- fread("https://raw.githubusercontent.com/codez0mb1e/StarRter/master/data/products.csv")
products_web
Compare results:
dim(products_web)
## [1] 151141 5
names(products_web)
## [1] "product_num" "department" "commodity" "brand_ty" "x5"
stopifnot(
dim(products_web) == dim(products_csv),
names(products_web) == names(products_csv)
)
Excel
library(readxl)
# read workbook sheets
excel_sheets(path = "data/products.xlsx")
## [1] "metadata" "products data" "grocery list"
# import sheet of interest
products_xlsx <- read_excel(path = "data/products.xlsx", sheet = "products data")
products_xlsx
SQL Server
library(DBI)
library(odbc)
# NOTE: change on actual connection string
conn <- dbConnect(odbc(),
.connection_string = "Driver={SQL Server};Driver={SQL Server};Server=<server_db>;database=<db_name>;Trusted_Connection=yes;",
timeout = 10)
products_sql <- dbGetQuery(conn, "select * from dbo.products")
CSV
Filter data for export:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
products_csv %>%
count(department, brand_ty, sort = T)
new_products <- products_csv %>%
filter(department == "PHARMA" & brand_ty == "PRIVATE") %>%
mutate_if(is.character, tolower) %>%
select(-x5)
new_products
Export:
write.table(new_products, "data/new_products.csv",
sep = ",",
row.names = F)
And now check result via ‘Import dataset’ wizard in RStudio.