Connect to Azure Data Lake Store using R


The following code snippets show how to connect to Azure Data Lake Storage Gen1 from R through its REST API, using service-to-service authentication with a client id and client secret. See the Azure Data Lake Storage Gen1 documentation for more details on the other ways to connect to Azure Data Lake Storage Gen1.


Import prerequisites

library(httr)
library(curl)  
library(stringr)  

Authenticate

# Request an Azure AD access token with the OAuth2 client-credentials grant
# (service-to-service authentication).
#
# Arguments:
#   tenant        Azure AD tenant (directory) id or domain name.
#   client_id     Application (client) id of the service principal.
#   client_secret Client secret of the service principal.
#
# Returns:
#   A single string of the form "Bearer <access_token>", ready to be used as
#   the value of an HTTP Authorization header.
#
# Errors:
#   Stops if the token endpoint answers with an HTTP error status or if the
#   response does not contain an access token.
authentication_token <- function(tenant, client_id, client_secret) {
  token_url <- paste0("https://login.windows.net/", tenant, "/oauth2/token")

  # httr is used for both the request and the JSON parsing: the previous
  # implementation called fromJSON(), which none of the loaded packages
  # (httr, curl, stringr) provide.
  resp <- httr::POST(
    token_url,
    body = list(
      grant_type = "client_credentials",
      resource = "https://management.core.windows.net/",
      client_id = client_id,
      client_secret = client_secret
    ),
    encode = "form"
  )

  # Fail loudly on 4xx/5xx instead of returning "Bearer " with no token.
  httr::stop_for_status(resp, task = "acquire an Azure AD access token")

  res <- httr::content(
    resp,
    as = "parsed",
    type = "application/json",
    encoding = "UTF-8"
  )
  access_token <- res[["access_token"]]
  if (is.null(access_token) || !nzchar(access_token)) {
    stop("Token response did not contain an access token.", call. = FALSE)
  }

  paste("Bearer", access_token)
}

# Acquire the bearer token once. The three string literals below are
# placeholders: replace them with the tenant, client id and client secret of
# your own service principal. `token` holds the value returned by
# authentication_token(), i.e. the "Bearer <access_token>" string.
token <- authentication_token(tenant = "TENANT",
                              client_id = "CLIENT ID",
                              client_secret = "CLIENT SECRET")

Read

# Read a CSV file stored in Azure Data Lake Storage Gen1 into a data frame.
#
# Arguments:
#   datalake   Name of the Data Lake Store account (the host prefix of
#              "<datalake>.azuredatalakestore.net").
#   path       Path of the file inside the store, relative to the root.
#   auth_token Value for the Authorization header, e.g. the
#              "Bearer <access_token>" string.
#
# Returns:
#   A data frame parsed from the file content with read.csv(); column names
#   are kept exactly as they appear in the file (check.names = FALSE).
#
# Errors:
#   Stops if the service answers with an HTTP error status, so that an error
#   payload is never parsed as if it were CSV data.
load_data <- function(datalake, path, auth_token) {
  file_url <- paste0(
    "https://", datalake, ".azuredatalakestore.net/webhdfs/v1/", path,
    "?op=OPEN&read=true"
  )
  resp <- httr::GET(file_url, httr::add_headers(Authorization = auth_token))
  httr::stop_for_status(resp, task = "read the file from the data lake")

  csv_text <- httr::content(resp, as = "text", encoding = "UTF-8")

  # `text =` lets read.csv manage (and close) its own connection; the previous
  # explicit textConnection() was never closed.
  utils::read.csv(text = csv_text, check.names = FALSE)
}

# Example: read a CSV file from the data lake.
# The string literals are placeholders to be replaced with real values.
datalake_name <- "NAME OF THE DATALAKE"
file_path <- "FILE PATH IN THE DATALAKE FOLDER"

# Reuse the bearer token created earlier with authentication_token().
# It is deliberately NOT stored in a variable called `authentication_token`:
# that assignment would overwrite the function of the same name.
auth_token <- token

load_data(datalake_name, file_path, auth_token)

Write

# Upload a data set as a CSV file to Azure Data Lake Storage Gen1,
# overwriting any existing file at the target path.
#
# Arguments:
#   dataset    Object to serialise with write.csv() (typically a data frame).
#   datalake   Name of the Data Lake Store account (the host prefix of
#              "<datalake>.azuredatalakestore.net").
#   path       Destination path of the file inside the store.
#   auth_token Value for the Authorization header, e.g. the
#              "Bearer <access_token>" string.
#
# Returns:
#   The string "The file is uploaded" once the service has accepted the
#   request.
#
# Errors:
#   Stops if the service answers with an HTTP error status, instead of
#   reporting success unconditionally.
upload_data <- function(dataset, datalake, path, auth_token) {
  # Serialise into an anonymous, function-local text connection. The previous
  # textConnection("filecontent", "w") created `filecontent` in the user's
  # workspace and left the connection open.
  con <- textConnection(NULL, open = "w")
  on.exit(close(con), add = TRUE)
  utils::write.csv(dataset, con, row.names = FALSE)
  csv_body <- paste(textConnectionValue(con), collapse = "\n")

  file_url <- paste0(
    "https://", datalake, ".azuredatalakestore.net/webhdfs/v1/", path,
    "?op=CREATE&overwrite=true&write=true"
  )
  resp <- httr::PUT(
    file_url,
    body = csv_body,
    httr::add_headers(
      Authorization = auth_token,
      "Transfer-Encoding" = "chunked"
    )
  )
  httr::stop_for_status(resp, task = "upload the file to the data lake")

  "The file is uploaded"
}

# Example: upload a data set to the data lake as a CSV file.
# The string literals are placeholders to be replaced with real values;
# `dataset` would normally be a data frame.
dataset <- "DATASET TO UPLOAD"
datalake_name <- "NAME OF THE DATALAKE"
file_path <- "FILE PATH IN THE DATALAKE FOLDER"

# Reuse the bearer token created earlier with authentication_token().
# It is deliberately NOT stored in a variable called `authentication_token`:
# that assignment would overwrite the function of the same name.
auth_token <- token

upload_data(dataset, datalake_name, file_path, auth_token)

Related Posts

Connect to azure datalake store using python

December 20, 2018

Read More
Connect to azure storage (blob) using python

December 22, 2018

Read More
Join, Merge, Append and Concatenate

March 25, 2019

Read More