Read in the following functions. You'll use the last one, trumptweets(), to download the data.
#' get_trumptwitterarchive
#'
#' Returns data from trumptwitterarchive.com.
#'
#' @param years Years from which to collect data. Defaults (NULL, TRUE, or "all")
#'   to 2009 through the current year.
#' @return Returns data frame (tbl) with a single column, \code{status_id}, and
#'   two attributes: "data", the list of per-year results (one element per
#'   requested year), and "years", the years that were requested.
#' @importFrom jsonlite fromJSON
#' @importFrom tibble as_tibble
#' @examples
#' \dontrun{
#' ## get data
#' tta <- get_trumptwitterarchive()
#' }
#' @export
get_trumptwitterarchive <- function(years = NULL) {
  ## default to years 2009 through current
  if (is.null(years) || isTRUE(years) || identical(tolower(years), "all")) {
    years <- seq(2009, as.integer(format(Sys.Date(), "%Y")))
  }
  stopifnot(is.numeric(years))

  ## get data for each year
  tta <- lapply(years, trumptwitterarchive_)

  ## collect the id_str element of every yearly result
  status_id <- unlist(lapply(tta, "[[", "id_str"), use.names = FALSE)
  ## every year can come back empty (no id_str anywhere); keep a zero-row
  ## status_id column instead of handing NULL to as_tibble()
  if (is.null(status_id)) {
    status_id <- character(0)
  }

  ## make status IDs data frame with tta as attribute
  ## (the deprecated `validate` argument is not needed for a single,
  ## syntactically valid column name)
  ids <- tibble::as_tibble(list(status_id = status_id))

  ## list of data (element = year)
  attr(ids, "data") <- tta
  attr(ids, "years") <- years
  ids
}
#' trumptwitterarchive_data
#'
#' Extracts full trumptwitterarchive data
#'
#' @param data Data frame returned by \code{\link{get_trumptwitterarchive}} with
#'   "data" attribute.
#' @param years Optional integer used to subset data to return only certain years.
#'   Defaults to NULL, which means all data is returned.
#' @return List of full data from trumptwitterarchive.com. When \code{years} is
#'   supplied, only the elements whose entry in the "years" attribute is in
#'   \code{years} are returned. If the "years" attribute is missing or does not
#'   line up with the data, a warning is issued and all data is returned.
#' @export
trumptwitterarchive_data <- function(data, years = NULL) {
  if (!"data" %in% names(attributes(data))) {
    stop("Archive data not found", call. = FALSE)
  }
  tta <- attr(data, "data")

  ## by default, return tta data
  if (is.null(years)) {
    return(tta)
  }

  ## if years to subset are provided
  data_years <- attr(data, "years")

  ## The years attribute labels the elements of tta one-to-one, so it is only
  ## usable when both have the same length. (Comparing against the length of
  ## the *requested* years would reject every proper subset.)
  if (is.null(data_years) || length(data_years) != length(tta)) {
    warning(
      "Length of years attribute differs from length of data. Returning all extracted data",
      call. = FALSE
    )
    return(tta)
  }

  tta[data_years %in% years]
}
#' trumptwitterarchive_
#'
#' Internal function used to retrieve trumptwitterarchive data
#'
#' @param year Integer, specifying year of data to return.
#' @param fromJSON Logical, indicating whether to convert response object to
#'   nested list object.
#' @return Response object from trumptwitterarchive request converted (by default)
#'   to R-friendly list object. An empty data frame is returned when the body is
#'   empty or is an HTML page rather than JSON.
#' @importFrom httr content GET
#' @importFrom jsonlite fromJSON
#' @noRd
#' @keywords internal
trumptwitterarchive_ <- function(year, fromJSON = TRUE) {
  ## build and send request
  url <- paste0(
    "http://trumptwitterarchive.com/",
    "data/realdonaldtrump/",
    year,
    ".json"
  )

  ## response object
  r <- httr::GET(url)

  ## check html status
  httr::warn_for_status(r)

  ## if fromJSON then convert to list otherwise return response object
  if (fromJSON) {
    ## explicit encoding avoids httr guessing one
    r <- httr::content(r, "text", encoding = "UTF-8")

    ## if html (or nothing usable) return empty data frame.
    ## A literal prefix test is used because "\\<" in an R regular expression
    ## is a start-of-word assertion, not a literal "<".
    no_body <- length(r) != 1L || is.na(r) || !nzchar(r)
    if (no_body || startsWith(r, "<!DOCTYPE")) {
      r <- data.frame()
    } else {
      r <- jsonlite::fromJSON(r)
    }
  }
  r
}
## function to download status ids
##
## trumptwitterarchive: if TRUE (default), collect ids for 2009-2017 by
##   calling .trumpids() once per year (.trumpids is defined elsewhere);
##   if FALSE, read a CSV of ids hosted on GitHub instead.
## Returns an unnamed vector of status ids.
trumpids <- function(trumptwitterarchive = TRUE) {
  if (trumptwitterarchive) {
    ## scrape from trumptwitterarchive.com
    ids <- unlist(lapply(2009:2017, .trumpids), use.names = FALSE)
  } else {
    ## or from my github page (note: this one is unlikely to
    ## be updated as frequently as trumptwitterarchive)
    ## Use the raw-content host: the "github.com/.../blob/..." address serves
    ## an HTML page, not the CSV itself.
    csv_url <- paste0(
      "https://raw.githubusercontent.com/mkearney/trumptweets/",
      "master/data/realdonaldtrump-ids-2009-2017.csv"
    )
    ## read ids as character; status ids have more digits than a double can
    ## represent exactly
    ids <- unlist(
      read.csv(csv_url, stringsAsFactors = FALSE, colClasses = "character"),
      use.names = FALSE
    )
  }
  ## return ids
  ids
}
## function to download twitter data
##
## Takes no arguments. Fetches the archive of status ids with trumpids(),
## looks the statuses up in (at most) two batches, and also requests the
## timeline since the last archived id.
## Returns the row-bound result of all pieces; if rbind fails, the list of
## pieces is returned instead so nothing downloaded is lost.
trumptweets <- function() {
  ## get archive of status ids
  ids <- trumpids()
  n_ids <- length(ids)

  ## get newest trump tweets (set to 1000 to be safe)
  rt1 <- get_timeline(
    "realdonaldtrump",
    n = 1000,
    since_id = ids[n_ids]
  )

  ## download archive
  message(" Downloading ", n_ids, " tweets...")
  ## first batch: up to 16000 ids (seq_len keeps this valid for short vectors)
  first_batch <- seq_len(min(16000L, n_ids))
  rt2 <- lookup_statuses(ids[first_batch])
  pieces <- list(rt1, rt2)

  message(" You're halfway there...")
  ## second batch only when ids remain; 16001:n_ids would count backwards
  ## (and index NA) for n_ids < 16001
  if (n_ids > 16000L) {
    rt3 <- lookup_statuses(ids[16001:n_ids])
    pieces <- list(rt1, rt2, rt3)
  }
  message(" Huzzah!!!")

  ## collapse into data frame (or salvage list if error)
  tryCatch(
    do.call("rbind", pieces),
    error = function(e) pieces
  )
}
Download all of Trump's tweets.
## run function to download Trump's twitter archive
djt <- trumptweets()
Save the data file.
## To save as an excel file:
## (install openxlsx only if it is not already available)
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
openxlsx::write.xlsx(djt, "realdonaltrump-fullarchive.xlsx")

## To save as csv file
write.csv(
  djt,
  "realdonaltrump-fullarchive.csv",
  row.names = FALSE
)

## To preserve meta information and save as csv file
## NOTE: this writes to the same file name as the write.csv() call above and
## therefore replaces that file; run only one of the two.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
readr::write_csv(djt, "realdonaltrump-fullarchive.csv")
Inspecting the data
## preview data
head(djt)

## check 100 most popular hashtags
## NOTE(review): splitting on a single space assumes each row stores its
## hashtags as one space-separated string -- confirm against the rtweet
## version in use. Splitting on "" would tabulate single characters.
djt$hashtags %>%
  strsplit(" ") %>%
  unlist(use.names = FALSE) %>%
  tolower() %>%
  table() %>%
  sort(decreasing = TRUE) %>%
  head(100)

## check 100 most popular mentions (same space-separated assumption)
djt$mentions_screen_name %>%
  strsplit(" ") %>%
  unlist(use.names = FALSE) %>%
  tolower() %>%
  table() %>%
  sort(decreasing = TRUE) %>%
  head(100)

## check text of 50 most recent tweets
djt$text[1:50]
Plotting the data
## use the built in rtweet function
## (plots the downloaded data; `p` is not defined until the next statement)
ts_plot(djt, theme = "nerdy")

## plot four groups of hashtags
p <- ts_filter(
  djt, "2 days",
  txt = "hashtags",
  filter = c(
    "makeamericagreatagain|maga",
    "trump",
    "debate",
    "draintheswamp|americafirst"
  ),
  key = c(
    "MakeAmericaGreatAgain",
    "Trump",
    "Debates",
    "DrainTheSwamp/AmericaFirst"
  )
)

## you can continue plotting with rtweet functions but
## the current version (0.4.0) prints incorrect labels for
## the x-axis for multi-year plots.
ts_plot(p, theme = "spacegray")
## ggplot2 doesn't have that problem and is more robust and
## flexible anyway

## install (only if missing) and load ggplot2
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

## uncomment following line and final line to save image
## png("trumptweets.png", 7, 5, "in", res = 127.5)
p %>%
  ggplot(aes(x = time, y = freq, color = filter)) +
  theme_bw() +
  geom_line() +
  facet_wrap(~filter, ncol = 2) +
  labs(
    x = "", y = "",
    title = "Hashtags used by Donald Trump",
    subtitle = "Used entire archive of @realDonaldTrumpTweets"
  ) +
  theme(
    legend.position = "none",
    text = element_text(
      size = 12,
      family = "Avenir Next Condensed"
    ),
    plot.title = element_text(
      family = "Avenir Next Condensed Medium", size = 20
    )
  )
## dev.off()

## image I created using this code displayed below
## note: Avenir Next Condensed will only work if it is currently
## installed on your machine. If it is not, then either
## delete the family arguments or replace Avenir with the font
## of your choosing
Note that the project description data, including the texts, logos, images, and/or trademarks,
for each open source project belongs to its rightful owner.
If you wish to add or remove any projects, please contact us at [email protected].