# Download the Teleport quality-of-life scores for six US urban areas and turn
# each response into a data frame of category scores tagged with a city label.

# Request the "scores" resource of one urban area, identified by its slug.
# Stops with an informative error on an HTTP failure status, so a bad request
# is reported here rather than as a JSON parsing error further down.
get_city_scores <- function(slug) {
  response <- GET(
    paste0("https://api.teleport.org/api/urban_areas/slug:", slug, "/scores/")
  )
  stop_for_status(response, task = paste("fetch Teleport scores for", slug))
  response
}

# Extract the `categories` table from a scores response and add a `City`
# column holding the human-readable label used in the model and the plot.
parse_city_scores <- function(response, city) {
  fromJSON(rawToChar(response$content))$categories %>%
    mutate(City = city)
}

# Raw responses (names kept as in the original script).
sanfran <- get_city_scores("san-francisco-bay-area")
LA <- get_city_scores("los-angeles")
NY <- get_city_scores("new-york")
Memphis <- get_city_scores("memphis")
Omaha <- get_city_scores("omaha")
Columbus <- get_city_scores("columbus")

# Parsed per-city score tables. Memphis is in Tennessee and Omaha is in
# Nebraska; the labels previously read "Memphis, OH" and "Omaha, TN".
dataSanFran <- parse_city_scores(sanfran, "San Francisco, CA")
dataLA <- parse_city_scores(LA, "Los Angeles, CA")
dataNY <- parse_city_scores(NY, "New York, NY")
dataMemphis <- parse_city_scores(Memphis, "Memphis, TN")
dataOmaha <- parse_city_scores(Omaha, "Omaha, NE")
dataColumbus <- parse_city_scores(Columbus, "Columbus, OH")
# Stack the six per-city score tables into one long table, keep only the two
# categories of interest, and reshape to one row per city with one column per
# category.
# bind_rows() replaces the chained union(..., by = "City") calls: union() is a
# set operation with no `by` argument, and the goal here is simply to append
# rows (rows from different cities can never be duplicates of each other).
fullData <- bind_rows(
  dataSanFran,
  dataLA,
  dataNY,
  dataMemphis,
  dataOmaha,
  dataColumbus
) %>%
  select(-color) %>%
  filter(name %in% c("Cost of Living", "Leisure & Culture")) %>%
  pivot_wider(names_from = name, values_from = score_out_of_10)
# Simple linear regression of the leisure & culture score on the cost of
# living score, one observation per city; summ() prints the fit summary.
model <- lm(
  formula = `Leisure & Culture` ~ `Cost of Living`,
  data = fullData
)
summ(model)
## MODEL INFO:
## Observations: 6
## Dependent Variable: Leisure & Culture
## Type: OLS linear regression
##
## MODEL FIT:
## F(1,4) = 15.66, p = 0.02
## R² = 0.80
## Adj. R² = 0.75
##
## Standard errors: OLS
## -----------------------------------------------------
## Est. S.E. t val. p
## ---------------------- ------- ------ -------- ------
## (Intercept) 12.69 1.30 9.75 0.00
## Cost of Living -1.08 0.27 -3.96 0.02
## -----------------------------------------------------
With an F-statistic of 15.66 and a p-value of .02, we can conclude that the overall model is significant at the 5% level. Thus, there is sufficient evidence that happiness with the leisure & culture in a city depends on the cost of living there, although with only six cities in the sample this conclusion should be treated with caution. Unfortunately, the model displays a negative association (estimated slope of -1.08) between happiness with leisure/culture and happiness with the cost of living in a particular city.
# Interactive scatter plot: hovering over a point shows the city, its cost of
# living score, and its leisure & culture score.

# Shorter column names so the aesthetics below do not need backticks.
plotData <- fullData %>%
  rename(
    Cost = `Cost of Living`,
    Leisure = `Leisure & Culture`
  )

scatter_p <- ggplot(
  plotData,
  aes(
    x = Cost,
    y = Leisure,
    color = City,
    # `text` is only consumed by ggplotly() below, as the tooltip content.
    text = paste0(
      "City: ", City, "\n",
      "Happiness with Cost of Living: ", Cost, "\n",
      "Happiness with Leisure & Culture: ", Leisure
    )
  )
) +
  geom_point() +
  ggtitle("People’s Satisfaction with Different Aspects of Living in Urban Areas") +
  # Axis titles follow the mapping above: Cost is on x, Leisure is on y
  # (the two titles were previously swapped).
  xlab("Cost of Living Satisfaction (Scale of 1-10)") +
  ylab("Leisure & Culture Satisfaction (Scale of 1-10)") +
  # Apply the complete theme first: adding theme_minimal() after theme()
  # replaces every earlier tweak. The former axis.text.y angle of 180 never
  # took effect for that reason and would have flipped the tick labels upside
  # down, so it is left out.
  theme_minimal() +
  theme(plot.title.position = "plot") +
  scale_color_viridis_d() +
  xlim(1, 10) +
  ylim(1, 10)

# Convert to a plotly widget that shows only the custom tooltip text, hide the
# legend (the city is already in the tooltip), and left-align the title.
ggplotly(scatter_p, tooltip = "text") %>%
  hide_legend() %>%
  layout(title = list(x = 0))