Challenge 3

# Request the Teleport "scores" endpoint for one urban-area slug and return
# the raw HTTP response object.
get_scores <- function(slug) {
  GET(paste0("https://api.teleport.org/api/urban_areas/slug:", slug, "/scores/"))
}

# One response per city; the variable names are used by the parsing step below.
sanfran <- get_scores("san-francisco-bay-area")
LA <- get_scores("los-angeles")
NY <- get_scores("new-york")
Memphis <- get_scores("memphis")
Omaha <- get_scores("omaha")
Columbus <- get_scores("columbus")

# Decode a response body as JSON, keep its `categories` table, and tag every
# row with a human-readable city label.
#   response: HTTP response whose `content` field holds the raw JSON bytes.
#   city:     label written to the new `City` column.
parse_scores <- function(response, city) {
  fromJSON(rawToChar(response$content))$categories %>%
    mutate(City = city)
}

dataSanFran <- parse_scores(sanfran, "San Francisco, CA")
dataLA <- parse_scores(LA, "Los Angeles, CA")
dataNY <- parse_scores(NY, "New York, NY")
# Memphis is in Tennessee and Omaha is in Nebraska; the state codes on these
# two labels were previously wrong ("OH" and "TN").
dataMemphis <- parse_scores(Memphis, "Memphis, TN")
dataOmaha <- parse_scores(Omaha, "Omaha, NE")
dataColumbus <- parse_scores(Columbus, "Columbus, OH")

# Stack the six per-city tables into one data frame with one row per city and
# one column per score of interest.
#
# bind_rows() is used for stacking: the set operation union() has no `by`
# argument, so passing `by = "City"` was at best ignored. distinct() keeps
# union()'s removal of exact duplicate rows.
fullData <- bind_rows(
  dataSanFran,
  dataLA,
  dataNY,
  dataMemphis,
  dataOmaha,
  dataColumbus
) %>%
  distinct() %>%
  # `color` differs per category and is not needed for modelling or plotting.
  select(-color) %>%
  filter(name %in% c("Cost of Living", "Leisure & Culture")) %>%
  # Wide layout: `Cost of Living` and `Leisure & Culture` become columns.
  pivot_wider(names_from = name, values_from = score_out_of_10)

# Simple OLS regression of the leisure & culture score on the cost-of-living
# score across the cities in fullData, followed by a printed model summary.
model <- lm(
  formula = `Leisure & Culture` ~ `Cost of Living`,
  data = fullData
)
summ(model)

## MODEL INFO:
## Observations: 6
## Dependent Variable: Leisure & Culture
## Type: OLS linear regression 
## 
## MODEL FIT:
## F(1,4) = 15.66, p = 0.02
## R² = 0.80
## Adj. R² = 0.75 
## 
## Standard errors: OLS
## -----------------------------------------------------
##                           Est.   S.E.   t val.      p
## ---------------------- ------- ------ -------- ------
## (Intercept)              12.69   1.30     9.75   0.00
## Cost of Living           -1.08   0.27    -3.96   0.02
## -----------------------------------------------------

With an F-statistic of 15.66 and a p-value of .02, we can conclude that the overall model is significant. Thus, there is sufficient evidence that happiness with leisure & culture in a city depends on the cost of living there. Unfortunately, this displays a negative association between happiness with leisure/culture and happiness with the cost of living in a particular city.

# Interactive scatter plot: hovering over a point shows the city, its
# cost-of-living score, and its leisure & culture score.

# Short, syntactic column names make the aesthetics below easier to read.
plotData <- fullData %>%
  rename(
    Cost = `Cost of Living`,
    Leisure = `Leisure & Culture`
  )

scatter_p <- ggplot(
  plotData,
  aes(
    x = Cost,
    y = Leisure,
    color = City,
    # Hover label; consumed by ggplotly(tooltip = "text").
    text = paste0(
      "City: ", City, "\n",
      "Happiness with Cost of Living: ", Cost, "\n",
      "Happiness with Leisure & Culture: ", Leisure
    )
  )
) +
  geom_point() +
  ggtitle("People’s Satisfaction with Different Aspects of Living in Urban Areas") +
  # Axis titles follow the mapping: Cost is on x, Leisure is on y.
  xlab("Cost of Living Satisfaction (Scale of 1-10)") +
  ylab("Leisure & Culture Satisfaction (Scale of 1-10)") +
  # theme_minimal() is a complete theme and replaces any theme() settings added
  # before it, so it goes first and the tweak follows. The upside-down
  # (angle = 180) y-axis text setting is intentionally not carried over.
  theme_minimal() +
  theme(plot.title.position = "plot") +
  scale_color_viridis_d() +
  # NOTE(review): points with a score below 1 fall outside these limits and
  # would be dropped -- confirm the scores cannot be lower than 1.
  xlim(1, 10) +
  ylim(1, 10)

# Convert the static plot to a plotly widget that shows only the custom hover
# text, drop the legend, and left-align the title.
interactive_p <- ggplotly(scatter_p, tooltip = "text")
interactive_p <- hide_legend(interactive_p)
layout(interactive_p, title = list(x = 0))