Creating a Dataframe
# Build `df1`: one row per Teleport urban area holding its country, continent,
# urban-area name and 17 category scores (read from `score_out_of_10`).

# Names of the score columns, in the order the API is assumed to return the
# categories (the scores are matched to these names by position).
# NOTE: "enviromental_quality" keeps its original spelling so that existing
# references to this column keep working.
score_names <- c(
  "housing", "cost_of_living", "startups", "venture_capital",
  "travel_connectivity", "commute", "business_freedom", "safety",
  "healthcare", "education", "enviromental_quality", "economy",
  "taxation", "internet_access", "leisure_culture", "tolerance", "outdoors"
)

# GET `url`, stop with an informative error on an HTTP failure, and return
# the parsed JSON body.
fetch_json <- function(url) {
  resp <- GET(url)
  httr::stop_for_status(resp)
  fromJSON(rawToChar(resp$content))
}

# Fetch one urban area (details + scores) and return it as a one-row
# data frame with the columns of `df1`.
fetch_urban_area <- function(href) {
  details <- fetch_json(href)
  scores <- fetch_json(paste0(href, "scores"))$categories$score_out_of_10

  # Scores are assigned to columns by position, so a different number of
  # categories would silently mis-align them; fail loudly instead.
  if (length(scores) != length(score_names)) {
    stop(
      sprintf(
        "Expected %d scores for '%s' but got %d",
        length(score_names), href, length(scores)
      ),
      call. = FALSE
    )
  }

  # `full_name` is split on ", ": the first part is used as the urban-area
  # name and the second as the country (NA when there is no second part).
  name_parts <- strsplit(details$full_name, ", ")[[1]]

  data.frame(
    country = name_parts[2],
    continent = details$continent,
    ua_name = name_parts[1],
    as.list(setNames(as.numeric(scores), score_names)),
    stringsAsFactors = FALSE
  )
}

ua <- fetch_json("https://api.teleport.org/api/urban_areas/")
ua_links <- ua$`_links`$`ua:item`$href
if (length(ua_links) == 0) {
  stop("The urban-area index returned no links", call. = FALSE)
}

# Iterate over every link the index returns rather than a fixed count.
ua_rows <- lapply(ua_links, fetch_urban_area)

# Bind all rows once instead of growing the data frame inside a loop. The
# earlier version prepended each new row, leaving the table in reverse index
# order; `rev()` keeps that order so downstream output is unchanged.
df1 <- do.call(rbind, rev(ua_rows))

# Drop urban areas with any missing field, as before.
df1 <- na.omit(df1)
Correlation Matrix
# Pearson correlations between the 17 score columns (columns 4 to 20 of
# df1), computed on pairwise-complete observations.
x <- correlate(
  df1[, 4:20],
  method = "pearson",
  use = "pairwise.complete.obs"
)
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
# Display the correlation table stored in `x`.
x
## # A tibble: 17 x 18
## rowname housing cost_of_living startups venture_capital travel_connecti~
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 housing NA 0.795 -0.354 -0.469 -0.241
## 2 cost_o~ 0.795 NA 0.00993 -0.143 -0.149
## 3 startu~ -0.354 0.00993 NA 0.795 0.270
## 4 ventur~ -0.469 -0.143 0.795 NA 0.365
## 5 travel~ -0.241 -0.149 0.270 0.365 NA
## 6 commute 0.167 0.238 0.0570 0.0867 0.326
## 7 busine~ -0.461 -0.470 0.199 0.243 0.251
## 8 safety 0.0180 -0.0682 -0.202 -0.115 0.271
## 9 health~ -0.465 -0.516 0.0864 0.187 0.280
## 10 educat~ -0.561 -0.453 0.369 0.521 0.463
## 11 enviro~ -0.417 -0.468 0.0132 0.105 0.112
## 12 economy -0.516 -0.368 0.330 0.368 0.0217
## 13 taxati~ -0.00812 0.0486 -0.0271 -0.103 -0.166
## 14 intern~ -0.299 -0.253 0.201 0.272 0.240
## 15 leisur~ -0.0157 0.172 0.441 0.328 0.306
## 16 tolera~ -0.228 -0.330 -0.136 -0.0901 0.0243
## 17 outdoo~ -0.168 -0.0360 0.252 0.268 0.135
## # ... with 12 more variables: commute <dbl>, business_freedom <dbl>,
## # safety <dbl>, healthcare <dbl>, education <dbl>,
## # enviromental_quality <dbl>, economy <dbl>, taxation <dbl>,
## # internet_access <dbl>, leisure_culture <dbl>, tolerance <dbl>,
## # outdoors <dbl>
Parallel Plots
# Parallel-coordinates plot, one line per row of df1, coloured by column 2
# (continent). Axes, left to right, by df1 column index:
#   19 = tolerance, 4 = housing, 5 = cost_of_living, 12 = healthcare.
# scale = "globalminmax" leaves the values unscaled on one shared axis range.
df1 %>%
  ggparcoord(
    columns = c(19, 4, 5, 12),
    groupColumn = 2,
    scale = "globalminmax",
    showPoints = TRUE,
    title = "",
    alphaLines = 0.3
  ) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(plot.title = element_text(size = 10)) +
  xlab("") +
  ylab("")
# Parallel-coordinates plot, one line per row of df1, coloured by column 2
# (continent), using viridis palette option "B". Axes, left to right, by df1
# column index:
#   13 = education, 12 = healthcare, 14 = enviromental_quality,
#   10 = business_freedom.
# scale = "globalminmax" leaves the values unscaled on one shared axis range.
df1 %>%
  ggparcoord(
    columns = c(13, 12, 14, 10),
    groupColumn = 2,
    scale = "globalminmax",
    showPoints = TRUE,
    title = "",
    alphaLines = 0.3
  ) +
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  scale_color_viridis(discrete = TRUE, option = 'B') +
  theme_ipsum() +
  theme(plot.title = element_text(size = 10)) +
  xlab("") +
  ylab("")