#devtools::install_github("rstudio-education/dsbox")
library(tidyverse)
library(dsbox)
# Load the state lookup table. Later steps match on its `abbreviation` column
# and use its `area` column to compute locations per unit area.
states <- read_csv("data/states.csv")
The Denny’s dataset has 1643 rows and 6 columns. Each row is a different restaurant and there are 6 variables named address, city, state, zip, longitude, latitude.
The LaQuinta dataset has 909 rows and 6 columns. Each row is a different hotel and there are 6 variables named address, city, state, zip, longitude, latitude.
There are no Denny’s locations outside of the USA. There are LaQuinta locations in Mexico, China, Chile, Colombia, the UAE, Turkey, Honduras, New Zealand, and Canada.
We will do this using the state variable, but you might also describe a way to pick out locations in the USA by the longitude and latitude.
These are the Denny’s locations not in the USA:
# Denny's rows whose state code has no match in the US state lookup table.
dennys %>%
  anti_join(states, by = c("state" = "abbreviation"))
## # A tibble: 0 x 6
## # ... with 6 variables: address <chr>, city <chr>, state <chr>, zip <chr>,
## # longitude <dbl>, latitude <dbl>
There are none.
When we add the country to Denny’s, it is always “United States”:
# No Denny's row falls outside the US state list, so the country column is a
# single constant value.
dn <- mutate(dennys, country = "United States")
colnames(dn)
## [1] "address" "city" "state" "zip" "longitude" "latitude"
## [7] "country"
These are the LaQuinta locations not in the USA:
# La Quinta rows whose state code has no match in the US state lookup table,
# reduced to the columns needed to identify the country by hand.
laquinta %>%
  anti_join(states, by = c("state" = "abbreviation")) %>%
  select(state, city, zip)
## # A tibble: 14 x 3
## state city zip
## <chr> <chr> <chr>
## 1 AG "\nAguascalientes" 20345
## 2 QR "\nCancun" 77500
## 3 CH "Col\nPartido Iglesias\nJuarez" 32528
## 4 NL "Parque Industrial Interamerican\nApodaca" 66600
## 5 ANT "\nMedellin Colombia" 050016
## 6 NL "Col. Centro\nMonterrey" 64000
## 7 NL "\nMonterrey" 64190
## 8 ON "\nOshawa" L1H1B4
## 9 VE "\nPoza Rica" 93210
## 10 PU "\nSan Jose Chiapa" 75010
## 11 PU "Col. ReservaTerritorial Atlixcayotl San\nPuebla" 72810
## 12 SL "\nSan Luis Potosi" 78399
## 13 FM "contiguo Mall Las Cascadas\nTegucigalpa" 11101
## 14 BC "\nRichmond" V6X1C4
# Label every La Quinta location with its country, keyed on the state/province
# code. US codes come from the `states` lookup; the remaining codes are the 14
# non-US rows listed above.
# "FM" is the Tegucigalpa location, which is in Honduras (not Mexico), so it
# gets its own rule.
# Codes not covered by any rule below are left as NA.
lq <- laquinta %>%
  mutate(country = case_when(
    state %in% states$abbreviation ~ "United States",
    state %in% c("ON", "BC") ~ "Canada",
    state == "ANT" ~ "Colombia",
    state == "FM" ~ "Honduras",
    state %in% c("AG", "VE", "PU", "SL", "CH", "QR", "NL") ~ "Mexico"
  ))
A little extra:
# Number of La Quinta locations per country.
lq %>%
  group_by(country) %>%
  summarise(n())
## # A tibble: 5 x 2
## country `n()`
## <chr> <int>
## 1 Canada 2
## 2 Colombia 1
## 3 Honduras 1
## 4 Mexico 10
## 5 United States 895
# From here on, keep only the La Quinta locations whose state code appears in
# the US state lookup table.
lq <- laquinta %>%
  semi_join(states, by = c("state" = "abbreviation")) %>%
  mutate(country = "United States")
# Number of Denny's locations per state, largest first.
count(dn, state, sort = TRUE)
## # A tibble: 51 x 2
## state n
## <chr> <int>
## 1 CA 403
## 2 TX 200
## 3 FL 140
## 4 AZ 83
## 5 IL 56
## 6 NY 56
## 7 WA 49
## 8 OH 44
## 9 MO 42
## 10 PA 40
## # ... with 41 more rows
# Number of La Quinta locations per state, largest first.
count(lq, state, sort = TRUE)
## # A tibble: 48 x 2
## state n
## <chr> <int>
## 1 TX 237
## 2 FL 74
## 3 CA 56
## 4 GA 41
## 5 TN 30
## 6 OK 29
## 7 LA 28
## 8 CO 27
## 9 NM 19
## 10 NY 19
## # ... with 38 more rows
# Denny's locations per 1,000 units of state area, densest states first.
# NOTE(review): the column name implies `area` is in square miles - confirm
# against data/states.csv.
count(dn, state) %>%
  inner_join(states, by = c("state" = "abbreviation")) %>%
  mutate(nperksqmiles = n / area * 1000) %>%
  arrange(desc(nperksqmiles))
## # A tibble: 51 x 5
## state n name area nperksqmiles
## <chr> <int> <chr> <dbl> <dbl>
## 1 DC 2 District of Columbia 68.3 29.3
## 2 RI 5 Rhode Island 1545. 3.24
## 3 CA 403 California 163695. 2.46
## 4 CT 12 Connecticut 5543. 2.16
## 5 FL 140 Florida 65758. 2.13
## 6 MD 26 Maryland 12406. 2.10
## 7 NJ 10 New Jersey 8723. 1.15
## 8 NY 56 New York 54555. 1.03
## 9 IN 37 Indiana 36420. 1.02
## 10 OH 44 Ohio 44826. 0.982
## # ... with 41 more rows
# La Quinta locations per 1,000 units of state area, densest states first.
# NOTE(review): the column name implies `area` is in square miles - confirm
# against data/states.csv.
count(lq, state) %>%
  inner_join(states, by = c("state" = "abbreviation")) %>%
  mutate(nperksqmiles = n / area * 1000) %>%
  arrange(desc(nperksqmiles))
## # A tibble: 48 x 5
## state n name area nperksqmiles
## <chr> <int> <chr> <dbl> <dbl>
## 1 RI 2 Rhode Island 1545. 1.29
## 2 FL 74 Florida 65758. 1.13
## 3 CT 6 Connecticut 5543. 1.08
## 4 MD 13 Maryland 12406. 1.05
## 5 TX 237 Texas 268596. 0.882
## 6 TN 30 Tennessee 42144. 0.712
## 7 GA 41 Georgia 59425. 0.690
## 8 NJ 5 New Jersey 8723. 0.573
## 9 MA 6 Massachusetts 10554. 0.568
## 10 LA 28 Louisiana 52378. 0.535
## # ... with 38 more rows
# Tag each data set with its chain name, then stack them into one table so
# both chains can be plotted together.
dn <- mutate(dn, establishment = "Denny's")
lq <- mutate(lq, establishment = "La Quinta")
dn_lq <- bind_rows(dn, lq)
# Locations of both chains in New Jersey, plotted by coordinates and coloured
# by chain; points are semi-transparent so overlapping locations stay visible.
dnlq_nj <- filter(dn_lq, state == "NJ")
ggplot(dnlq_nj, aes(x = longitude, y = latitude, color = establishment)) +
  geom_point(alpha = 0.3)
# Locations of both chains in Texas, plotted by coordinates and coloured by
# chain; points are semi-transparent so overlapping locations stay visible.
# The subset gets its own name (`dnlq_tx`): the original reused `dnlq_nj`,
# which mislabelled the Texas rows and overwrote the New Jersey subset.
dnlq_tx <- dn_lq %>%
  filter(state == "TX")
ggplot(dnlq_tx, mapping = aes(x = longitude, y = latitude, color = establishment)) +
  geom_point(alpha = 0.3)
29 points: - 2 points per exercise for correctness of code and adequate prose answers/description (where specified) - 5 points for adequately commented commits