library(tidyverse)
#library(dsbox) ## didn't work for everyone, not needed
# Read the three input tables from the project's data/ directory.
states <- read_csv("data/states.csv")
laquinta <- read_csv("data/laquinta.csv")
dennys <- read_csv("data/dennys.csv")
# Keep only the Denny's rows whose state is Alaska, then count them.
dn_ak <- filter(dennys, state == "AK")
nrow(dn_ak)
## [1] 3
There are 3 Denny’s in Alaska.
# Keep only the LaQuinta rows whose state is Alaska, then count them.
lq_ak <- filter(laquinta, state == "AK")
nrow(lq_ak)
## [1] 2
There are 2 LaQuinta locations in Alaska.
# Join on state alone: every Alaska Denny's row is matched with every Alaska
# LaQuinta row, giving one row per (Denny's, LaQuinta) pair. Columns that
# exist in both tables get .x (Denny's) and .y (LaQuinta) suffixes.
dn_lq_ak <- dn_ak %>%
  full_join(lq_ak, by = "state")
dn_lq_ak
## # A tibble: 6 x 13
## ...1.x addres~1 city.x state zip.x longi~2 latit~3 ...1.y addre~4 city.y zip.y
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 1 2900 De~ Ancho~ AK 99503 -150. 61.2 23 3501 M~ "\nAn~ 99503
## 2 1 2900 De~ Ancho~ AK 99503 -150. 61.2 295 4920 D~ "\nFa~ 99709
## 3 2 3850 De~ Ancho~ AK 99508 -150. 61.2 23 3501 M~ "\nAn~ 99503
## 4 2 3850 De~ Ancho~ AK 99508 -150. 61.2 295 4920 D~ "\nFa~ 99709
## 5 3 1929 Ai~ Fairb~ AK 99701 -148. 64.8 23 3501 M~ "\nAn~ 99503
## 6 3 1929 Ai~ Fairb~ AK 99701 -148. 64.8 295 4920 D~ "\nFa~ 99709
## # ... with 2 more variables: longitude.y <dbl>, latitude.y <dbl>, and
## # abbreviated variable names 1: address.x, 2: longitude.x, 3: latitude.x,
## # 4: address.y
Each of the 3 Denny's locations is paired with each of the 2 LaQuinta locations, giving 3 × 2 = 6 Denny's–LaQuinta pairs in Alaska. Therefore, we need to calculate 6 distances between establishments.
There are 6 observations in the `dn_lq_ak` data frame. The variable names are
`...1.x`, `address.x`, `city.x`, `state`, `zip.x`, `longitude.x`, `latitude.x`,
`...1.y`, `address.y`, `city.y`, `zip.y`, `longitude.y`, and `latitude.y`.
# Load the helper script; haversine() is presumably defined there (it is not
# defined anywhere else in this file).
source("haversine.R")

# Add a `distance` column for every Denny's/LaQuinta pair, computed from the
# Denny's coordinates (.x) and the LaQuinta coordinates (.y).
dn_lq_ak <- mutate(
  dn_lq_ak,
  distance = haversine(longitude.x, latitude.x, longitude.y, latitude.y)
)
# For each Denny's address, keep the smallest distance to any LaQuinta.
dn_lq_ak_mindist <- summarise(
  group_by(dn_lq_ak, address.x),
  closest = min(distance)
)
dn_lq_ak_mindist
## # A tibble: 3 x 2
## address.x closest
## <chr> <dbl>
## 1 1929 Airport Way 5.20
## 2 2900 Denali 2.04
## 3 3850 Debarr Road 6.00
# One dot per Alaska Denny's, placed at the distance to its nearest LaQuinta.
# `closest` is the per-Denny's minimum distance to a LaQuinta, so the axis and
# title are worded from the Denny's point of view.
dn_lq_ak_mindist %>%
  ggplot(aes(x = closest)) +
  geom_dotplot() +
  # Hide the y axis: its title and breaks are both set to NULL.
  scale_y_continuous(NULL, breaks = NULL) +
  xlim(c(0, 10)) +
  labs(
    x = "Distance to closest LaQuinta (km)",
    title = "Denny's in Alaska are close to a LaQuinta"
  )
## Bin width defaults to 1/30 of the range of the data. Pick better value with `binwidth`.
Every Denny’s in Alaska is within about 6 km (under 4 miles) of a LaQuinta.
# Restrict both chains to Alaska, New York and California.
dn_sts <- filter(dennys, state %in% c("AK", "NY", "CA"))
lq_sts <- filter(laquinta, state %in% c("AK", "NY", "CA"))

# Pair every Denny's with every LaQuinta in the same state, then compute the
# distance for each pair from the .x (Denny's) and .y (LaQuinta) coordinates.
dn_lq_sts <- dn_sts %>%
  full_join(lq_sts, by = "state") %>%
  mutate(
    distance = haversine(longitude.x, latitude.x, longitude.y, latitude.y)
  )
# Number of Denny's/LaQuinta pairs in each state.
summarise(group_by(dn_lq_sts, state), n())
## # A tibble: 3 x 2
## state `n()`
## <chr> <int>
## 1 AK 6
## 2 CA 22568
## 3 NY 1064
# Shortest distance from each Denny's (address within state) to any LaQuinta
# in the same state. `.groups = "drop"` returns an ungrouped tibble instead of
# one still grouped by address.x, and avoids summarise()'s regrouping message.
dn_lq_sts_mindist <- dn_lq_sts %>%
  group_by(address.x, state) %>%
  summarise(closest = min(distance), .groups = "drop")
# Histogram of each Denny's distance to its nearest LaQuinta, one panel per
# state. `closest` is the per-Denny's minimum distance to a LaQuinta, so the
# axis and title are worded from the Denny's point of view.
ggplot(dn_lq_sts_mindist, aes(x = closest)) +
  geom_histogram() +
  facet_grid(~state) +
  labs(
    x = "Distance to closest LaQuinta (km)",
    title = "Denny's are often very close to a LaQuinta",
    subtitle = "but they can be far in California"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Comparing CA, NY, and AK, the joke holds up best in Alaska: every Denny’s there is within about 6 km (under 4 miles) of a LaQuinta.
25 points total.

* 10 questions @ 2 points each for correct and complete answers
* 5 points for GitHub commit history