Load packages and data

# One-time setup (left commented out): install dsbox from GitHub.
# devtools::install_github("rstudio-education/dsbox")
library(tidyverse)
# NOTE(review): dennys and laquinta are used below without being defined in
# this file; presumably they are datasets shipped with dsbox -- confirm.
library(dsbox)

# State lookup table; later code uses its abbreviation, name and area columns.
states <- read_csv("data/states.csv")

Exercise 1

The Denny’s dataset has 1643 rows and 6 columns. Each row is a different restaurant and there are 6 variables named address, city, state, zip, longitude, latitude.

Exercise 2

The LaQuinta dataset has 909 rows and 6 columns. Each row is a different hotel and there are 6 variables named address, city, state, zip, longitude, latitude.

Exercise 3

There are no Denny’s locations outside of the USA. There are LaQuinta locations in Mexico, China, Chile, Colombia, UAE, Turkey, Honduras, New Zealand, and Canada.

Exercise 4

We will do this using the state variable, but you might also describe a way to pick out locations in the USA by the longitude and latitude.

Exercise 5

These are the Denny’s locations not in the USA:

# Keep only Denny's rows whose state code is not a US state abbreviation.
dennys %>%
  filter(!state %in% states$abbreviation)
## # A tibble: 0 x 6
## # ... with 6 variables: address <chr>, city <chr>, state <chr>, zip <chr>,
## #   longitude <dbl>, latitude <dbl>

There are none.

Exercise 6

When we add the country to Denny’s, it is always “United States”:

# Every Denny's location is in the US (Exercise 5), so country is a constant.
dn <- mutate(dennys, country = "United States")
names(dn)
## [1] "address"   "city"      "state"     "zip"       "longitude" "latitude" 
## [7] "country"

Exercise 7

These are the LaQuinta locations not in the USA:

# La Quinta rows whose state code is not a US state abbreviation; show only
# the columns needed to work out which country each one is in.
laquinta %>%
  filter(!state %in% states$abbreviation) %>%
  select(state, city, zip)
## # A tibble: 14 x 3
##    state city                                              zip   
##    <chr> <chr>                                             <chr> 
##  1 AG    "\nAguascalientes"                                20345 
##  2 QR    "\nCancun"                                        77500 
##  3 CH    "Col\nPartido Iglesias\nJuarez"                   32528 
##  4 NL    "Parque Industrial Interamerican\nApodaca"        66600 
##  5 ANT   "\nMedellin Colombia"                             050016
##  6 NL    "Col. Centro\nMonterrey"                          64000 
##  7 NL    "\nMonterrey"                                     64190 
##  8 ON    "\nOshawa"                                        L1H1B4
##  9 VE    "\nPoza Rica"                                     93210 
## 10 PU    "\nSan Jose Chiapa"                               75010 
## 11 PU    "Col. ReservaTerritorial Atlixcayotl San\nPuebla" 72810 
## 12 SL    "\nSan Luis Potosi"                               78399 
## 13 FM    "contiguo Mall Las Cascadas\nTegucigalpa"         11101 
## 14 BC    "\nRichmond"                                      V6X1C4

Exercise 8

# Tag every La Quinta location with its country, keyed on the state/province
# code. Conditions are checked in order; US abbreviations are matched first.
lq <- laquinta %>%
  mutate(country = case_when(
    state %in% states$abbreviation ~ "United States",
    state %in% c("ON", "BC") ~ "Canada",
    state == "ANT" ~ "Colombia",
    # FM is the Tegucigalpa location listed in Exercise 7, i.e. Honduras,
    # so it must not be lumped in with the Mexican state codes.
    state == "FM" ~ "Honduras",
    state %in% c("AG", "VE", "PU", "SL", "CH", "QR", "NL") ~ "Mexico"
  ))

A little extra:

# Number of La Quinta locations per country.
lq %>%
  count(country)
## # A tibble: 5 x 2
##   country           n
##   <chr>         <int>
## 1 Canada            2
## 2 Colombia          1
## 3 Honduras          1
## 4 Mexico           10
## 5 United States   895

Exercise 9

# From here on only US locations are compared, so rebuild lq from the raw
# data, keeping just the rows whose state is a US state abbreviation.
lq <- laquinta %>%
  filter(state %in% states$abbreviation) %>%
  mutate(country = "United States")

# Denny's locations per state, largest count first.
dn %>%
  count(state, sort = TRUE)
## # A tibble: 51 x 2
##    state     n
##    <chr> <int>
##  1 CA      403
##  2 TX      200
##  3 FL      140
##  4 AZ       83
##  5 IL       56
##  6 NY       56
##  7 WA       49
##  8 OH       44
##  9 MO       42
## 10 PA       40
## # ... with 41 more rows
# La Quinta locations per state, largest count first.
lq %>%
  count(state, sort = TRUE)
## # A tibble: 48 x 2
##    state     n
##    <chr> <int>
##  1 TX      237
##  2 FL       74
##  3 CA       56
##  4 GA       41
##  5 TN       30
##  6 OK       29
##  7 LA       28
##  8 CO       27
##  9 NM       19
## 10 NY       19
## # ... with 38 more rows

Exercise 10

# Denny's density: locations per 1000 units of state area, densest first.
# The inner join attaches each state's name and area from the lookup table.
dn %>%
  count(state) %>%
  inner_join(states, by = c(state = "abbreviation")) %>%
  mutate(nperksqmiles = n / area * 1000) %>%
  arrange(desc(nperksqmiles))
## # A tibble: 51 x 5
##    state     n name                     area nperksqmiles
##    <chr> <int> <chr>                   <dbl>        <dbl>
##  1 DC        2 District of Columbia     68.3       29.3  
##  2 RI        5 Rhode Island           1545.         3.24 
##  3 CA      403 California           163695.         2.46 
##  4 CT       12 Connecticut            5543.         2.16 
##  5 FL      140 Florida               65758.         2.13 
##  6 MD       26 Maryland              12406.         2.10 
##  7 NJ       10 New Jersey             8723.         1.15 
##  8 NY       56 New York              54555.         1.03 
##  9 IN       37 Indiana               36420.         1.02 
## 10 OH       44 Ohio                  44826.         0.982
## # ... with 41 more rows
# La Quinta density: locations per 1000 units of state area, densest first.
# The inner join attaches each state's name and area from the lookup table.
lq %>%
  count(state) %>%
  inner_join(states, by = c(state = "abbreviation")) %>%
  mutate(nperksqmiles = n / area * 1000) %>%
  arrange(desc(nperksqmiles))
## # A tibble: 48 x 5
##    state     n name             area nperksqmiles
##    <chr> <int> <chr>           <dbl>        <dbl>
##  1 RI        2 Rhode Island    1545.        1.29 
##  2 FL       74 Florida        65758.        1.13 
##  3 CT        6 Connecticut     5543.        1.08 
##  4 MD       13 Maryland       12406.        1.05 
##  5 TX      237 Texas         268596.        0.882
##  6 TN       30 Tennessee      42144.        0.712
##  7 GA       41 Georgia        59425.        0.690
##  8 NJ        5 New Jersey      8723.        0.573
##  9 MA        6 Massachusetts  10554.        0.568
## 10 LA       28 Louisiana      52378.        0.535
## # ... with 38 more rows

Exercise 11

# Label each chain so the two can be told apart once they are stacked.
dn <- mutate(dn, establishment = "Denny's")
lq <- mutate(lq, establishment = "La Quinta")

# One long table holding both chains.
dn_lq <- bind_rows(dn, lq)

# New Jersey only: plot locations by coordinates, coloured by chain.
# alpha = 0.3 keeps overlapping points visible.
dnlq_nj <- filter(dn_lq, state == "NJ")
ggplot(dnlq_nj, aes(x = longitude, y = latitude, color = establishment)) +
  geom_point(alpha = 0.3)

Exercise 12

# Texas only: plot locations by coordinates, coloured by chain.
# Stored under its own name so the New Jersey subset from Exercise 11 is not
# overwritten with Texas data.
dnlq_tx <- dn_lq %>%
  filter(state == "TX")
ggplot(dnlq_tx, mapping = aes(x = longitude, y = latitude, color = establishment)) +
  geom_point(alpha = 0.3)

Rubric

29 points:

- 2 points per exercise for correctness of code and adequate prose answers/description (where specified)
- 5 points for adequately commented commits