Libraries

SPDS

library(tidyverse)
library(sf)
library(units)

Data

library(USAboundaries)
library(rnaturalearth)

Visualization

library(gghighlight)
library(ggrepel)
library(ggthemes)
library(knitr)
library(kableExtra)

Creating a common projected crs

# Common projected CRS used throughout the analysis: a PROJ string for an
# equidistant conic ("eqdc") projection on the NAD83 datum with metre units.
# It is assembled from its individual parameters so each one is easy to read;
# paste() joins them with single spaces.
projcs <- paste(
  "+proj=eqdc",
  "+lat_0=40",
  "+lon_0=-96",
  "+lat_1=20",
  "+lat_2=60",
  "+x_0=0",
  "+y_0=0",
  "+datum=NAD83",
  "+units=m",
  "+no_defs"
)

Question 1

Creating the three necessary databases

The first part of this question was done previously, by installing the correct packages to use.

# Lookup table with one row per entry of the built-in `state.name` vector,
# paired with the matching entry of the built-in `state.region` factor.
region <- data.frame(
  state_name = state.name,
  region = state.region
)

# State polygons tagged with their region and projected to the common CRS.
usboundaries <- USAboundaries::us_states(resolution = "low") %>%
  # Remove Hawaii, Alaska, Puerto Rico and the District of Columbia by name.
  filter(!state_name %in% c("Hawaii", "Alaska", "Puerto Rico", "District of Columbia")) %>%
  # Name the join key explicitly: this silences the "Joining with `by = ...`"
  # message and keeps the join from silently picking up any other column the
  # two tables might share.
  left_join(region, by = "state_name") %>%
  st_transform(projcs)

# Country polygons for Canada, Mexico and the United States, taken from the
# rnaturalearth 1:110m countries data set and projected to the common CRS.
wldboundaries <- st_as_sf(rnaturalearth::countries110)
wldboundaries <- wldboundaries %>%
  filter(admin %in% c("Canada", "Mexico", "United States of America")) %>%
  st_transform(projcs)

# City points, built step by step:
#   1. read the raw CSV,
#   2. turn the lng/lat columns into point geometries declared as EPSG:4326,
#   3. reproject to the common CRS,
#   4. keep only the cities that intersect a polygon in `usboundaries`.
cities <- read_csv("../data/uscities.csv")
cities <- st_as_sf(cities, coords = c("lng", "lat"), crs = 4326)
cities <- st_transform(cities, projcs)
cities <- st_filter(cities, usboundaries, .predicate = st_intersects)

After creating the main databases, I narrowed them down to calculate distances later. I’m sure there’s a better way to do this but for the time being this will work just fine.

# Single-country subsets of `wldboundaries`, used later for the
# distance-to-Canada and distance-to-Mexico calculations.
wldboundaries_can <- filter(wldboundaries, admin == "Canada")

wldboundaries_mex <- filter(wldboundaries, admin == "Mexico")

This data set calls for the conterminous United States which are all the states South of Canada and North of Mexico. The & sign tells R to filter rows that are of jurisdiction type state, and are not called Alaska or Hawaii. st_crs shows us the current coordinate reference system being used. We can see it’s user defined (by the projcs variable created earlier).

Question 2

Union and Combine

Here I mainly used the combine function; for the purposes of this lab I don’t really see the need to use a union.

Unions:

# Union each layer into a single geometry, then cast the result to
# MULTILINESTRING so it is represented as line work.
usboundaries_un <- st_cast(st_union(usboundaries), "MULTILINESTRING")

wldboundaries_un <- st_cast(st_union(wldboundaries), "MULTILINESTRING")

Combines:

# Combine each layer into a single geometry, then cast the result to
# MULTILINESTRING so it is represented as line work.

# All state outlines.
usboundaries_com <- st_cast(st_combine(usboundaries), "MULTILINESTRING")

# Outlines of all three countries together.
wldboundaries_com <- st_cast(st_combine(wldboundaries), "MULTILINESTRING")

# Outline of Canada only.
wldboundaries_can_com <- st_cast(st_combine(wldboundaries_can), "MULTILINESTRING")

# Outline of Mexico only.
wldboundaries_mex_com <- st_cast(st_combine(wldboundaries_mex), "MULTILINESTRING")

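Not sure why, but I had to transform the coordinate systems to the common projected CRS (projcs) again. Note that projcs is a custom equidistant conic projection, not EPSG 5070.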

# Re-apply the common CRS (`projcs`) to every object that takes part in the
# distance calculations, so they are all guaranteed to share one CRS.
usboundaries_com <- usboundaries_com %>% st_transform(projcs)
wldboundaries_un <- wldboundaries_un %>% st_transform(projcs)
wldboundaries_com <- wldboundaries_com %>% st_transform(projcs)
wldboundaries_can_com <- wldboundaries_can_com %>% st_transform(projcs)
wldboundaries_mex_com <- wldboundaries_mex_com %>% st_transform(projcs)
cities <- cities %>% st_transform(projcs)

Distance Calculations

Remember to make sure the distance calculations are all within the cities data.frame

# Add four distance columns to `cities`. Each one is computed the same way:
# st_distance() from every city to a boundary line set, converted to
# kilometres with set_units(), then stripped of its units class with
# drop_units() so the column holds plain numbers.
#   dist_to_state - to the combined state outlines
#   dist_to_bor   - to the combined outlines of all three countries
#   dist_to_can   - to the outline of Canada
#   dist_to_mex   - to the outline of Mexico
cities <- cities %>%
  mutate(
    dist_to_state = drop_units(
      set_units(st_distance(., usboundaries_com), "km")
    ),
    dist_to_bor = drop_units(
      set_units(st_distance(., wldboundaries_com), "km")
    ),
    dist_to_can = drop_units(
      set_units(st_distance(., wldboundaries_can_com), "km")
    ),
    dist_to_mex = drop_units(
      set_units(st_distance(., wldboundaries_mex_com), "km")
    )
  )

Narrowing down certain cities for later

# Most populous city in each state.
cities_big <- cities %>%
  group_by(state_name) %>%
  slice_max(population, n = 1) %>%
  ungroup()

# Cities within 160.934 km (100 miles) of any border, of Canada, or of Mexico.
# The test is evaluated row by row, so no grouping is needed here; the
# group_by()/ungroup() pair that used to wrap it had no effect on the result.
cities_160km <- cities %>%
  filter(dist_to_bor <= 160.934 | dist_to_can <= 160.934 | dist_to_mex <= 160.934)

# Most populous city in each state among those inside the 100-mile zone.
big_cities_160km <- cities_160km %>%
  group_by(state_name) %>%
  slice_max(population, n = 1) %>%
  ungroup()

Distance to Borders

# Five cities with the largest `dist_to_bor`, reduced to the three columns
# shown in the table and with the geometry column removed.
top5far_cites <- cities %>%
  slice_max(dist_to_bor, n = 5) %>%
  select(city, state_name, dist_to_bor) %>%
  st_drop_geometry()

# Styled table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
table1 <- kable(
  top5far_cites,
  col.names = c("City", "State", "Distance from Border"),
  caption = "Top 5 Cities Furthest from any Border"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Distances to a State Border

# Five cities with the largest `dist_to_state`, reduced to the three columns
# shown in the table and with the geometry column removed.
top5state_cites <- cities %>%
  slice_max(dist_to_state, n = 5) %>%
  select(city, state_name, dist_to_state) %>%
  st_drop_geometry()

# Styled table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
table2 <- kable(
  top5state_cites,
  col.names = c("City", "State", "Distance from a State Border"),
  caption = "Top 5 Cities Furthest from a State Border"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Distance to Mexico Border

# Five cities with the largest `dist_to_mex`, reduced to the three columns
# shown in the table and with the geometry column removed.
top5mex_cites <- cities %>%
  slice_max(dist_to_mex, n = 5) %>%
  select(city, state_name, dist_to_mex) %>%
  st_drop_geometry()

# Styled table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
table3 <- kable(
  top5mex_cites,
  col.names = c("City", "State", "Distance from Mexican Border"),
  caption = "Top 5 Cities Furthest from the Mexican Border"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Distance to Canada Border

# Five cities with the largest `dist_to_can`, reduced to the three columns
# shown in the table and with the geometry column removed.
top5can_cites <- cities %>%
  slice_max(dist_to_can, n = 5) %>%
  select(city, state_name, dist_to_can) %>%
  st_drop_geometry()

# Styled table. `FALSE` is spelled out because `F` is an ordinary variable
# that can be reassigned.
table4 <- kable(
  top5can_cites,
  col.names = c("City", "State", "Distance from Canadian Border"),
  caption = "Top 5 Cities Furthest from the Canadian Border"
) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

All table results

# Print the styled table of the five cities furthest from any border.
table1
Top 5 Cities Furthest from any Border
City State Distance from Border
Minneapolis Kansas 1090.628
Barnard Kansas 1089.950
Ada Kansas 1089.876
Bennington Kansas 1083.878
Manchester Kansas 1083.346
# Print the styled table of the five cities furthest from a state border.
table2
Top 5 Cities Furthest from a State Border
City State Distance from a State Border
Lampasas Texas 308.9216
Bertram Texas 302.8190
Kempner Texas 302.5912
Harker Heights Texas 298.8125
Florence Texas 298.6804
# Print the styled table of the five cities furthest from the Mexican border.
table3
Top 5 Cities Furthest from the Mexican Border
City State Distance from Mexican Border
Caribou Maine 3250.334
Presque Isle Maine 3234.570
Calais Maine 3134.348
Eastport Maine 3125.624
Old Town Maine 3048.366
# Print the styled table of the five cities furthest from the Canadian border.
table4
Top 5 Cities Furthest from the Canadian Border
City State Distance from Canadian Border
Guadalupe Guerra Texas 2206.455
Sandoval Texas 2205.641
Fronton Texas 2204.784
Fronton Ranchettes Texas 2202.118
Evergreen Texas 2202.020

Question 3

Plots and Maps

10 Largest Cities in the US

# The ten cities with the largest population.
cities_top10 <- cities %>%
  slice_max(population, n = 10)

# Map of those ten cities (blue points) with the state outlines drawn over
# them and a repelled text label on each city. The title previously read
# "Continuous US"; the intended word is "Contiguous".
ggplot() +
  geom_sf(data = cities_top10, color = "blue", size = 1.5) +
  geom_sf(data = usboundaries_com) +
  ggthemes::scale_colour_economist() +
  ggrepel::geom_label_repel(
    data = cities_top10,
    aes(label = city, geometry = geometry),
    # Take label positions from the sf geometry column.
    stat = "sf_coordinates",
    size = 3
  ) +
  labs(
    title = "Top 10 Most Populous Cities in the Contiguous US",
    caption = "Here we can see the top 10 cities in the US with the highest population counts. They are also labeled."
  )

City Distance from a Coastal Border

Rewrite borders data.frame to include sf for labels (i.e. don’t drop geometry):

# Same five cities as the border-distance table, but with the geometry column
# kept so they can be drawn and labelled on a map.
top5far_map <- slice_max(cities, dist_to_bor, n = 5)

I’m assuming this is meant to be distance to an oceanic border; if not, simply change the dist_to_bor variable in the cities data.frame to use the combination of world boundaries rather than the union.

# Map of every city coloured by `dist_to_bor` (grey = small, blue = large),
# with the five furthest cities overdrawn in red and labelled, and the state
# outlines drawn on top of the points.
ggplot() +
  geom_sf(data = cities, aes(colour = dist_to_bor), size = 0.3) +
  geom_sf(data = top5far_map, colour = "red", size = 1) +
  geom_sf(data = usboundaries_com) +
  ggrepel::geom_label_repel(
    data = top5far_map,
    aes(label = city, geometry = geometry),
    # Take label positions from the sf geometry column.
    stat = "sf_coordinates",
    size = 3
  ) +
  scale_colour_gradient(low = "grey", high = "blue") +
  labs(
    title = "City Distances from any Border",
    caption = "Showing cities furthest from any border"
  )

City distance from a State Border

Rewrite states data.frame to include sf (i.e. don’t drop geometry):

# Same five cities as the state-border table, but with the geometry column
# kept so they can be drawn and labelled on a map.
top5state_map <- slice_max(cities, dist_to_state, n = 5)

City Map

# Map of every city coloured by `dist_to_state` (gray = small, blue = large),
# with the five furthest cities overdrawn in red and labelled, and the state
# outlines drawn on top of the points.
ggplot() +
  geom_sf(data = cities, aes(colour = dist_to_state), size = 0.3) +
  geom_sf(data = top5state_map, colour = "red", size = 1) +
  geom_sf(data = usboundaries_com) +
  ggrepel::geom_label_repel(
    data = top5state_map,
    aes(label = city, geometry = geometry),
    # Take label positions from the sf geometry column.
    stat = "sf_coordinates",
    size = 3
  ) +
  scale_colour_gradient(low = "gray", high = "blue") +
  labs(
    title = "City Distances to the Nearest State Line",
    caption = "Notice they are all in the center of Texas, the largest state in the conterminous US."
  ) +
  # theme_map() is applied first; the theme() call after it then sets the
  # legend position.
  ggthemes::theme_map() +
  theme(legend.position = "right")

Each dot represents one city. Grey dots are closer to state lines while blue dots are further from state lines. Red dots are the cities that are the furthest from a state line. Green dots are the cities with the largest population in each state and the more cyan these major cities are, the further they are from a state border.

Equidistant boundary from Mexico to Canada

Here we have to mutate the distances using an absolute value (you’ll see why in the code). We have to find all the cities that are equidistant from both the Canadian and Mexican borders. This should highlight all the cities in the center of the US.

# `km100` is the absolute difference between a city's distance to Canada and
# its distance to Mexico; a small value means the city is about equally far
# from both.
km_100eqd <- mutate(cities, km100 = abs(dist_to_can - dist_to_mex))

# The five most populous cities whose two distances differ by at most 100 km.
big_cities_100km <- km_100eqd %>%
  filter(km100 <= 100) %>%
  slice_max(population, n = 5)

# Map of every city coloured by `km100` (blue = small, red = large), with the
# cities where km100 <= 100 highlighted and the five most populous of them
# labelled. The title and caption previously misspelled "within" as "with in".
ggplot() +
  geom_sf(data = km_100eqd, aes(col = km100), size = .3) +
  scale_color_gradient(low = "blue", high = "red") +
  geom_sf(data = usboundaries_com) +
  # NOTE(review): gghighlight() is documented to act on the layers added
  # before it, which here includes the boundary layer that has no `km100`
  # column - confirm this does not produce an unwanted warning.
  gghighlight(km100 <= 100) +
  labs(
    title = "Cities within the 100 km Center",
    caption = "Cities highlighted are within 100 km of the center of both land borders. The 5 most populous cities are labeled."
  ) +
  theme_map() +
  ggrepel::geom_label_repel(
    data = big_cities_100km,
    aes(label = city, geometry = geometry),
    # Take label positions from the sf geometry column.
    stat = "sf_coordinates",
    size = 3
  )

The more blue the cities are, the closer they are to the center of both the Canadian and Mexican borders.

Question 4

How many cities are in this 100 mile zone? (100 miles ~ 160 kilometers)

There are 10,163 cities in the 100 mile danger zone.

How many people live in a city within 100 miles of the border?

# Total population of the cities in the 100-mile zone.
# The column is summed directly. The previous form piped the table through
# summarise(population), which returns one row per city; dplyr deprecated
# multi-row summarise() results in 1.1.0, so that call warns on current
# versions. The resulting total is the same.
popcount <- sum(cities_160km$population)

popcount
## [1] 227369789

There are 227,369,789 people living in the 100 mile danger zone.

What percentage of the total population is in this zone?

There are an estimated 328.2 million people in the USA as of 2019.

# Population of the 100-mile zone as a percentage of 328,200,000, the 2019
# US population estimate quoted in the text.
percent <- popcount / 328200000 * 100

percent
## [1] 69.27782

About 69% of the population lives within the 100 mile danger zone (2/3 is 67%)