if(!require("tidyverse")) install.packages("tidyverse")
if(!require("dplyr")) install.packages("dplyr")
library(tidyverse)
library(dplyr)
Mini Project 1
Introduction
This project for STA 9750 has us looking at Federal Transit Administration Data, specifically for 2022.
We will try to answer some questions about transit from that year in this file.
Import Libraries
Import Data
# I had to rename several of the files on import,
# When re-running my code, please be mindful of that
# Let's start with Fare Revenue
if(!file.exists("2022_fare_revenue.xlsx")){
# This should work _in theory_ but in practice it's still a bit finicky
# If it doesn't work for you, download this file 'by hand' in your
# browser and save it as "2022_fare_revenue.xlsx" in your project
# directory.
download.file("http://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/2022%20Fare%20Revenue.xlsx",
destfile="2022_fare_revenue.xlsx",
quiet=FALSE,
method="wget")
}<- readxl::read_xlsx("2022_fare_revenue.xlsx") |>
FARES select(-`State/Parent NTD ID`,
-`Reporter Type`,
-`Reporting Module`,
-`TOS`,
-`Passenger Paid Fares`,
-`Organization Paid Fares`) |>
filter(`Expense Type` == "Funds Earned During Period") |>
select(-`Expense Type`)
if(!file.exists("2022_expenses.csv")){
# This should work _in theory_ but in practice it's still a bit finicky
# If it doesn't work for you, download this file 'by hand' in your
# browser and save it as "2022_expenses.csv" in your project
# directory.
download.file("https://data.transportation.gov/api/views/dkxx-zjd6/rows.csv?date=20231102&accessType=DOWNLOAD&bom=true&format=true",
destfile="2022_expenses.csv",
quiet=FALSE,
method="wget")
}
<- readr::read_csv("2022_expenses.csv") |>
EXPENSES select(`NTD ID`,
`Agency`,
`Total`,
`Mode`) |>
mutate(`NTD ID` = as.integer(`NTD ID`)) |>
rename(Expenses = Total) |>
group_by(`NTD ID`, `Mode`) |>
summarize(Expenses = sum(Expenses)) |>
ungroup()
<- inner_join(FARES, EXPENSES, join_by(`NTD ID`, `Mode`))
FINANCIALS
if(!file.exists("ridership.xlsx")){
# This should work _in theory_ but in practice it's still a bit finicky
# If it doesn't work for you, download this file 'by hand' in your
# browser and save it as "ridership.xlsx" in your project
# directory.
download.file("https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-09/July%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240903.xlsx",
destfile="ridership.xlsx",
quiet=FALSE,
method="wget")
}
<- readxl::read_xlsx("ridership.xlsx", sheet="UPT") |>
TRIPS filter(`Mode/Type of Service Status` == "Active") |>
select(-`Legacy NTD ID`,
-`Reporter Type`,
-`Mode/Type of Service Status`,
-`UACE CD`,
-`TOS`) |>
pivot_longer(-c(`NTD ID`:`3 Mode`),
names_to="month",
values_to="UPT") |>
drop_na() |>
mutate(month=my(month)) # Parse _m_onth _y_ear date specs
<- readxl::read_xlsx("ridership.xlsx", sheet="VRM") |>
MILES filter(`Mode/Type of Service Status` == "Active") |>
select(-`Legacy NTD ID`,
-`Reporter Type`,
-`Mode/Type of Service Status`,
-`UACE CD`,
-`TOS`) |>
pivot_longer(-c(`NTD ID`:`3 Mode`),
names_to="month",
values_to="VRM") |>
drop_na() |>
group_by(`NTD ID`, `Agency`, `UZA Name`,
`Mode`, `3 Mode`, month) |>
summarize(VRM = sum(VRM)) |>
ungroup() |>
mutate(month=my(month)) # Parse _m_onth _y_ear date specs
<- inner_join(TRIPS, MILES) |>
USAGE mutate(`NTD ID` = as.integer(`NTD ID`))
if(!require("DT")) install.packages("DT")
library(DT)
sample_n(USAGE, 1000) |>
mutate(month=as.character(month)) |>
::datatable() DT
Task 1: Rename column UZA Name to metro_area
colnames(USAGE)
[1] "NTD ID" "Agency" "UZA Name" "Mode" "3 Mode" "month" "UPT"
[8] "VRM"
<- USAGE |> rename("metro_area" = "UZA Name")
USAGE
colnames(USAGE)
[1] "NTD ID" "Agency" "metro_area" "Mode" "3 Mode"
[6] "month" "UPT" "VRM"
Task 2: Find unique Modes, impute with a more understandable value using case-when
head(USAGE)
# A tibble: 6 × 8
`NTD ID` Agency metro_area Mode `3 Mode` month UPT VRM
<int> <chr> <chr> <chr> <chr> <date> <dbl> <dbl>
1 1 King County Seattle--Tacoma,… DR Bus 2002-01-01 135144 746158
2 1 King County Seattle--Tacoma,… DR Bus 2002-02-01 127378 656324
3 1 King County Seattle--Tacoma,… DR Bus 2002-03-01 136030 726578
4 1 King County Seattle--Tacoma,… DR Bus 2002-04-01 142204 736975
5 1 King County Seattle--Tacoma,… DR Bus 2002-05-01 144697 746158
6 1 King County Seattle--Tacoma,… DR Bus 2002-06-01 131833 696633
|>
USAGE distinct(Mode) |>
arrange(Mode)
# A tibble: 18 × 1
Mode
<chr>
1 AR
2 CB
3 CC
4 CR
5 DR
6 FB
7 HR
8 IP
9 LR
10 MB
11 MG
12 PB
13 RB
14 SR
15 TB
16 TR
17 VP
18 YR
<- USAGE |>
USAGE mutate(Mode=case_when(
== "AR" ~ "Alaska Railroad",
Mode == "CB" ~ "Commuter Bus",
Mode == "CC" ~ "Cable Car",
Mode == "CR" ~ "Commuter Rail",
Mode == "DR" ~ "Demand Response",
Mode == "FB" ~ "Ferryboat",
Mode == "HR" ~ "Heavy Rail",
Mode == "IP" ~ "Inclined Plane",
Mode == "LR" ~ "Light Rail",
Mode == "MB" ~ "Bus",
Mode == "MG" ~ "Monorail and Automated Guideway modes",
Mode == "PB" ~ "Publico",
Mode == "RB" ~ "Bus Rapid Transit",
Mode == "SR" ~ "Streetcar Rail",
Mode == "TB" ~ "Trolleybus",
Mode == "TR" ~ "Aerial Tramways",
Mode == "VP" ~ "Vanpool",
Mode == "YR" ~ "Hybrid Rail",
Mode TRUE ~ "Unknown"))
head(USAGE)
# A tibble: 6 × 8
`NTD ID` Agency metro_area Mode `3 Mode` month UPT VRM
<int> <chr> <chr> <chr> <chr> <date> <dbl> <dbl>
1 1 King County Seattle--Tacoma,… Dema… Bus 2002-01-01 135144 746158
2 1 King County Seattle--Tacoma,… Dema… Bus 2002-02-01 127378 656324
3 1 King County Seattle--Tacoma,… Dema… Bus 2002-03-01 136030 726578
4 1 King County Seattle--Tacoma,… Dema… Bus 2002-04-01 142204 736975
5 1 King County Seattle--Tacoma,… Dema… Bus 2002-05-01 144697 746158
6 1 King County Seattle--Tacoma,… Dema… Bus 2002-06-01 131833 696633
|>
USAGE distinct(Mode) |>
arrange(Mode)
# A tibble: 18 × 1
Mode
<chr>
1 Aerial Tramways
2 Alaska Railroad
3 Bus
4 Bus Rapid Transit
5 Cable Car
6 Commuter Bus
7 Commuter Rail
8 Demand Response
9 Ferryboat
10 Heavy Rail
11 Hybrid Rail
12 Inclined Plane
13 Light Rail
14 Monorail and Automated Guideway modes
15 Publico
16 Streetcar Rail
17 Trolleybus
18 Vanpool
<- USAGE |> rename( "unlinked_passenger_trips" = "UPT",
USAGE "vehicle_revenue_miles" = "VRM") |>
select( -`NTD ID`,
-`3 Mode`)
sample_n(USAGE, 1000) |>
mutate(month=as.character(month)) |>
::datatable() DT
Task 3: Answer specific questions
head(USAGE)
# A tibble: 6 × 6
Agency metro_area Mode month unlinked_passenger_t…¹
<chr> <chr> <chr> <date> <dbl>
1 King County Seattle--Tacoma, WA Demand Resp… 2002-01-01 135144
2 King County Seattle--Tacoma, WA Demand Resp… 2002-02-01 127378
3 King County Seattle--Tacoma, WA Demand Resp… 2002-03-01 136030
4 King County Seattle--Tacoma, WA Demand Resp… 2002-04-01 142204
5 King County Seattle--Tacoma, WA Demand Resp… 2002-05-01 144697
6 King County Seattle--Tacoma, WA Demand Resp… 2002-06-01 131833
# ℹ abbreviated name: ¹unlinked_passenger_trips
# ℹ 1 more variable: vehicle_revenue_miles <dbl>
3.1: Which transit agency had the most total VRM in this sample?
# A grouped summarize that takes the first returned item in descending order should get that
|>
USAGE group_by(Agency) |>
summarize(n_vrm = n()) |>
arrange(desc(n_vrm)) |>
slice_head(n=1)
# A tibble: 1 × 2
Agency n_vrm
<chr> <int>
1 New Jersey Transit Corporation 2048
Answer: New Jersey Transit Corporation with 2,048
3.2: Which transit mode had the most total VRM in this sample?
# Same code as 3.1 but replacing Agency with Mode
|>
USAGE group_by(Mode) |>
summarize(n_vrm = n()) |>
arrange(desc(n_vrm)) |>
slice_head(n=1)
# A tibble: 1 × 2
Mode n_vrm
<chr> <int>
1 Demand Response 115701
Answer: Demand Response with 115,701
3.3: How many trips were taken on the NYC Subway (Heavy Rail) in May 2024?
#install.packages("stringr")
library(stringr)
# First checking to ensure that May 2024 is only denoted by 2024-05-01
|>
USAGE filter(str_detect(month, "2024-05-")) |>
distinct(month) #checks out
# A tibble: 1 × 1
month
<date>
1 2024-05-01
# Ensuring I get the right Agency name
|>
USAGE filter(str_detect(Agency,"MTA")) |>
distinct(Agency)
# A tibble: 4 × 1
Agency
<chr>
1 MTA New York City Transit
2 Metro-North Commuter Railroad Company, dba: MTA Metro-North Railroad
3 MTA Long Island Rail Road
4 MTA Bus Company
# Unlinked Passenger Trips (UPT) are defined as:
# The number of passengers who board public transportation vehicles.
# Passengers are counted each time they board vehicles
# no matter how many vehicles they use to travel from their origin
# to their destination.
|>
USAGE filter(month == '2024-05-01',
== "MTA New York City Transit",
Agency == "Heavy Rail") |>
Mode select(unlinked_passenger_trips)
# A tibble: 1 × 1
unlinked_passenger_trips
<dbl>
1 180458819
Answer: 180,458,819 trips
3.4: Which Mode of transport had the longest average trip in May 2024?
|>
USAGE filter(month == '2024-05-01') |>
group_by(Mode) |>
summarize(trips_avg = mean(unlinked_passenger_trips)) |>
arrange(desc(trips_avg)) |>
slice_head(n=1)
# A tibble: 1 × 2
Mode trips_avg
<chr> <dbl>
1 Heavy Rail 14836486.
Answer: Heavy Rail, with 14,836,486 average trips (I did this question before 3.4 was marked as unneccesary due to not having the correct feature to run this analysis, but I’ve left it in regardless)
3.5: How much did NYC Subway ridership fall between April 2019 and April 2020?
|>
USAGE filter(Agency == "MTA New York City Transit",
== "Heavy Rail",
Mode >= '2019-04-01',
month < '2020-05-01') |>
month select(unlinked_passenger_trips)
# A tibble: 13 × 1
unlinked_passenger_trips
<dbl>
1 232223929
2 235967209
3 224274463
4 229774505
5 229171856
6 230694038
7 253609943
8 235137305
9 236357677
10 231863427
11 227432375
12 119654860
13 20254269
# Let's plot the ridership numbers
if(!require("ggplot2")) install.packages("ggplot2")
if(!require("lubridate")) install.packages("lubridate")
library(ggplot2)
library(lubridate)
<- seq(ymd("2019-04-01"), by = "month", length.out = 13)
dates <- c(USAGE |>
ridership filter(month >= '2019-04-01',
< '2020-05-01',
month == 'MTA New York City Transit',
Agency == 'Heavy Rail'))
Mode
# Create a data frame
<- data.frame(Date = dates, Ridership = ridership)
data
if(!require("scales")) install.packages("scales")
Loading required package: scales
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
library(scales)
# Plot ridership over time
ggplot(data, aes(x = Date, y = Ridership.unlinked_passenger_trips)) + # Change x and y accordingly
geom_line() +
geom_point() +
labs(title = "NYC Subway Ridership from April 2019 through April 2020", x = "Date", y = "Total Trips") +
scale_y_continuous(labels = label_number()) + # Prevent scientific notation
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"), # Center the title and make it bold
axis.title.x = element_text(size = 14), # Change x-axis title size
axis.title.y = element_text(size = 14), # Change y-axis title size
axis.text = element_text(size = 12), # Change axis text size
panel.grid.minor = element_blank() # Remove minor grid lines
)
Task 4: Find 3 more interesting facts in this data
head(USAGE, n=7)
# A tibble: 7 × 6
Agency metro_area Mode month unlinked_passenger_t…¹
<chr> <chr> <chr> <date> <dbl>
1 King County Seattle--Tacoma, WA Demand Resp… 2002-01-01 135144
2 King County Seattle--Tacoma, WA Demand Resp… 2002-02-01 127378
3 King County Seattle--Tacoma, WA Demand Resp… 2002-03-01 136030
4 King County Seattle--Tacoma, WA Demand Resp… 2002-04-01 142204
5 King County Seattle--Tacoma, WA Demand Resp… 2002-05-01 144697
6 King County Seattle--Tacoma, WA Demand Resp… 2002-06-01 131833
7 King County Seattle--Tacoma, WA Demand Resp… 2002-07-01 137547
# ℹ abbreviated name: ¹unlinked_passenger_trips
# ℹ 1 more variable: vehicle_revenue_miles <dbl>
4.1: What’s the most miles traveled by agency + mode and how does it compare to the number of trips?
|>
USAGE group_by(Agency, Mode) |>
summarize(total_travel = sum(vehicle_revenue_miles, na.rm = TRUE), .groups="drop") |>
arrange(desc(total_travel)) |>
slice_head(n=3)
# A tibble: 3 × 3
Agency Mode total_travel
<chr> <chr> <dbl>
1 MTA New York City Transit Heavy R… 7732916753
2 New Jersey Transit Corporation Bus 3781858802
3 Los Angeles County Metropolitan Transportation Authority Bus 3501202902
NYC Subway leads the way in revenue miles, followed by NJT Bus and LA Bus
|>
USAGE group_by(Agency, Mode) |>
summarize(total_travel = sum(unlinked_passenger_trips, na.rm = TRUE), .groups="drop") |>
arrange(desc(total_travel)) |>
slice_head(n=3)
# A tibble: 3 × 3
Agency Mode total_travel
<chr> <chr> <dbl>
1 MTA New York City Transit Heavy R… 51672094135
2 MTA New York City Transit Bus 16889723939
3 Los Angeles County Metropolitan Transportation Authority Bus 7114375207
NYC Subway leads the way here as well, while NJT Bus is overtaken by NYC’s bus system. The distance between the MTA and the next few is quite large.
4.2 Which Agency manages the most Modes?
|>
USAGE group_by(Agency) |>
summarize(total_modes = n_distinct(Mode)) |>
arrange(desc(total_modes)) # Massachusetts Bay Transportation Authority
# A tibble: 677 × 2
Agency total_modes
<chr> <int>
1 Massachusetts Bay Transportation Authority 7
2 Central Oklahoma Transportation and Parking Authority 6
3 City and County of San Francisco 6
4 City of Charlotte North Carolina 6
5 County of Miami-Dade 6
6 Dallas Area Rapid Transit 6
7 King County 6
8 Los Angeles County Metropolitan Transportation Authority 6
9 Maryland Transit Administration 6
10 Metropolitan Transit Authority of Harris County, Texas 6
# ℹ 667 more rows
4.3 Which date saw the most usage for the NYC Subway?
|>
USAGE filter(Agency == 'MTA New York City Transit',
== 'Heavy Rail') |>
Mode group_by(month) |>
summarize(most_trips = sum(unlinked_passenger_trips, na.rm = TRUE)) |>
arrange(desc(most_trips)) |>
slice_head(n=1) # 2019-10-01
# A tibble: 1 × 2
month most_trips
<date> <dbl>
1 2019-10-01 253609943
# conversely, which had the fewest?
|>
USAGE filter(Agency == 'MTA New York City Transit',
== 'Heavy Rail') |>
Mode group_by(month) |>
summarize(most_trips = sum(unlinked_passenger_trips, na.rm = TRUE)) |>
arrange((most_trips)) |>
slice_head(n=1) # 2020-04-01
# A tibble: 1 × 2
month most_trips
<date> <dbl>
1 2020-04-01 20254269
2019-10-01 had the most while 2020-04-01 had the fewest
Task 5: Create a new table with annual total (sum) for UPT and VRM for 2022
<- inner_join(TRIPS, MILES) |>
USAGE mutate(`NTD ID` = as.integer(`NTD ID`))
Joining with `by = join_by(`NTD ID`, Agency, `UZA Name`, Mode, `3 Mode`,
month)`
<- USAGE |>
USAGE mutate(Mode=case_when(
== "AR" ~ "Alaska Railroad",
Mode == "CB" ~ "Commuter Bus",
Mode == "CC" ~ "Cable Car",
Mode == "CR" ~ "Commuter Rail",
Mode == "DR" ~ "Demand Response",
Mode == "FB" ~ "Ferryboat",
Mode == "HR" ~ "Heavy Rail",
Mode == "IP" ~ "Inclined Plane",
Mode == "LR" ~ "Light Rail",
Mode == "MB" ~ "Bus",
Mode == "MG" ~ "Monorail and Automated Guideway modes",
Mode == "PB" ~ "Publico",
Mode == "RB" ~ "Bus Rapid Transit",
Mode == "SR" ~ "Streetcar Rail",
Mode == "TB" ~ "Trolleybus",
Mode == "TR" ~ "Aerial Tramways",
Mode == "VP" ~ "Vanpool",
Mode == "YR" ~ "Hybrid Rail",
Mode TRUE ~ "Unknown"))
<- USAGE |>
USAGE_2022_ANNUAL select(-`3 Mode`) |>
rename("metro_area" = "UZA Name") |>
filter(year(month)=='2022') |>
group_by(`NTD ID`, `Agency`,`metro_area`,`Mode`) |>
summarize(UPT = sum(UPT),
VRM = sum(VRM)) |>
ungroup()
`summarise()` has grouped output by 'NTD ID', 'Agency', 'metro_area'. You can
override using the `.groups` argument.
head(USAGE_2022_ANNUAL, n=2)
# A tibble: 2 × 6
`NTD ID` Agency metro_area Mode UPT VRM
<int> <chr> <chr> <chr> <dbl> <dbl>
1 1 King County Seattle--Tacoma, WA Bus 53983641 61632644
2 1 King County Seattle--Tacoma, WA Demand Response 663009 12860448
<- FINANCIALS |>
FINANCIALS mutate(Mode=case_when(
== "AR" ~ "Alaska Railroad",
Mode == "CB" ~ "Commuter Bus",
Mode == "CC" ~ "Cable Car",
Mode == "CR" ~ "Commuter Rail",
Mode == "DR" ~ "Demand Response",
Mode == "FB" ~ "Ferryboat",
Mode == "HR" ~ "Heavy Rail",
Mode == "IP" ~ "Inclined Plane",
Mode == "LR" ~ "Light Rail",
Mode == "MB" ~ "Bus",
Mode == "MG" ~ "Monorail and Automated Guideway modes",
Mode == "PB" ~ "Publico",
Mode == "RB" ~ "Bus Rapid Transit",
Mode == "SR" ~ "Streetcar Rail",
Mode == "TB" ~ "Trolleybus",
Mode == "TR" ~ "Aerial Tramways",
Mode == "VP" ~ "Vanpool",
Mode == "YR" ~ "Hybrid Rail",
Mode TRUE ~ "Unknown"))
<- left_join(USAGE_2022_ANNUAL,
USAGE_AND_FINANCIALS
FINANCIALS,join_by(`NTD ID`, `Mode`)) |>
drop_na()
head(USAGE_AND_FINANCIALS,n=7)
# A tibble: 7 × 9
`NTD ID` Agency metro_area Mode UPT VRM `Agency Name` `Total Fares`
<dbl> <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
1 1 King Coun… Seattle--… Bus 5.40e7 6.16e7 King County … 56566150
2 1 King Coun… Seattle--… Bus 5.40e7 6.16e7 King County … 280187
3 1 King Coun… Seattle--… Dema… 6.63e5 1.29e7 King County … 740726
4 1 King Coun… Seattle--… Dema… 6.63e5 1.29e7 King County … 91601
5 1 King Coun… Seattle--… Ferr… 4.00e5 5.12e4 King County … 1715265
6 1 King Coun… Seattle--… Stre… 1.12e6 1.80e5 King County … 588495
7 1 King Coun… Seattle--… Trol… 9.58e6 2.64e6 King County … 10123486
# ℹ 1 more variable: Expenses <dbl>
Task 6: Answer the following 6 questions using USAGE_AND_FINANCIALS
6.1: Which transit system (agency and mode) had the most UPT in 2022?
|>
USAGE_AND_FINANCIALS group_by(`Agency`, `Mode`) |>
summarize(Total_UPT = sum(UPT, na.rm=TRUE), .groups="drop") |>
arrange(desc(Total_UPT)) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode Total_UPT
<chr> <chr> <dbl>
1 MTA New York City Transit Heavy Rail 1793073801
MTA New York City Transit – Heavy Rail
6.2: Which transit system (agency and mode) had the highest farebox recovery, defined as the highest ratio of Total Fares to Expenses?
|>
USAGE_AND_FINANCIALS group_by(`Agency`,`Mode`) |>
summarize(fbx_rcv = sum(`Total Fares`, na.rm = TRUE) / sum(`Expenses`, na.rm = TRUE), .groups="drop") |>
arrange(desc(fbx_rcv)) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode fbx_rcv
<chr> <chr> <dbl>
1 Transit Authority of Central Kentucky Vanpool 2.38
Transit Authority of Central Kentucky – Vanpool
6.3: Which transit system (agency and mode) had the lowest expenses per UPT?
|>
USAGE_AND_FINANCIALS filter(UPT >= 1) |>
group_by(`Agency`,`Mode`) |>
summarize(lwst_exp_upt = sum(`Expenses`, na.rm = TRUE) / sum(`UPT`, na.rm = TRUE), .groups="drop") |>
arrange(lwst_exp_upt) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode lwst_exp_upt
<chr> <chr> <dbl>
1 North Carolina State University Bus 1.18
North Caroline State University – Bus
6.4: Which transit system (agency and mode) had the highest total fares per UPT?
|>
USAGE_AND_FINANCIALS filter(`Total Fares` >= 1) |>
group_by(`Agency`,`Mode`) |>
summarize(hghst_fr_upt = sum(`UPT`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop") |>
# summarize(hghst_fr_upt = sum(`UPT`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop") |>
arrange(desc(hghst_fr_upt)) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode hghst_fr_upt
<chr> <chr> <dbl>
1 Athens-Clarke County Unified Government Bus 522940
Athens-Clarke County Unified Government – Bus
6.5: Which transit system (agency and mode) had the lowest expenses per VRM?
|>
USAGE_AND_FINANCIALS filter(UPT >= 1) |>
group_by(`Agency`,`Mode`) |>
summarize(lwst_exp_vrm = sum(`Expenses`, na.rm = TRUE) / sum(`VRM`, na.rm = TRUE), .groups="drop") |>
arrange(lwst_exp_vrm) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode lwst_exp_vrm
<chr> <chr> <dbl>
1 New Mexico Department of Transportation Vanpool 0.337
New Mexico Department of Transportation – Vanpool
6.6: Which transit system (agency and mode) had the highest total fares per VRM?
|>
USAGE_AND_FINANCIALS filter(`Total Fares` >= 1) |>
group_by(`Agency`,`Mode`) |>
summarize(hghst_fr_vrm = sum(`VRM`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop") |>
arrange(desc(hghst_fr_vrm)) |>
slice_head(n=1)
# A tibble: 1 × 3
Agency Mode hghst_fr_vrm
<chr> <chr> <dbl>
1 Athens-Clarke County Unified Government Bus 380687
Athens-Clarke County Unified Government – Bus