Mini Project 1

Introduction

This project for STA 9750 has us looking at Federal Transit Administration Data, specifically for 2022.

We will try to answer some questions about transit from that year in this file.

Import Libraries

if(!require("tidyverse")) install.packages("tidyverse")
if(!require("dplyr")) install.packages("dplyr")
library(tidyverse)
library(dplyr)

Import Data

# I had to rename several of the files on import,
# When re-running my code, please be mindful of that


# Let's start with Fare Revenue
if(!file.exists("2022_fare_revenue.xlsx")){
    # This should work _in theory_ but in practice it's still a bit finicky
    # If it doesn't work for you, download this file 'by hand' in your
    # browser and save it as "2022_fare_revenue.xlsx" in your project
    # directory.
    download.file("http://www.transit.dot.gov/sites/fta.dot.gov/files/2024-04/2022%20Fare%20Revenue.xlsx", 
                  destfile="2022_fare_revenue.xlsx", 
                  quiet=FALSE, 
                  method="wget")
}
FARES <- readxl::read_xlsx("2022_fare_revenue.xlsx") |>
  select(-`State/Parent NTD ID`, 
         -`Reporter Type`,
         -`Reporting Module`,
         -`TOS`,
         -`Passenger Paid Fares`,
         -`Organization Paid Fares`) |>
  filter(`Expense Type` == "Funds Earned During Period") |>
  select(-`Expense Type`)

if(!file.exists("2022_expenses.csv")){
    # This should work _in theory_ but in practice it's still a bit finicky
    # If it doesn't work for you, download this file 'by hand' in your
    # browser and save it as "2022_expenses.csv" in your project
    # directory.
    download.file("https://data.transportation.gov/api/views/dkxx-zjd6/rows.csv?date=20231102&accessType=DOWNLOAD&bom=true&format=true", 
                  destfile="2022_expenses.csv", 
                  quiet=FALSE, 
                  method="wget")
}

EXPENSES <- readr::read_csv("2022_expenses.csv") |>
  select(`NTD ID`, 
         `Agency`,
         `Total`, 
         `Mode`) |>
  mutate(`NTD ID` = as.integer(`NTD ID`)) |>
  rename(Expenses = Total) |>
  group_by(`NTD ID`, `Mode`) |>
  summarize(Expenses = sum(Expenses)) |>
  ungroup()

FINANCIALS <- inner_join(FARES, EXPENSES, join_by(`NTD ID`, `Mode`))

if(!file.exists("ridership.xlsx")){
    # This should work _in theory_ but in practice it's still a bit finicky
    # If it doesn't work for you, download this file 'by hand' in your
    # browser and save it as "ridership.xlsx" in your project
    # directory.
    download.file("https://www.transit.dot.gov/sites/fta.dot.gov/files/2024-09/July%202024%20Complete%20Monthly%20Ridership%20%28with%20adjustments%20and%20estimates%29_240903.xlsx", 
                  destfile="ridership.xlsx", 
                  quiet=FALSE, 
                  method="wget")
}

TRIPS <- readxl::read_xlsx("ridership.xlsx", sheet="UPT") |>
  filter(`Mode/Type of Service Status` == "Active") |>
  select(-`Legacy NTD ID`, 
         -`Reporter Type`, 
         -`Mode/Type of Service Status`, 
         -`UACE CD`, 
         -`TOS`) |>
  pivot_longer(-c(`NTD ID`:`3 Mode`), 
               names_to="month", 
               values_to="UPT") |>
  drop_na() |>
  mutate(month=my(month)) # Parse _m_onth _y_ear date specs

MILES <- readxl::read_xlsx("ridership.xlsx", sheet="VRM") |>
  filter(`Mode/Type of Service Status` == "Active") |>
  select(-`Legacy NTD ID`, 
         -`Reporter Type`, 
         -`Mode/Type of Service Status`, 
         -`UACE CD`, 
         -`TOS`) |>
  pivot_longer(-c(`NTD ID`:`3 Mode`), 
               names_to="month", 
               values_to="VRM") |>
  drop_na() |>
  group_by(`NTD ID`, `Agency`, `UZA Name`, 
           `Mode`, `3 Mode`, month) |>
  summarize(VRM = sum(VRM)) |>
  ungroup() |>
  mutate(month=my(month)) # Parse _m_onth _y_ear date specs

USAGE <- inner_join(TRIPS, MILES) |>
  mutate(`NTD ID` = as.integer(`NTD ID`))

if(!require("DT")) install.packages("DT")
library(DT)
sample_n(USAGE, 1000) |> 
  mutate(month=as.character(month)) |> 
  DT::datatable()

Task 1: Rename column UZA Name to metro_area

colnames(USAGE)

[1] "NTD ID"   "Agency"   "UZA Name" "Mode"     "3 Mode"   "month"    "UPT"     
[8] "VRM"

USAGE <- USAGE |> rename("metro_area" = "UZA Name")

colnames(USAGE)

[1] "NTD ID"     "Agency"     "metro_area" "Mode"       "3 Mode"    
[6] "month"      "UPT"        "VRM"

Task 2: Find unique Modes, impute with a more understandable value using case-when

head(USAGE)

# A tibble: 6 × 8
  `NTD ID` Agency      metro_area        Mode  `3 Mode` month         UPT    VRM
     <int> <chr>       <chr>             <chr> <chr>    <date>      <dbl>  <dbl>
1        1 King County Seattle--Tacoma,… DR    Bus      2002-01-01 135144 746158
2        1 King County Seattle--Tacoma,… DR    Bus      2002-02-01 127378 656324
3        1 King County Seattle--Tacoma,… DR    Bus      2002-03-01 136030 726578
4        1 King County Seattle--Tacoma,… DR    Bus      2002-04-01 142204 736975
5        1 King County Seattle--Tacoma,… DR    Bus      2002-05-01 144697 746158
6        1 King County Seattle--Tacoma,… DR    Bus      2002-06-01 131833 696633

USAGE |>
  distinct(Mode) |>
  arrange(Mode)

# A tibble: 18 × 1
   Mode 
   <chr>
 1 AR   
 2 CB   
 3 CC   
 4 CR   
 5 DR   
 6 FB   
 7 HR   
 8 IP   
 9 LR   
10 MB   
11 MG   
12 PB   
13 RB   
14 SR   
15 TB   
16 TR   
17 VP   
18 YR

USAGE <- USAGE |>
  mutate(Mode=case_when(
    Mode == "AR" ~ "Alaska Railroad",
    Mode == "CB" ~ "Commuter Bus",
    Mode == "CC" ~ "Cable Car",
    Mode == "CR" ~ "Commuter Rail",
    Mode == "DR" ~ "Demand Response",
    Mode == "FB" ~ "Ferryboat",
    Mode == "HR" ~ "Heavy Rail",
    Mode == "IP" ~ "Inclined Plane",
    Mode == "LR" ~ "Light Rail",
    Mode == "MB" ~ "Bus",
    Mode == "MG" ~ "Monorail and Automated Guideway modes",
    Mode == "PB" ~ "Publico",
    Mode == "RB" ~ "Bus Rapid Transit",
    Mode == "SR" ~ "Streetcar Rail",
    Mode == "TB" ~ "Trolleybus",
    Mode == "TR" ~ "Aerial Tramways",
    Mode == "VP" ~ "Vanpool",
    Mode == "YR" ~ "Hybrid Rail",
    TRUE ~ "Unknown"))

head(USAGE)

# A tibble: 6 × 8
  `NTD ID` Agency      metro_area        Mode  `3 Mode` month         UPT    VRM
     <int> <chr>       <chr>             <chr> <chr>    <date>      <dbl>  <dbl>
1        1 King County Seattle--Tacoma,… Dema… Bus      2002-01-01 135144 746158
2        1 King County Seattle--Tacoma,… Dema… Bus      2002-02-01 127378 656324
3        1 King County Seattle--Tacoma,… Dema… Bus      2002-03-01 136030 726578
4        1 King County Seattle--Tacoma,… Dema… Bus      2002-04-01 142204 736975
5        1 King County Seattle--Tacoma,… Dema… Bus      2002-05-01 144697 746158
6        1 King County Seattle--Tacoma,… Dema… Bus      2002-06-01 131833 696633

USAGE |>
  distinct(Mode) |>
  arrange(Mode)

# A tibble: 18 × 1
   Mode                                 
   <chr>                                
 1 Aerial Tramways                      
 2 Alaska Railroad                      
 3 Bus                                  
 4 Bus Rapid Transit                    
 5 Cable Car                            
 6 Commuter Bus                         
 7 Commuter Rail                        
 8 Demand Response                      
 9 Ferryboat                            
10 Heavy Rail                           
11 Hybrid Rail                          
12 Inclined Plane                       
13 Light Rail                           
14 Monorail and Automated Guideway modes
15 Publico                              
16 Streetcar Rail                       
17 Trolleybus                           
18 Vanpool

USAGE <- USAGE |>     rename( "unlinked_passenger_trips" = "UPT",
                              "vehicle_revenue_miles" = "VRM") |>
  select( -`NTD ID`,
          -`3 Mode`)

sample_n(USAGE, 1000) |> 
    mutate(month=as.character(month)) |> 
    DT::datatable()

Task 3: Answer specific questions

head(USAGE)

# A tibble: 6 × 6
  Agency      metro_area          Mode         month      unlinked_passenger_t…¹
  <chr>       <chr>               <chr>        <date>                      <dbl>
1 King County Seattle--Tacoma, WA Demand Resp… 2002-01-01                 135144
2 King County Seattle--Tacoma, WA Demand Resp… 2002-02-01                 127378
3 King County Seattle--Tacoma, WA Demand Resp… 2002-03-01                 136030
4 King County Seattle--Tacoma, WA Demand Resp… 2002-04-01                 142204
5 King County Seattle--Tacoma, WA Demand Resp… 2002-05-01                 144697
6 King County Seattle--Tacoma, WA Demand Resp… 2002-06-01                 131833
# ℹ abbreviated name: ¹unlinked_passenger_trips
# ℹ 1 more variable: vehicle_revenue_miles <dbl>

3.1: Which transit agency had the most total VRM in this sample?

# A grouped summarize that takes the first returned item in descending order should get that
USAGE |>
  group_by(Agency) |>
  summarize(n_vrm = n()) |>
  arrange(desc(n_vrm)) |>
  slice_head(n=1)

# A tibble: 1 × 2
  Agency                         n_vrm
  <chr>                          <int>
1 New Jersey Transit Corporation  2048

Answer: New Jersey Transit Corporation with 2,048

3.2: Which transit mode had the most total VRM in this sample?

# Same code as 3.1 but replacing Agency with Mode
USAGE |>
  group_by(Mode) |>
  summarize(n_vrm = n()) |>
  arrange(desc(n_vrm)) |>
  slice_head(n=1)

# A tibble: 1 × 2
  Mode             n_vrm
  <chr>            <int>
1 Demand Response 115701

Answer: Demand Response with 115,701

3.3: How many trips were taken on the NYC Subway (Heavy Rail) in May 2024?

#install.packages("stringr")
library(stringr)

# First checking to ensure that May 2024 is only denoted by 2024-05-01
USAGE |>
  filter(str_detect(month, "2024-05-")) |>
  distinct(month) #checks out

# A tibble: 1 × 1
  month     
  <date>    
1 2024-05-01

# Ensuring I get the right Agency name
USAGE |>
  filter(str_detect(Agency,"MTA")) |>
  distinct(Agency)

# A tibble: 4 × 1
  Agency                                                              
  <chr>                                                               
1 MTA New York City Transit                                           
2 Metro-North Commuter Railroad Company, dba: MTA Metro-North Railroad
3 MTA Long Island Rail Road                                           
4 MTA Bus Company

# Unlinked Passenger Trips (UPT) are defined as:
# The number of passengers who board public transportation vehicles.
# Passengers are counted each time they board vehicles
# no matter how many vehicles they use to travel from their origin
# to their destination.


USAGE |>
  filter(month == '2024-05-01',
         Agency == "MTA New York City Transit",
         Mode == "Heavy Rail") |>
  select(unlinked_passenger_trips)

# A tibble: 1 × 1
  unlinked_passenger_trips
                     <dbl>
1                180458819

Answer: 180,458,819 trips

3.4: Which Mode of transport had the longest average trip in May 2024?

USAGE |>
  filter(month == '2024-05-01') |>
  group_by(Mode) |>
  summarize(trips_avg = mean(unlinked_passenger_trips)) |>
  arrange(desc(trips_avg)) |>
  slice_head(n=1)

# A tibble: 1 × 2
  Mode       trips_avg
  <chr>          <dbl>
1 Heavy Rail 14836486.

Answer: Heavy Rail, with 14,836,486 average trips (I did this question before 3.4 was marked as unneccesary due to not having the correct feature to run this analysis, but I’ve left it in regardless)

3.5: How much did NYC Subway ridership fall between April 2019 and April 2020?

USAGE |>
  filter(Agency == "MTA New York City Transit",
         Mode == "Heavy Rail",
         month >= '2019-04-01',
         month < '2020-05-01') |>
  select(unlinked_passenger_trips)

# A tibble: 13 × 1
   unlinked_passenger_trips
                      <dbl>
 1                232223929
 2                235967209
 3                224274463
 4                229774505
 5                229171856
 6                230694038
 7                253609943
 8                235137305
 9                236357677
10                231863427
11                227432375
12                119654860
13                 20254269

# Let's plot the ridership numbers
if(!require("ggplot2")) install.packages("ggplot2")
if(!require("lubridate")) install.packages("lubridate")

library(ggplot2)
library(lubridate)

dates <- seq(ymd("2019-04-01"), by = "month", length.out = 13)

ridership <- c(USAGE |>
                 filter(month >= '2019-04-01',
                        month < '2020-05-01',
                        Agency == 'MTA New York City Transit',
                        Mode == 'Heavy Rail'))

# Create a data frame
data <- data.frame(Date = dates, Ridership = ridership)

if(!require("scales")) install.packages("scales")

Loading required package: scales


Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

library(scales)

# Plot ridership over time
ggplot(data, aes(x = Date, y = Ridership.unlinked_passenger_trips)) +  # Change x and y accordingly
  geom_line() +
  geom_point() +
  labs(title = "NYC Subway Ridership from April 2019 through April 2020", x = "Date", y = "Total Trips") +
  scale_y_continuous(labels = label_number()) +  # Prevent scientific notation
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  # Center the title and make it bold
    axis.title.x = element_text(size = 14),  # Change x-axis title size
    axis.title.y = element_text(size = 14),  # Change y-axis title size
    axis.text = element_text(size = 12),  # Change axis text size
    panel.grid.minor = element_blank()  # Remove minor grid lines
  )

Task 4: Find 3 more interesting facts in this data

head(USAGE, n=7)

# A tibble: 7 × 6
  Agency      metro_area          Mode         month      unlinked_passenger_t…¹
  <chr>       <chr>               <chr>        <date>                      <dbl>
1 King County Seattle--Tacoma, WA Demand Resp… 2002-01-01                 135144
2 King County Seattle--Tacoma, WA Demand Resp… 2002-02-01                 127378
3 King County Seattle--Tacoma, WA Demand Resp… 2002-03-01                 136030
4 King County Seattle--Tacoma, WA Demand Resp… 2002-04-01                 142204
5 King County Seattle--Tacoma, WA Demand Resp… 2002-05-01                 144697
6 King County Seattle--Tacoma, WA Demand Resp… 2002-06-01                 131833
7 King County Seattle--Tacoma, WA Demand Resp… 2002-07-01                 137547
# ℹ abbreviated name: ¹unlinked_passenger_trips
# ℹ 1 more variable: vehicle_revenue_miles <dbl>

4.1: What’s the most miles traveled by agency + mode and how does it compare to the number of trips?

USAGE |>
  group_by(Agency, Mode) |>
  summarize(total_travel = sum(vehicle_revenue_miles, na.rm = TRUE), .groups="drop") |>
  arrange(desc(total_travel)) |>
  slice_head(n=3)

# A tibble: 3 × 3
  Agency                                                   Mode     total_travel
  <chr>                                                    <chr>           <dbl>
1 MTA New York City Transit                                Heavy R…   7732916753
2 New Jersey Transit Corporation                           Bus        3781858802
3 Los Angeles County Metropolitan Transportation Authority Bus        3501202902

NYC Subway leads the way in revenue miles, followed by NJT Bus and LA Bus

USAGE |>
  group_by(Agency, Mode) |>
  summarize(total_travel = sum(unlinked_passenger_trips, na.rm = TRUE), .groups="drop") |>
  arrange(desc(total_travel)) |>
  slice_head(n=3)

# A tibble: 3 × 3
  Agency                                                   Mode     total_travel
  <chr>                                                    <chr>           <dbl>
1 MTA New York City Transit                                Heavy R…  51672094135
2 MTA New York City Transit                                Bus       16889723939
3 Los Angeles County Metropolitan Transportation Authority Bus        7114375207

NYC Subway leads the way here as well, while NJT Bus is overtaken by NYC’s bus system. The distance between the MTA and the next few is quite large.

4.2 Which Agency manages the most Modes?

USAGE |>
  group_by(Agency) |>
  summarize(total_modes = n_distinct(Mode)) |>
  arrange(desc(total_modes)) # Massachusetts Bay Transportation Authority

# A tibble: 677 × 2
   Agency                                                   total_modes
   <chr>                                                          <int>
 1 Massachusetts Bay Transportation Authority                         7
 2 Central Oklahoma Transportation and Parking Authority              6
 3 City and County of San Francisco                                   6
 4 City of Charlotte North Carolina                                   6
 5 County of Miami-Dade                                               6
 6 Dallas Area Rapid Transit                                          6
 7 King County                                                        6
 8 Los Angeles County Metropolitan Transportation Authority           6
 9 Maryland Transit Administration                                    6
10 Metropolitan Transit Authority of Harris County, Texas             6
# ℹ 667 more rows

4.3 Which date saw the most usage for the NYC Subway?

USAGE |>
  filter(Agency == 'MTA New York City Transit',
         Mode == 'Heavy Rail') |>
  group_by(month) |>
  summarize(most_trips = sum(unlinked_passenger_trips, na.rm = TRUE)) |>
  arrange(desc(most_trips)) |>
  slice_head(n=1) # 2019-10-01

# A tibble: 1 × 2
  month      most_trips
  <date>          <dbl>
1 2019-10-01  253609943

# conversely, which had the fewest?
USAGE |>
  filter(Agency == 'MTA New York City Transit',
         Mode == 'Heavy Rail') |>
  group_by(month) |>
  summarize(most_trips = sum(unlinked_passenger_trips, na.rm = TRUE)) |>
  arrange((most_trips)) |>
  slice_head(n=1) # 2020-04-01

# A tibble: 1 × 2
  month      most_trips
  <date>          <dbl>
1 2020-04-01   20254269

2019-10-01 had the most while 2020-04-01 had the fewest

Task 5: Create a new table with annual total (sum) for UPT and VRM for 2022

USAGE <- inner_join(TRIPS, MILES) |>
  mutate(`NTD ID` = as.integer(`NTD ID`))

Joining with `by = join_by(`NTD ID`, Agency, `UZA Name`, Mode, `3 Mode`,
month)`

USAGE <- USAGE |>
  mutate(Mode=case_when(
    Mode == "AR" ~ "Alaska Railroad",
    Mode == "CB" ~ "Commuter Bus",
    Mode == "CC" ~ "Cable Car",
    Mode == "CR" ~ "Commuter Rail",
    Mode == "DR" ~ "Demand Response",
    Mode == "FB" ~ "Ferryboat",
    Mode == "HR" ~ "Heavy Rail",
    Mode == "IP" ~ "Inclined Plane",
    Mode == "LR" ~ "Light Rail",
    Mode == "MB" ~ "Bus",
    Mode == "MG" ~ "Monorail and Automated Guideway modes",
    Mode == "PB" ~ "Publico",
    Mode == "RB" ~ "Bus Rapid Transit",
    Mode == "SR" ~ "Streetcar Rail",
    Mode == "TB" ~ "Trolleybus",
    Mode == "TR" ~ "Aerial Tramways",
    Mode == "VP" ~ "Vanpool",
    Mode == "YR" ~ "Hybrid Rail",
    TRUE ~ "Unknown"))


USAGE_2022_ANNUAL <- USAGE |>
  select(-`3 Mode`) |>
  rename("metro_area" = "UZA Name") |>
  filter(year(month)=='2022') |>
  group_by(`NTD ID`, `Agency`,`metro_area`,`Mode`) |>
  summarize(UPT = sum(UPT),
            VRM = sum(VRM)) |>
  ungroup()

`summarise()` has grouped output by 'NTD ID', 'Agency', 'metro_area'. You can
override using the `.groups` argument.

head(USAGE_2022_ANNUAL, n=2)

# A tibble: 2 × 6
  `NTD ID` Agency      metro_area          Mode                 UPT      VRM
     <int> <chr>       <chr>               <chr>              <dbl>    <dbl>
1        1 King County Seattle--Tacoma, WA Bus             53983641 61632644
2        1 King County Seattle--Tacoma, WA Demand Response   663009 12860448

FINANCIALS <- FINANCIALS |>
  mutate(Mode=case_when(
    Mode == "AR" ~ "Alaska Railroad",
    Mode == "CB" ~ "Commuter Bus",
    Mode == "CC" ~ "Cable Car",
    Mode == "CR" ~ "Commuter Rail",
    Mode == "DR" ~ "Demand Response",
    Mode == "FB" ~ "Ferryboat",
    Mode == "HR" ~ "Heavy Rail",
    Mode == "IP" ~ "Inclined Plane",
    Mode == "LR" ~ "Light Rail",
    Mode == "MB" ~ "Bus",
    Mode == "MG" ~ "Monorail and Automated Guideway modes",
    Mode == "PB" ~ "Publico",
    Mode == "RB" ~ "Bus Rapid Transit",
    Mode == "SR" ~ "Streetcar Rail",
    Mode == "TB" ~ "Trolleybus",
    Mode == "TR" ~ "Aerial Tramways",
    Mode == "VP" ~ "Vanpool",
    Mode == "YR" ~ "Hybrid Rail",
    TRUE ~ "Unknown"))


USAGE_AND_FINANCIALS <- left_join(USAGE_2022_ANNUAL,
                                  FINANCIALS,
                                  join_by(`NTD ID`, `Mode`)) |>
  drop_na()


head(USAGE_AND_FINANCIALS,n=7)

# A tibble: 7 × 9
  `NTD ID` Agency     metro_area Mode     UPT    VRM `Agency Name` `Total Fares`
     <dbl> <chr>      <chr>      <chr>  <dbl>  <dbl> <chr>                 <dbl>
1        1 King Coun… Seattle--… Bus   5.40e7 6.16e7 King County …      56566150
2        1 King Coun… Seattle--… Bus   5.40e7 6.16e7 King County …        280187
3        1 King Coun… Seattle--… Dema… 6.63e5 1.29e7 King County …        740726
4        1 King Coun… Seattle--… Dema… 6.63e5 1.29e7 King County …         91601
5        1 King Coun… Seattle--… Ferr… 4.00e5 5.12e4 King County …       1715265
6        1 King Coun… Seattle--… Stre… 1.12e6 1.80e5 King County …        588495
7        1 King Coun… Seattle--… Trol… 9.58e6 2.64e6 King County …      10123486
# ℹ 1 more variable: Expenses <dbl>

Task 6: Answer the following 6 questions using USAGE_AND_FINANCIALS

6.1: Which transit system (agency and mode) had the most UPT in 2022?

USAGE_AND_FINANCIALS |>
  group_by(`Agency`, `Mode`) |>
  summarize(Total_UPT = sum(UPT, na.rm=TRUE), .groups="drop") |>
  arrange(desc(Total_UPT)) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                    Mode        Total_UPT
  <chr>                     <chr>           <dbl>
1 MTA New York City Transit Heavy Rail 1793073801

MTA New York City Transit – Heavy Rail

6.2: Which transit system (agency and mode) had the highest farebox recovery, defined as the highest ratio of Total Fares to Expenses?

USAGE_AND_FINANCIALS |>
  group_by(`Agency`,`Mode`) |>
  summarize(fbx_rcv = sum(`Total Fares`, na.rm = TRUE) / sum(`Expenses`, na.rm = TRUE), .groups="drop") |>
  arrange(desc(fbx_rcv)) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                                Mode    fbx_rcv
  <chr>                                 <chr>     <dbl>
1 Transit Authority of Central Kentucky Vanpool    2.38

Transit Authority of Central Kentucky – Vanpool

6.3: Which transit system (agency and mode) had the lowest expenses per UPT?

USAGE_AND_FINANCIALS |>
  filter(UPT >= 1) |>
  group_by(`Agency`,`Mode`) |>
  summarize(lwst_exp_upt =  sum(`Expenses`, na.rm = TRUE) / sum(`UPT`, na.rm = TRUE), .groups="drop") |>
  arrange(lwst_exp_upt) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                          Mode  lwst_exp_upt
  <chr>                           <chr>        <dbl>
1 North Carolina State University Bus           1.18

North Caroline State University – Bus

6.4: Which transit system (agency and mode) had the highest total fares per UPT?

USAGE_AND_FINANCIALS |>
  filter(`Total Fares` >= 1) |>
  group_by(`Agency`,`Mode`) |>
  summarize(hghst_fr_upt = sum(`UPT`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop") |>
  # summarize(hghst_fr_upt = sum(`UPT`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop")  |>
  arrange(desc(hghst_fr_upt)) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                                  Mode  hghst_fr_upt
  <chr>                                   <chr>        <dbl>
1 Athens-Clarke County Unified Government Bus         522940

Athens-Clarke County Unified Government – Bus

6.5: Which transit system (agency and mode) had the lowest expenses per VRM?

USAGE_AND_FINANCIALS |>
  filter(UPT >= 1) |>
  group_by(`Agency`,`Mode`) |>
  summarize(lwst_exp_vrm = sum(`Expenses`, na.rm = TRUE) / sum(`VRM`, na.rm = TRUE), .groups="drop") |>
  arrange(lwst_exp_vrm) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                                  Mode    lwst_exp_vrm
  <chr>                                   <chr>          <dbl>
1 New Mexico Department of Transportation Vanpool        0.337

New Mexico Department of Transportation – Vanpool

6.6: Which transit system (agency and mode) had the highest total fares per VRM?

USAGE_AND_FINANCIALS |>
  filter(`Total Fares` >= 1) |>
  group_by(`Agency`,`Mode`) |>
  summarize(hghst_fr_vrm = sum(`VRM`, na.rm = TRUE) / sum(`Total Fares`, na.rm = TRUE), .groups="drop") |>
  arrange(desc(hghst_fr_vrm)) |>
  slice_head(n=1)

# A tibble: 1 × 3
  Agency                                  Mode  hghst_fr_vrm
  <chr>                                   <chr>        <dbl>
1 Athens-Clarke County Unified Government Bus         380687

Athens-Clarke County Unified Government – Bus