if (!require("data.table")) install.packages("data.table")
if (!require("dotenv")) install.packages("dotenv")
if (!require("dplyr")) install.packages("dplyr")
if (!require("DT")) install.packages("DT")
if (!require("glue")) install.packages("glue")
if (!require("httr2")) install.packages("httr2")
if (!require("plotly")) install.packages("plotly")
if (!require("scales")) install.packages("scales")
if (!require("tidyr")) install.packages("tidyr")
if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tools")) install.packages("tools")
library(data.table)
library(dotenv)
library(dplyr)
library(DT)
library(glue)
library(httr2)
library(plotly)
library(scales)
library(tidyr)
library(tidyverse)
library(tools)
Mini Project 4
Introduction
This project looks into different retirement packages available for new CUNY employees.
To do this, it looks at market data from two sources:
The Federal Reserve Bank of St. Louis which has available information on inflation, wage growth, bonds, and U.S. debt
Alpha Vantage which has available information on the U.S. and International Stock Markets.
The ultimate goal is to compare the traditional Teaching Retirement Plan and the newer Optional Retirement Plan and make a recommendation to a new CUNY employee for which to take.
First is reviewing it through the lens of an employee who worked for CUNY from 1983 - 2013 and has been retired since.
Afterwards, I will make a recommendation for an incoming CUNY employee as a benefits-specialist, to help guide them on their journey to retirement.
We will accomplish this task using R
.
Task 00: Install Packages and Libraries
Task 01: Register for AlphaVantage API Key
I’ve registered the API Keys and hidden them internally. You will have to request your own from AlphaVantage and FRED respectivcely
[1] "API Key 1 Loaded"
Task 02: Register for FRED API Key
[1] "API Key 2 Loaded"
Task 03: Data Acquisition
With the API Keys in hand, the next step is to pull data that can influence our decision on which retirement package is better.
This is the relevant data from FRED:
The AlphaVantage data comes from querying on different symbols through their API
- US Equity Market total returns
- For this one using SPY which is the S&P 500 Index gives us a good indicator of how U.S. markets are performing in general,
- International Equity Market total returns
- Similarly, VXUS or the Vanguard Total International Stock Index can give us an inkling for international markets, though it is limited in data (discussed further below)
First creating functions so only a couple of parameters need to be tweaked to call in from each dataset.
Code
# Creating a request and clean function for FRED Data
= "https://api.stlouisfed.org/fred/series/observations?series_id="
series_url
<- function(url, series_id, api_key) {
get_fred_data <- paste0(url, series_id, "&api_key=", api_key, "&file_type=json")
lookup_url
<- request(lookup_url)
req
<- req_perform(req)
resp
<- resp_body_json(resp)
data
<- data$observations
df
return(df)
}
The above function will automatically pull data from FRED for a symbol, the below from AlphaVantage.
Code
# Creating a similar function for Alpha Data
<- "https://www.alphavantage.co/query"
base_alpha_url
<- function(url, lookup_symbol, api_key) {
get_alpha_data <- request(url) |>
req req_url_query(
`function` = "TIME_SERIES_DAILY",
symbol = lookup_symbol,
interval = "60min",
apikey = alpha_api_key,
outputsize = "full"
)
<- req_perform(req)
resp
<- resp_body_json(resp)
data
<- data$`Time Series (Daily)`
time_series
# Convert to dataframe
<- do.call(rbind, lapply(time_series, function(row) {
df as.data.frame(t(row), stringsAsFactors = FALSE)
}))
$Date <- rownames(time_series)
df
<- rownames_to_column(df, var = "Date")
df
# Remove the row names
rownames(df) <- NULL
$Date <- as.Date(df$Date)
df
<- df |>
df rename(
`open` = `1. open`,
`high` = `2. high`,
`low` = `3. low`,
`close` = `4. close`,
`volume` = `5. volume`
)
<- df |>
df mutate(
open = as.numeric(open),
high = as.numeric(high),
low = as.numeric(low),
close = as.numeric(close),
volume = as.numeric(volume)
)
# Create YearMonth column and calculate medians
<- df |>
medians mutate(YearMonth = format(Date, "%Y-%m")) |>
group_by(YearMonth) |>
summarize(across(
c(open, high, low, close, volume),
median,na.rm = TRUE
|>
)) ungroup()
return(medians)
}
The next few sections are just pulling in these data sets and working through them.
3a. Inflation Data
Code
# Pulling this data from the FRED website
# If you look up the series ID in FRED, you can directly access the data and download its .CSV file there
<- "FPCPITOTLZGUSA"
inflation_series_id
<- get_fred_data(url = series_url, series_id = inflation_series_id, api_key = fred_api_key)
inflation_df
<- data.frame(
clean_inflation_df DATE = sapply(inflation_df, function(x) x$date),
inflation_rate = as.numeric(sapply(inflation_df, function(x) x$value))
)
<- clean_inflation_df |>
clean_inflation_df mutate(
year = substr(DATE, start = 0, stop = 4),
inflation_rate = round(inflation_rate, 3),
yoy_diff = round(inflation_rate - lag(inflation_rate), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) select(
`Year` = year,
`Inflation Rate` = inflation_rate,
`YoY Differential` = yoy_diff
)
|>
clean_inflation_df ::datatable() DT
Code
print(glue("The Inflation Dataset covers a year range of {min(clean_inflation_df$Year)} to {max(clean_inflation_df$Year)}"))
The Inflation Dataset covers a year range of 1960 to 2023
3b. Wage Data
Note: This data spans from January 1983 through October 2024. However, there are two date ranges where there is no recorded data: July 1985 - September 1986 (15 months) and June 1995 - August 1996 (15 months). This is “…due to changes in methodology.”
Code
<- "FRBATLWGTUMHWG83O"
wage_series_id
<- get_fred_data(url = series_url, series_id = wage_series_id, api_key = fred_api_key)
wage_df
<- data.frame(
clean_wage_df DATE = sapply(wage_df, function(x) x$date),
wage_growth = as.numeric(sapply(wage_df, function(x) x$value))
)
<- clean_wage_df |>
clean_wage_df mutate(year = substr(DATE, start = 0, stop = 4)) |>
group_by(year) |>
summarize(
mwg = round(median(wage_growth, na.rm = TRUE), 1),
null_months = sum(is.na(wage_growth)) # Count of NULL months
|>
) ungroup() |>
mutate(
yoy_diff = round(mwg - lag(mwg), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) select(
`Year` = year,
`NULL Month Count` = null_months,
`Median Wage Growth` = mwg,
`YoY Differential` = yoy_diff
)
|>
clean_wage_df ::datatable() DT
Code
print(glue("The Wage Dataset covers a year range of {min(clean_wage_df$Year)} to {max(clean_wage_df$Year)}"))
The Wage Dataset covers a year range of 1983 to 2024
3c. U.S. Equity Market Total Returns (Lookup Symbol = SPY)
Code
<- get_alpha_data(url = base_alpha_url, lookup_symbol = "SPY", api_key = alpha_api_key)
us_monthly_medians
<- us_monthly_medians |>
us_monthly_medians mutate(
year = substr(YearMonth, start = 0, stop = 4)
|>
) group_by(year) |>
summarize(across(
c(open, high, low, close, volume),
median,na.rm = TRUE
|>
)) ungroup() |>
mutate(
open = round(open, 2),
high = round(high, 2),
low = round(low, 2),
close = round(close, 2),
high_low_diff = round(high - low, 2),
open_close_diff = round(open - close, 2),
yoy_diff = round(volume - lag(volume), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) select(
`Year` = year,
`Open` = open,
`Close` = close,
`High` = high,
`Low` = low,
`Traded Volume` = volume,
`YoY Volume Difference` = yoy_diff
)
|>
us_monthly_medians ::datatable() DT
Code
print(glue("The US Equity Market Dataset covers a year range of {min(us_monthly_medians$Year)} to {max(us_monthly_medians$Year)}"))
The US Equity Market Dataset covers a year range of 1999 to 2024
3d. International Equity Market Total Returns (Lookup Symbol = VXUS)
Code
<- get_alpha_data(url = base_alpha_url, lookup_symbol = "VXUS", api_key = alpha_api_key)
intl_monthly_medians
<- intl_monthly_medians |>
intl_monthly_medians mutate(
year = substr(YearMonth, start = 0, stop = 4)
|>
) group_by(year) |>
summarize(across(
c(open, high, low, close, volume),
median,na.rm = TRUE
|>
)) ungroup() |>
mutate(
open = round(open, 2),
high = round(high, 2),
low = round(low, 2),
close = round(close, 2),
high_low_diff = round(high - low, 2),
open_close_diff = round(open - close, 2),
yoy_diff = round(volume - lag(volume), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) select(
`Year` = year,
`Open` = open,
`Close` = close,
`High` = high,
`Low` = low,
`Traded Volume` = volume,
`YoY Volume Difference` = yoy_diff
)
|>
intl_monthly_medians ::datatable() DT
Code
print(glue("The International Equity Market Dataset covers a year range of {min(intl_monthly_medians$Year)} to {max(intl_monthly_medians$Year)}"))
The International Equity Market Dataset covers a year range of 2011 to 2024
3e. Bond Returns
Code
<- "BAMLCC0A4BBBTRIV"
bond_series_id
<- get_fred_data(url = series_url, series_id = bond_series_id, api_key = fred_api_key)
bond_df
<- data.frame(
clean_bond_df DATE = sapply(bond_df, function(x) x$date),
bond = as.numeric(sapply(bond_df, function(x) x$value))
)
<- clean_bond_df |>
clean_bond_df mutate(year = substr(DATE, start = 0, stop = 4)) |>
group_by(year) |>
summarize(
median_bond_price = median(bond, na.rm = TRUE)
|>
) ungroup() |>
mutate(
yoy_diff = round(median_bond_price - lag(median_bond_price), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) select(
`Year` = year,
`Median Bond Price` = median_bond_price,
`YoY Differential` = yoy_diff
)
|>
clean_bond_df ::datatable() DT
Code
print(glue("The Bond Dataset covers a year range of {min(clean_bond_df$Year)} to {max(clean_bond_df$Year)}"))
The Bond Dataset covers a year range of 1988 to 2024
3f. Short-Term Debt Returns
Code
<- "QFRD304INFUSNO"
short_term_debt_series_id
<- get_fred_data(url = series_url, series_id = short_term_debt_series_id, api_key = fred_api_key)
short_term_debt_df
<- data.frame(
clean_short_term_debt_df DATE = sapply(short_term_debt_df, function(x) x$date),
quarterly_financials = as.numeric(sapply(short_term_debt_df, function(x) x$value))
)
<- clean_short_term_debt_df |>
clean_short_term_debt_df mutate(year = substr(DATE, start = 0, stop = 4)) |>
group_by(year) |>
summarize(
annual_financials = round(median(quarterly_financials, na.rm = TRUE), 1)
|>
) mutate(
yoy_diff = round(annual_financials - lag(annual_financials), 2),
yoy_diff = if_else(is.na(yoy_diff), 0, yoy_diff)
|>
) ungroup() |>
select(
`Year` = year,
`Annual Financials` = annual_financials,
`YoY Differential` = yoy_diff
)
|>
clean_short_term_debt_df ::datatable() DT
Code
print(glue("The Short-Term Debt Dataset covers a year range of {min(clean_short_term_debt_df$Year)} to {max(clean_short_term_debt_df$Year)}"))
The Short-Term Debt Dataset covers a year range of 2009 to 2024
Task 04: Initial Analysis
There are now 6 datasets to look into.
The first step is to do some EDA. We could look at the data as a whole or compare like-to-like.
This divides our data into three groups: Inflation + Wages, US + International Markets, and Bond + Short-Term Debt.
There are two reasons for this: the first is these compare similar indicators to each other. The second is the date ranges are different across the board, so throwing everything into a single table and graph would just look messy.
First up, let’s look at how both Inflation and Wages have changed over the years. The first graph compares the Rate of Inflation to the Median Wage Growth per year. The second shows the Year-over-year differential between
Code
# dropping NULL month count from wages since it won't really apply to any further analysis
<- clean_wage_df |>
wgs select(
-`NULL Month Count`
)
# join dfs on year, this will shorten the overall date range to 1983-2023 (40 year span)
<- clean_inflation_df |>
infl_wgs_df inner_join(wgs, by = "Year") |>
select(
`Year`,
`Inflation Rate`,
`YoY Inflation Differential` = `YoY Differential.x`,
`Median Wage Growth`,
`YoY Wages Differential` = `YoY Differential.y`
|>
) mutate(
`YoY Inflation Differential` = if_else(
== 1983, 0, `YoY Inflation Differential` # Since it's cutting off a portion of the inflation dataset...
Year
)
)
# quick look at the table
|>
infl_wgs_df ::datatable() DT
Code
# Ensure Year is numeric
$Year <- as.numeric(as.character(infl_wgs_df$Year))
infl_wgs_df
# Inflation Rate and Median Wage Growth Plot
<- ggplot(infl_wgs_df, aes(x = Year)) +
inflation_wage_plot geom_line(aes(y = `Inflation Rate`, color = "Inflation Rate"), size = 1) +
geom_line(aes(y = `Median Wage Growth`, color = "Median Wage Growth"), size = 1) +
scale_color_manual(values = c("Inflation Rate" = "darkblue", "Median Wage Growth" = "#44b80a")) +
labs(
title = "Inflation Rate and Median Wage Growth (1983-2023)",
x = "Year",
y = "Rate (%)",
color = "Legend"
+
) theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "top"
+
) scale_x_continuous(breaks = seq(1960, 2024, by = 10)) +
scale_y_continuous()
print(inflation_wage_plot)
Code
<- ggplot(infl_wgs_df, aes(x = Year)) +
infl_wgs_yoy_plot geom_line(aes(y = `YoY Inflation Differential`, color = "Inflation Differential"), size = 1) +
geom_line(aes(y = `YoY Wages Differential`, color = "Wage Differential"), size = 1) +
scale_color_manual(values = c("Inflation Differential" = "darkblue", "Wage Differential" = "#44b80a")) +
labs(
title = "YoY Differentials (1983-2023)",
x = "Year",
y = "Differential (%)",
color = "Legend"
+
) theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
legend.position = "top"
+
) scale_x_continuous(breaks = seq(1960, 2024, by = 10)) +
scale_y_continuous() +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray")
print(infl_wgs_yoy_plot)
Market Dataframe and Plots
Code
<- us_monthly_medians |>
market_df inner_join(intl_monthly_medians, by = "Year", suffix = c("_US", "_Intl")) |>
mutate(
Avg_Price_US = (High_US + Low_US) / 2,
Avg_Price_Intl = (High_Intl + Low_Intl) / 2
|>
) select(
`Year`,
`Average Price U.S. Markets` = `Avg_Price_US`, # decided to go with the average
`Traded Volume US` = `Traded Volume_US`,
`Average Price International Markets` = `Avg_Price_Intl`,
`Traded Volume Intl` = `Traded Volume_Intl`
)
|>
market_df ::datatable() DT
Code
$Year <- as.numeric(as.character(market_df$Year))
market_df
ggplot(market_df, aes(x = Year)) +
geom_line(aes(y = `Average Price U.S. Markets`, color = "U.S. Markets"), size = 1) +
geom_line(aes(y = `Average Price International Markets`, color = "International Markets"), size = 1) +
scale_color_manual(values = c("U.S. Markets" = "darkblue", "International Markets" = "#44b80a")) +
labs(
title = "Average Price by Year (U.S. and International Markets)",
x = "Year",
y = "Average Price",
color = "Market"
+
) theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Code
# Transform data to long format for plotting for bar graph
<- market_df |>
volume_data pivot_longer(
cols = c(`Traded Volume US`, `Traded Volume Intl`),
names_to = "Market",
values_to = "Traded Volume"
|>
) mutate(Market = recode(Market,
`Traded Volume US` = "U.S. Markets",
`Traded Volume Intl` = "International Markets"
))
# Overlaid bar graph
ggplot(volume_data, aes(x = Year, y = `Traded Volume`, fill = Market)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("U.S. Markets" = "darkblue", "International Markets" = "#44b80a")) +
scale_y_continuous(
labels = scales::comma,
expand = c(0, 0)
+
) scale_x_continuous(
breaks = seq(2011, 2023, by = 1),
limits = c(2011, 2023)
+
) labs(
title = "Traded Volume by Year (U.S. and International Markets)",
x = "Year",
y = "Traded Volume",
fill = "Market"
+
) theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.background = element_rect(fill = "gray", color = NA),
panel.background = element_rect(fill = "gray", color = NA)
)
Finally, looking at a bonds-to-debt ratio over time to try and get an understanding for how investing and risk in the U.S. has looked over time.
Code
# Combine the two datasets on Year
<- clean_bond_df |>
bonds_and_debts inner_join(clean_short_term_debt_df, by = "Year", suffix = c("_Bond", "_Debt")) |>
mutate(
Bond_to_Debt_Ratio = `Median Bond Price` / `Annual Financials` # Calculate bond-to-debt ratio
)
$Year <- as.numeric(as.character(bonds_and_debts$Year))
bonds_and_debts
ggplot(bonds_and_debts, aes(x = Year, y = Bond_to_Debt_Ratio)) +
geom_line(color = "purple", size = 1) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray") +
labs(
title = "Bond-to-Debt Ratio Over Time",
x = "Year",
y = "Bond-to-Debt Ratio"
+
) theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Task 05: Historical Comparison
Now we’ll compare the two plans, TRS and ORP, to get a sense of how each performs, for a CUNY employee who joined in the first month of the historical data.
For this, we’ll look at Professor Bob. Bob Started teaching at Baruch back in 1983; he’s seen his income increase over time exactly in-line with his fellow Americans, and he’s been saving from the minute he took the position. Bob retired in 2013, 30 years after starting. He’s now had 10 years of post-retirement pension. After this time, he’s wondering if he made the right choice.
You see, back in 1983, he was offered two plans. The more traditional plan, the Teachers Retirement System (TRS), where Baruch takes the brunt of the market risk for Bob, and the new Optional Retirement Plan (ORP), a more aggressive approach to financial planning, which would allow Bob to more directly invest in the market, similar to a 401(k).
Bob is a more prudent person, always has been, he took the more understandable (and still very good) TRS, but he’s always wondered “what if?”.
Let’s help Bob out. Looking at market data over the same time span and understanding how each plan works, would Bob have been better off with the ORP, or did he make the right decision sticking with the TRS?
First, let’s see how Bob did. His starting wage in 1983 was $52,567 which grew yearly in-line with the U.S. Median Wage.
Code
# The story of Professor Bob
<- wgs |>
working_bob filter(Year <= 2013) |>
rename(mwg = `Median Wage Growth`) |>
mutate(
Income = ifelse(Year == 1983, 52267.01, NA),
mwg = (mwg / 100)
|>
) select(-`YoY Differential`)
for (i in 2:nrow(working_bob)) {
$Income[i] <- working_bob$Income[i - 1] * (1 + working_bob$mwg[i])
working_bob
}
$Year <- as.numeric(as.character(working_bob$Year))
working_bob
print(glue("Professor Bob's starting income in {min(working_bob$Year)} is {dollar(round(min(working_bob$Income), 2))}.\n His salary at retirement in {max(working_bob$Year)} is {dollar(round(max(working_bob$Income), 2))}."))
Professor Bob's starting income in 1983 is $52,267.01.
His salary at retirement in 2013 is $177,919.
This meant that by his retirement in 2013, Bob was earning an Adjusted Gross Income (AGI) of nearly $180K! Good on ya, Bob.
Code
<- ggplot(working_bob, aes(x = Year, y = Income)) + # they call him the working bob
working_bob_plot geom_line(color = "#44b80a", size = 1.5) +
geom_hline(yintercept = 100000, linetype = "dashed", color = "gray") + # i guess that's what he is
geom_hline(yintercept = 200000, linetype = "dashed", color = "gray") +
labs(
title = "Professor Bob's Income Over Time (1983 - 2013)",
x = "Year",
y = "Income ($)"
+
) scale_x_continuous(
breaks = seq(min(working_bob$Year), max(working_bob$Year), by = 5)
+
) scale_y_continuous(labels = dollar_format(accuracy = 0.01)) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
axis.text.x = element_text(angle = 45, hjust = 1),
plot.background = element_rect(fill = "gray", color = NA),
panel.background = element_rect(fill = "gray", color = NA)
)
<- working_bob_plot +
working_bob_plot geom_line(aes(text = paste0("Year: ", Year, "<br>Income: ", dollar(Income, accuracy = 0.01))), size = 1, color = NA)
<- ggplotly(working_bob_plot, tooltip = "text")
bobs_working_plot
bobs_working_plot
The exact math behind each retirement package can get a little tricky. For TRS, first, employees pay a fixed percentage of their paycheck into the pension fund based on their AGI.
We can monitor Bob’s contributions over the years here:
Code
# TRS employee contribution function
<- function(salary) {
trs_pct case_when(
<= 45000 ~ 0.03,
salary <= 55000 ~ 0.035,
salary <= 75000 ~ 0.045,
salary <= 100000 ~ 0.0575,
salary TRUE ~ 0.06
)
}
# Bob's contributions
<- working_bob |>
working_bob mutate(
bobs_contributions = round(Income * trs_pct(Income), 2)
)
# View Bob's contributions in-line w/ MWG in the U.S.
|>
working_bob mutate(Income = round(Income, 2)) |>
rename(
`Median Wage Growth (U.S.)` = mwg,
`Bob's Income` = Income
|>
) ::datatable(
DToptions = list(
pageLength = 11,
columnDefs = list(
# Adding commas for these columns
list(
targets = c(3, 4),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? '$ ' + data.toString().replace(/\\B(?=(\\d{3})+(?!\\d))/g, ',') : data;",
"}"
)
),# Formatting as percentages
list(
targets = c(2),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? (data*100).toFixed(2) + '%' : data;", # Formatting as percentage
"}"
)
)
)
) )
Then, the retirement benefit is calculated based on a Final Average Salary, which computes the average of the final 3 years salary for an employee to use in a calculation, which also factors in the number of years an employee worked for Baruch.
The average salary for Bob's final 3 years of work at CUNY is $174,173
Knowing this, we can calculate Bob’s annual retirement benefit from CUNY, including how much it increases yearly (in-line with inflation).
What this means is, the benefit amount is increased annually in-line with the rate of inflation; the exact calculation halves the rate of inflation then rounds-up to the nearest tenth, for example a 2.9% rate of inflation divided by 2 and rounded up to the nearest-tenth returns a 1.5% annual increase. However, the increase exists within a band of 1-3% and can not exceed that band on either end.
[1] 196710.1
[1] 13788.66
CUNY's average monthly contribution to Bob's account since 2014 is $1,215.74.
The average Monthly Available Pension since 2014 is $22,324.85.
Bob is left with a neat monthly contribution from CUNY of $1,215.74 on average since 2014, and has an available pension of $22,324.85 to withdraw from.
Let’s check if Bob’s monthly available pension over the past decade would’ve been greater if he went with the ORP instead of TRS.
Bob joined CUNY Baruch at Age 35.
He worked from 1983 - 2013 (30 years) with a starting salary of $52,267.
The ORP method is an account on an Index or Fund that Bob can contribute to with various contribution levels and asset allocations depending on age and income. Finally, it also has an employer (CUNY) contribution of 8% for the first seven years of employment, then 10% every year after (so 23 years for Bob at 10%).
NOTE: Due to the fact the data on US Markets, International Markets, U.S. Bonds, and Short-Term Debt do not completely go back to 1983 (with International Markets only being tracked under the VXUS symbol since 2011), I have used a resampling method to generate dummy historical data to fill in the gap. Please keep this in mind while proceeding through Task 5.
Bob's total savings with the ORP plan, averaged on the various markets he invested in
, would give him a final investment portfolio of $1,073,572.00.
That is significantly more than he received from the TRP.
Task 06: Fixed-Rate Analysis
This task wants to modify the simulation from the previous section to project an employee’s benefit or withdrawal amount from retirement until death.
Bob retired at the end of 2013 at age 70. According to the CDC, as of 2013, the average male-lifespan in the U.S. as of 2013 was 76.4 years. However, let’s say that our Bob lives to 80 years, 10 years after he retired.
Our previous analysis assumed Bob wouldn’t be withdrawing anything at all, but Bob needs that money to live from 70 - 80 years old.
Bob never lived too lavishly. He lives with his wife Martha in their 3-bedroom condo in SoHo (he bought following the 1983 market crash when the condo’s value was at an all-time low, prudent fella) of $400,000 on a 30-year mortgage which he’s since paid off.
e has adult children who visit occasionally, but they don’t account for his regular expenses anymore.
With 1 dog and 1 cat at home, we can estimate Bob’s monthly expenses:
Code
# creating dummy variables to run numbers off of
<- 400000
home_purchase_price <- 0.125 # this was the average in 2014
price_ptax_2014 <- 500 # guesstimate, based off of nothing but vibes
monthly_col_2014
<- retirement_bob |>
bobs_expenses select(
Year,
cpi
)
<- bobs_expenses |>
bobs_expenses mutate(
monthly_property_tax = if_else(row_number() == 1, round((home_purchase_price * price_ptax_2014) / 12, 2), NA_real_),
monthly_col = if_else(row_number() == 1, round(monthly_col_2014 + (monthly_col_2014 * cpi), 2), NA_real_)
)
# all these for (i in 2:nrow) functions are basically to backfill cols that rely on py data
for (i in 2:nrow(bobs_expenses)) {
$monthly_property_tax[i] <- (bobs_expenses$monthly_property_tax[i] <- bobs_expenses$monthly_property_tax[i - 1] + (bobs_expenses$monthly_property_tax[i - 1] * bobs_expenses$cpi[i]))
bobs_expenses
$monthly_col[i] <- (bobs_expenses$monthly_col[i] <- bobs_expenses$monthly_col[i - 1] + (bobs_expenses$monthly_col[i - 1] * bobs_expenses$cpi[i]))
bobs_expenses
}
<- bobs_expenses |>
bobs_expenses mutate(
annual_col = round((monthly_property_tax * 12) + (monthly_col * 12), 2),
monthly_property_tax = round(monthly_property_tax, 2),
monthly_col = round(monthly_col, 2)
)
|>
bobs_expenses ::datatable(
DToptions = list(
pageLength = 10,
columnDefs = list(
# Adding commas for these columns
list(
targets = c(3, 4, 5),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? '$ ' + data.toString().replace(/\\B(?=(\\d{3})+(?!\\d))/g, ',') : data;",
"}"
)
),# Formatting as percentages
list(
targets = c(2),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? (data*100).toFixed(2) + '%' : data;", # Formatting as percentage
"}"
)
)
)
) )
Let’s recall the tables retirement_bob
and bobs_markets
to look at how far his pension versus withdrawal would’ve gone in the 10 year span following his retirement.
Code
# bringing in pension
<- retirement_bob$starting_pension[1]
starting_pension_2014
<- retirement_bob |>
ret_bob select(
Year,
contribution
)
<- bobs_expenses |>
bobs_expenses_trs left_join(ret_bob, by = "Year") |>
mutate(
starting_pension = if_else(row_number() == 1, starting_pension_2014, NA_real_)
)
<- bobs_expenses_trs |>
bobs_expenses_trs mutate(
ending_pension = if_else(row_number() == 1, starting_pension + contribution - annual_col, NA_real_)
)
for (i in 2:nrow(bobs_expenses_trs)) {
$starting_pension[i] <- bobs_expenses_trs$ending_pension[i - 1]
bobs_expenses_trs
$ending_pension[i] <- (bobs_expenses_trs$ending_pension[i] <- bobs_expenses_trs$starting_pension[i] + bobs_expenses_trs$contribution[i] - bobs_expenses_trs$annual_col)
bobs_expenses_trs
}
|>
bobs_expenses_trs mutate(
contribution = round(contribution, 2),
starting_pension = round(starting_pension, 2),
ending_pension = round(ending_pension, 2)
|>
) rename(
`Consumer Price Index` = cpi,
`Monthly Property Tax` = monthly_property_tax,
`Monthly Cost of Living (CoL)` = monthly_col,
`Annual Cost of Living (CoL)` = annual_col,
`Annual Pension Contribution` = contribution,
`Yearly Starting Pension` = starting_pension,
`Yearly Ending Pension` = ending_pension
|>
) ::datatable(
DToptions = list(
pageLength = 10,
columnDefs = list(
# Adding commas for these columns
list(
targets = c(3, 4, 5, 6, 7, 8),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? '$ ' + data.toString().replace(/\\B(?=(\\d{3})+(?!\\d))/g, ',') : data;",
"}"
)
),# Formatting as percentages
list(
targets = c(2),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? (data).toFixed(2) + '%' : data;", # Formatting as percentage
"}"
)
)
)
) )
So under the TRS, Bob would be screwed by 2018 if he tried to hold on to that Condo for all these years (though this also assumes he’s the only one with any retirement between he and his wife).
Code
# looking at bobs expenses w/ orp plan
<- bobs_expenses |>
bobs_expenses_orp mutate(
funds = if_else(row_number() == 1, bob_total_savings, NA_real_)
)
for (i in 2:nrow(bobs_expenses_orp)) {
$funds[i] <- (bobs_expenses$funds[i] <- bobs_expenses$funds[i - 1] - bobs_expenses$annual_col[i])
bobs_expenses
}
<- bobs_expenses_orp |>
bobs_expenses_orp mutate(
funds = round(funds, 2)
)
for (i in 2:nrow(bobs_expenses_orp)) {
$funds[i] <- round((bobs_expenses_orp$funds[i - 1] - bobs_expenses$annual_col[i]), 2)
bobs_expenses_orp
}
|>
bobs_expenses_orp rename(
`Consumer Price Index` = cpi,
`Monthly Property Tax` = monthly_property_tax,
`Monthly Cost of Living (CoL)` = monthly_col,
`Annual Cost of Living (CoL)` = annual_col,
`Withdrawable Funds` = funds
|>
) ::datatable(
DToptions = list(
pageLength = 10,
columnDefs = list(
# Adding commas for these columns
list(
targets = c(3, 4, 5, 6),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? '$ ' + data.toString().replace(/\\B(?=(\\d{3})+(?!\\d))/g, ',') : data;",
"}"
)
),# Formatting as percentages
list(
targets = c(2),
render = JS(
"function(data, type, row, meta) {",
" return type === 'display' ? (data).toFixed(2) + '%' : data;", # Formatting as percentage
"}"
)
)
)
) )
While with the ORP, Bob will still have a lot of money leftover, assuming he’s doing monthly withdrawals without replenishment. ORP seems to have been the way to go if he wanted to keep that condo.
Task 07: Monte-Carlo Analysis
Performing some bootstrap analysis using 200 simulation on the 30-year retirement data, let’s see if with resampling we can still conclude that ORP is the better option.
Code
# Create resampling function
<- function(data, n_samples = 200) {
bootstrap_resample replicate(n_samples, data[sample(nrow(data), replace = TRUE), ], simplify = FALSE)
}
# resample
<- bootstrap_resample(retirement_bob)
trs_bootstrap_samples <- bootstrap_resample(bobs_markets)
orp_bootstrap_samples
# 10 years
<- function(bootstrap_samples, years = 10) {
simulate_projection <- lapply(bootstrap_samples, function(sample) {
projections |>
sample sample_n(years, replace = TRUE) |>
summarise(across(where(is.numeric), mean, na.rm = TRUE))
})bind_rows(projections)
}
<- simulate_projection(trs_bootstrap_samples)
trs_projections <- simulate_projection(orp_bootstrap_samples)
orp_projections
<- data.frame(
projections TRS_Monthly_Pension = trs_projections$monthly_pension,
ORP_US_Shares_Price = orp_projections$bobs_us_shares_price
)
# summary
<- projections |>
projection_summary summarise(
TRS_Mean = mean(TRS_Monthly_Pension, na.rm = TRUE),
TRS_SD = sd(TRS_Monthly_Pension, na.rm = TRUE),
ORP_Mean = mean(ORP_US_Shares_Price, na.rm = TRUE),
ORP_SD = sd(ORP_US_Shares_Price, na.rm = TRUE)
|>
) pivot_longer(cols = everything(), names_to = c("Group", ".value"), names_sep = "_")
# plot projs
ggplot(projection_summary, aes(x = Group, y = Mean, fill = Group)) +
geom_bar(stat = "identity", position = position_dodge(), alpha = 0.7) +
geom_errorbar(aes(ymin = Mean - SD, ymax = Mean + SD), width = 0.2, position = position_dodge(0.9)) + # on average, how off are we?
scale_y_continuous(labels = comma) +
labs(
title = "Bootstrapped Monte Carlo Projections (10-Year Outlook)",
x = "Retirement Fund",
y = "Projected Value"
+
) scale_fill_manual(values = c("#2386ca", "#ebb78b")) +
theme_minimal() +
theme(
legend.position = "none",
plot.background = element_rect(fill = "gray75"),
panel.background = element_rect(fill = "gray75", color = NA)
)
This seems to conclude in favor of ORP. Looking at a 4% monthly withdrawal rate:
Code
# simulating projections using bootstrap function with a withdrawl rate of 4% per year for 10 yrs
<- function(bootstrap_samples, years = 10, withdrawal_rate = 0.04, type = "TRS") {
simulate_projection_with_withdrawals <- lapply(bootstrap_samples, function(sample) {
projections if (type == "TRS") {
<- sample |>
projected_funds sample_n(years, replace = TRUE) |>
summarise(
avg_starting_funds = mean(starting_pension, na.rm = TRUE),
avg_contributions = mean(contribution, na.rm = TRUE)
)else if (type == "ORP") {
} <- sample |>
projected_funds sample_n(years, replace = TRUE) |>
summarise(
avg_starting_funds = mean(
+ bobs_intl_shares_price + bobs_bonds_shares_price + bobs_debts_shares_price,
bobs_us_shares_price na.rm = TRUE
),avg_contributions = 0
)
}
<- numeric(years)
funds 1] <- projected_funds$avg_starting_funds + projected_funds$avg_contributions
funds[for (i in 2:years) {
<- funds[i - 1] * (1 - withdrawal_rate)
funds[i]
}
return(data.frame(Year = 1:years, Remaining_Funds = funds))
})
bind_rows(projections, .id = "Bootstrap_Sample")
}
<- simulate_projection_with_withdrawals(trs_bootstrap_samples, type = "TRS")
trs_withdrawals <- simulate_projection_with_withdrawals(orp_bootstrap_samples, type = "ORP")
orp_withdrawals
$Type <- "TRS"
trs_withdrawals$Type <- "ORP"
orp_withdrawals
<- bind_rows(trs_withdrawals, orp_withdrawals)
withdrawal_projections
<- withdrawal_projections |>
main_projections group_by(Type, Year) |>
summarise(
Mean_Remaining_Funds = mean(Remaining_Funds, na.rm = TRUE),
.groups = "drop"
|>
) mutate(Year = Year + 2013)
# callout start and end funds
<- main_projections |>
callout_data filter(Year == 2014 | Year == 2023)
ggplot(main_projections, aes(x = Year, y = Mean_Remaining_Funds, color = Type)) +
geom_line(size = 1.2) +
geom_text(
data = callout_data,
aes(label = scales::comma(round(Mean_Remaining_Funds))),
vjust = -0.5,
size = 4,
show.legend = FALSE
+
) labs(
title = "Remaining Funds Over 10 Years with 4% Withdrawal Rate",
x = "Year",
y = "Remaining Funds",
color = "Projection Type"
+
) scale_fill_manual(values = c("#2386ca", "#ebb78b")) +
scale_y_continuous(labels = scales::comma) +
scale_x_continuous(breaks = seq(2014, 2023, 1)) +
theme_minimal() +
theme(
legend.position = "right",
plot.background = element_rect(fill = "gray90"),
panel.background = element_rect(fill = "gray90", color = NA),
panel.grid.major = element_line(color = "gray20", size = 0.35),
panel.grid.minor = element_line(color = "gray30", size = 0.35)
)
Conclusion
It would seem the Optional Retirement Plan with investments in low-risk-low-reward stocks would give our new CUNY employee the best outlook towards retirement.
Although it can be nerve wracking to have a bit more of a “loose” retirement portfolio, judging by stocks yearly growth and the ability to diversify a portfolio and have more control over your future finances, to really have a retirement that doesn’t instantly drain the funds, I would recommend this plan.