library(ggplot2)
library(readxl)
library(dplyr)
library(knitr)
college_games <- read_excel("College_Football_2025_Season_Games.xlsx")College Football Rankings
Loading the Necessary Packages and Data
Introduction
This project will create a ranking system for college football and rank teams for the 2025 season, using data through week ten. It will also predict the winner of some week eleven games and compare it to the actual outcome.
Ranking System
For my ranking system I created variables that I thought are important and assigned a point value to each one. Then ranked the teams based on who had the most points.
If you win the game:
Win: 1 point
MOV: MOV / 30 * 0.4, 0.4 points max
Opponent’s current record: opponent win% * 0.6, 0.6 points max
Opponent’s rank: (26 - opponent rank) / 25 * 0.85, 0.85 points max
If you lose
- Team that beats you rank: (26 - winner rank) / 25 * 1.2, 1.2 points max
Margin of Victory and Recent Games
I considered margin of victory in my rankings system because being able to beat a team by a large margin demonstrates that a team is better than if the majority of games they play are close. For my ranking system, I assigned teams extra points depending on their margin of victory. I capped the points assigned for margin of victory at 30. Winning by 30 and by 50 is the same amount of extra points, for example. Also I did not consider margin of victory for FCS games.
I gave equal weight to all games in my ranking system regardless of when the game was played.
Rankings vs AP top 25 (Week 11)
Code
# Find each teams overall record
home_record <- college_games %>%
group_by(Home_Team) %>%
mutate(wins = ifelse(Home_Team_Win == 1, 1, 0),
losses = ifelse(Home_Team_Win == 1, 0, 1)) %>%
summarize(wins = sum(wins),
losses = sum(losses)) %>%
rename(Team = Home_Team)
away_record <- college_games %>%
group_by(Away_Team) %>%
mutate(wins = ifelse(Home_Team_Win == 1, 0, 1),
losses = ifelse(Home_Team_Win == 1, 1, 0)) %>%
summarize(wins = sum(wins),
losses = sum(losses)) %>%
rename(Team = Away_Team)
team_record <- bind_rows(home_record, away_record) %>%
group_by(Team) %>%
summarize(Wins = sum(wins),
Losses = sum(losses),
win_pct = Wins / (Wins + Losses))
# Create variables for winner, loser, and the losing team win percentage
college_games <- college_games %>%
mutate(winner = ifelse(Home_Team_Win == 1, Home_Team, Away_Team),
loser = ifelse(Home_Team_Win == 1, Away_Team, Home_Team)) %>%
left_join(team_record %>% select(Team, win_pct), by = c("loser" = "Team")) %>%
rename(opp_win_pct = win_pct)
# create and use the variables for a win, MOV, the losing teams current record, and the losing teams rank (the points a team gets for winning)
win_points <- college_games %>%
mutate(MOV = ifelse(Away_Team_Conference != "FCS",
ifelse(Home_Team_Margin_of_Victory <= 30, abs(Home_Team_Margin_of_Victory), 30), 0),
win_points = 1 + ((MOV / 30) * 0.4),
opponent_rank = as.numeric(ifelse(Home_Team_Win == 1, Away_Team_Rank, Home_Team_Rank)),
opp_record_points = round(opp_win_pct * 0.6, 2),
rank_points = ifelse(!is.na(opponent_rank), (26 - opponent_rank) / 25 * 0.85, 0),
total_win_points = win_points + rank_points + opp_record_points) %>%
group_by(winner) %>%
summarize(overall_win_points = sum(total_win_points)) %>%
arrange(desc(overall_win_points)) %>%
mutate(Team = winner)
# Give points if a team lost to a ranked team
loss_points <- college_games %>%
mutate(winner_rank = as.numeric(ifelse(Home_Team_Win == 1, Home_Team_Rank, Away_Team_Rank)),
rank_loss_points = ifelse(!is.na(winner_rank), (26 - winner_rank) / 25 * 1.2, 0)) %>%
group_by(loser) %>%
summarize(overall_loss_points = sum(rank_loss_points)) %>%
arrange(desc(overall_loss_points)) %>%
mutate(Team = loser)
# combine and sort the overall points each team got
rankings <- left_join(win_points, loss_points, by = "Team") %>%
mutate(overall_loss_points = ifelse(is.na(overall_loss_points), 0, overall_loss_points),
overall_points = overall_win_points + overall_loss_points) %>%
arrange(desc(overall_points)) %>%
mutate(Rank = row_number()) %>%
select(Rank, Team, overall_points)
colnames(rankings) <- c("Rank", "Team", "Points in my System")
rankings %>%
head(n=25) %>%
kable()| Rank | Team | Points in my System |
|---|---|---|
| 1 | Indiana | 15.80667 |
| 2 | Ohio State | 13.74600 |
| 3 | Ole Miss | 13.59600 |
| 4 | Texas | 12.79867 |
| 5 | Texas Tech | 12.78000 |
| 6 | Texas A&M | 12.70267 |
| 7 | Alabama | 12.69533 |
| 8 | Utah | 12.62267 |
| 9 | Vanderbilt | 12.57733 |
| 10 | Oklahoma | 12.16533 |
| 11 | Virginia | 11.94733 |
| 12 | Oregon | 11.86067 |
| 13 | Georgia | 11.85333 |
| 14 | Memphis | 11.76533 |
| 15 | North Texas | 11.70600 |
| 16 | Georgia Tech | 11.46600 |
| 17 | BYU | 11.39533 |
| 18 | Notre Dame | 10.88200 |
| 19 | Louisville | 10.79533 |
| 20 | Michigan | 10.73733 |
| 21 | Illinois | 10.54600 |
| 22 | Tennessee | 10.48400 |
| 23 | South Florida | 10.44400 |
| 24 | USC | 10.43533 |
| 25 | Houston | 10.27800 |
Actual Ap Rankings
Code
include_graphics("top_rank.jpeg")
Notable Results
Underrated: North Texas was ranked 30 in the AP poll and jumped to 15 in my rankings
Overrated: Miami was ranked 18 in the AP poll and dropped to 28 in my rankings
When comparing my rankings to the AP poll, Oregon, Georgia, BYU, and Notre Dame dropped a noticeable amount of spots but all stayed in the top 25
Prediction for Week 11 Games Based on my Rankings
I chose seventeen games with teams ranked in the AP top 25 poll or ranked in the top 25 of my poll. I predicted the winner of each game based on which team ranked higher in my system. My ranking system accurately predicted the winner 76.47% of the time.
Code
library(kableExtra)
home_team <- c("Purdue", "Penn State", "Missouri", "Alabama", "Miss State", "Ole Miss", "Texas Tech", "Notre Dame", "Virginia", "Louisville", "Vanderbilt", "Miami", "USC", "Memphis", "Wisconsin", "Iowa", "South Florida")
away_team <- c("Ohio State", "Indiana", "Texas A&M", "LSU", "Georgia", "The Citadel", "BYU", "Navy", "Wake Forest", "California", "Auburn", "Syracuse", "Northwestern", "Tulane", "Washington", "Oregon", "UTSA")
predicted_winner <- c("Ohio State", "Indiana", "Texas A&M", "Alabama", "Georgia", "Ole Miss", "Texas Tech", "Notre Dame", "Virginia", "Louisville", "Vanderbilt", "Miami", "USC", "Memphis", "Washington", "Oregon", "South Florida")
actual_outcome <- c("Ohio State", "Indiana", "Texas A&M", "Alabama", "Georgia", "Ole Miss", "Texas Tech", "Notre Dame", "Wake Forest", "California", "Vanderbilt", "Miami", "USC", "Tulane", "Wisconsin", "Oregon", "South Florida")
df <- data.frame(home_team, away_team, predicted_winner, actual_outcome)
colnames(df) <- c("Home Team", "Away Team", "Predicted Winner", "Actual Winner")
colors <- c(
"lightgreen", "lightgreen", "lightgreen", "lightgreen",
"lightgreen", "lightgreen", "lightgreen", "lightgreen",
"salmon", "salmon", "lightgreen", "lightgreen",
"lightgreen", "salmon", "salmon", "lightgreen", "lightgreen"
)
df %>%
kable() %>%
kable_paper("striped") %>%
column_spec(4, background = colors)| Home Team | Away Team | Predicted Winner | Actual Winner |
|---|---|---|---|
| Purdue | Ohio State | Ohio State | Ohio State |
| Penn State | Indiana | Indiana | Indiana |
| Missouri | Texas A&M | Texas A&M | Texas A&M |
| Alabama | LSU | Alabama | Alabama |
| Miss State | Georgia | Georgia | Georgia |
| Ole Miss | The Citadel | Ole Miss | Ole Miss |
| Texas Tech | BYU | Texas Tech | Texas Tech |
| Notre Dame | Navy | Notre Dame | Notre Dame |
| Virginia | Wake Forest | Virginia | Wake Forest |
| Louisville | California | Louisville | California |
| Vanderbilt | Auburn | Vanderbilt | Vanderbilt |
| Miami | Syracuse | Miami | Miami |
| USC | Northwestern | USC | USC |
| Memphis | Tulane | Memphis | Tulane |
| Wisconsin | Washington | Washington | Wisconsin |
| Iowa | Oregon | Oregon | Oregon |
| South Florida | UTSA | South Florida | South Florida |