Rstudio code for MWP
You can read about this project at
https://comfortablynumb-ers.blogspot.com/2020/02/win-probability-analysis-part-1-mean.html
follow me at @kevgk2 on Twitter
check out more rstudio guides and nflscrapR links at the bottom
#legend:
#pbp: play by play
#WP: win probability
#MWP: average win probability
#WPE: win probability expected wins
#pyth: pythagorean wins
#install packages
install.packages("nflscrapr")
install.packages("tidyverse")
install.packages("lubridate")
install.packages("na.tools")
install.packages("ggimage")
#load libraries
library(nflscrapR)
library(tidyverse)
library(lubridate)
library(na.tools)
library(ggimage)
#determining the mean WP for a particular game
#scrape games
week_14_games <- scrape_game_ids(2019, weeks = 14)
#pull play by play for specific game
Gb_Was_PbP <- week_14_games %>%
filter(home_team == "GB") %>%
pull(game_id) %>%
scrape_json_play_by_play()
#omit na and select WPs
Gb_Was_WP<-Gb_Was_PbP %>%
dplyr::select(home_wp, away_wp) %>%
na.omit()
#determing mean and sum, put in displayable table
table(mean(Gb_Was_WP$home_wp),sum(Gb_Was_WP$home_wp))
#Creating WP analysis project
#scrape season play by play data
pbp <- read_csv(url("https://github.com/ryurko/nflscrapR-data/raw/master/play_by_play_data/regular_season/reg_pbp_2019.csv"))
#omit NA values of selection of specific columns. if you omit NA of every column, you lose A LOT of relevant data, since most of the NA data is in classifier variable columns like "if punt"
subpbp<-na.omit(dplyr::select(pbp, game_id, home_team, away_team, game_date, home_wp, away_wp))
#create schedule for later use. this finds every unique game
schedule<-unique(dplyr::select(subpbp, game_id, home_team, away_team, game_date))
#create WP stats per team per game ID, result is an away team MWP and home team MWP
wpreport<- subpbp %>%
group_by(game_id) %>%
summarize(mean_away = mean(away_wp), mean_home = mean(home_wp))
#merge the WP stats to the schedule
wpreport_total<-merge(schedule, wpreport)
#create expected season values for away teams, grouping by team, arranging by descending values
expectation_away<-wpreport_total %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
# repeat for home teams
expectation_home<-wpreport_total %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
#rename columns
colnames(expectation_home) <- c("team","exp1")
colnames(expectation_away) <- c("team","exp2")
#create total expected wins by adding away and home WPE for each team
totalexp<-transform(merge(expectation_home, expectation_away, by = "team"), total_expected_wins = exp1 + exp2)
#load up the data from footballoutsiders.com, mine was saved in a csv file
nfl2019results <- read.csv("~/wp/nfl2019results.csv", header=FALSE)
colnames(nfl2019results)<-c("team","wins","dvoa","pyth")
#merge other expectation values
totalexp<-merge(totalexp, nfl2019results, by = c("team","wins"))
View(totalexp)
#create regression model between actual wins and WPE
wp2019 <- lm(wins ~ wp.expected, data = expwins)
#view regression model
summary(wp2019)
#regression for wins and dvoa wins
dvoa2019 <- lm(wins ~ expwins$`dvoa wins`, data = expwins)
summary(dvoa2019)
#regression for pythagorean wins
pyth2019 <- lm(wins ~ pyth, data = expwins)
summary(pyth2019)
#export dataframe into a csv file
write.csv(expwins, "expwins")
#this section is about plotting
#plot from expwins data, aes() is just necessary commonplace.
#x variables is expected wins from WP, y is actual wins
#geom_text provides team names
#geom_smooth provides a line, the typical least squares regression line
ggplot(expwins, aes(x=wp.expected, y=wins)) +
geom_text(aes(label=team)) +
geom_smooth(method = "lm")
#create the residuals between actual wins and expected
expwins<-transform(expwins, wpresidual = wins - wp.expected)
#this is for using the NFL logos in plotting
nfl_logos_df <- read_csv("https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv")
#create a new dataframe merging expected wins data and the logos
#I just called it chart b/c I ran out of names
chart <- expwins %>% left_join(nfl_logos_df, by = c("team" = "team_code"))
#create plot of WPE versus actual
#geom image adds the logos
#geom smooth is the least squared line
#geom abline adds a particular line
#creating a histogram of ranking residuals
#this ranks the residual values of WPE
expwins<-arrange(expwins, wpresid)
#this sets the residual values of WPE so when we plot, it does not plot them alphabetically but a set order
expwins$team <- factor(expwins$team,levels=expwins$team)
#graphing the histogram. this uses the color dataframe from the @LeeSharpeNFL guide
ggplot(expwins,aes(x=team,y=wpresid)) +
scale_color_manual(values=expwins$color) +
scale_fill_manual(values=expwins$color) +
geom_bar(stat="identity",aes(color=team,fill=team),show.legend=FALSE) +
xlab("") +
ylab("Wins minus expected") +
labs(title="Wins over/under expectation per meanWP",
caption="graph by nflscrapr and @kevgk2")
#some kind of importing error left a column named "."
expwins<-transform(expwins, dvoaresid = .)
#density plots of residuals for the three win expectation metrics
#xlim provides a specific range of x values
#geom_text makes specific text labels
ggplot(expwins, aes(x=.))+
xlim(-5,5)+
geom_density(color = "green")+
geom_text(label = "dvoa", color = "green", x= 1, y= 0.4)+
theme_light()+
geom_density(aes(x=wpresid), color = "blue") +
geom_text(label = "WP", color = "blue", x= -2, y= 0.15)+
geom_density(aes(x=pythresid), color = "red")+
geom_text(label = "pythag", color = "red", x= 1.25, y= 0.21)+
labs(
title="Smoothed density of residuals for predicted wins vs actual",
caption="by nflscrapr and @kevgk2")
#calculating R^2 from wins = a + b(expected wins) where a = 0, and b = 1
#this lets us interpret the error better than regression
1 - (sum((check$wp.expected - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.608686
1 - (sum((check$dvoa.wins - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.782159
1 - (sum((check$pyth - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.7399274
#looking at predictability for first 8 weeks predicting following 9
#same method as above for WPE but filter first and do it twice
#filtering by game date to collect first 8 weeks of games
wpreport_total_week8<-wpreport_total %>%
select(game_id, home_team, away_team, game_date, mean_away, mean_home) %>%
filter(game_date < as.Date("2019-11-03"))
#collecting final 9 weeks
wpreport_total_week17<-wpreport_total %>%
select(game_id, home_team, away_team, game_date, mean_away, mean_home) %>%
filter(game_date >= as.Date("2019-11-03"))
#create expected season values for away teams, grouping by team, arranging by descending values
expectation_away_week8<-wpreport_total_week8 %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
expectation_home_week8<-wpreport_total_week8 %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
colnames(expectation_home_week8) <- c("team","exp1")
colnames(expectation_away_week8) <- c("team","exp2")
#create total expected wins by adding away and home for each team
week_exp<-transform(merge(expectation_home_week8, expectation_away_week8, by = "team"), total_expected_wins_week8 = exp1 + exp2)
expectation_away_week17<-wpreport_total_week17 %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
expectation_home_week17<-wpreport_total_week17 %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
colnames(expectation_home_week17) <- c("team","exp1")
colnames(expectation_away_week17) <- c("team","exp2")
week_exp17<-transform(merge(expectation_home_week17, expectation_away_week17, by = "team"), total_expected_wins_week17 = exp1 + exp2)
#merge the expected wins for first 8 and last 9 wins
week_exp<-merge(week_exp, select(week_exp17, team, total_expected_wins_week17), by = "team")
#find correlation between expectations
cor(week_exp$total_expected_wins_week8,week_exp$total_expected_wins_week17)
[1] 0.1365074
#this section is about tweaking the expected wins per first 8 weeks model, but instead finds the total MWP for first 8 weeks and following 9 to fix inconsistent bye week issues
awaywp_week8<- subpbp %>%
filter(game_date < as.Date("2019-11-03")) %>%
group_by(away_team) %>%
summarize(sumaway = sum(away_wp), countaway=length(away_wp))
homewp_week8<- subpbp %>%
filter(game_date < as.Date("2019-11-03")) %>%
group_by(home_team) %>%
summarize(sumhome = sum(home_wp), counthome=length(home_wp))
awaywp_week17<- subpbp %>%
filter(game_date >= as.Date("2019-11-03")) %>%
group_by(away_team) %>%
summarize(sumaway = sum(away_wp), countaway=length(away_wp))
homewp_week17<- subpbp %>%
filter(game_date >= as.Date("2019-11-03")) %>%
group_by(home_team) %>%
summarize(sumhome = sum(home_wp), counthome=length(home_wp))
awp_week8<-merge(awaywp_week8, homewp_week8, by.x = "away_team", by.y = "home_team")
awp_week8<-transform(awp_week8, total= (sumaway+sumhome)/(countaway+counthome))
awp_week17<-merge(awaywp_week17, homewp_week17, by.x = "away_team", by.y = "home_team")
awp_week17<-transform(awp_week17, total= (sumaway+sumhome)/(countaway+counthome))
awp_week8<-select(awp_week8, away_team, total)
awp_week17<-select(awp_week17, away_team, total)
colnames(awp_week8)<-c("team","awp_week8")
colnames(awp_week17)<-c("team","awp_week17")
awp_perweek<-merge(awp_week8,awp_week17, by = "team")
cor(awp_perweek$awp_week8, awp_perweek$awp_week17)
[1] 0.2302562
#merge logos for plotting
awp_perweek<-merge(awp_perweek, nfl_logos_df, by.x = "team", by.y = "team_code")
#create a plot of first 8 weeks and last 9
ggplot(awp_perweek, aes(x = awp_week8, y = awp_week17)) +
geom_image(aes(image = url), size = 0.05) +
labs(x = "meanWP through first 8 weeks",
y = "meanWP through first 17 weeks",
caption = "graph by nflscrapr and @kevgk2",
title = "Mean WP for first 8 weeks and following 9",
subtitle = "2019") +
theme_bw() +
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14),
plot.caption = element_text(size = 12))+
geom_abline(slope = 1, intercept = 0)
https://gist.github.com/guga31bb/5634562c5a2a7b1e9961ac9b6c568701
https://github.com/leesharpe/nfldata/blob/master/RSTUDIO-INTRO.md
https://github.com/leesharpe/nfldata/blob/master/WPCHARTS.md
https://github.com/leesharpe/nfldata/blob/master/COLORS_IN_R.md
https://github.com/maksimhorowitz/nflscrapR/blob/master/R/ep_wp_calculator.R
https://github.com/maksimhorowitz/nflscrapR
https://www.dropbox.com/s/5k2wbnroyn8i1ux/Getting%20Started%20With%20R%20for%20NFL.pdf?dl=0
https://comfortablynumb-ers.blogspot.com/2020/02/win-probability-analysis-part-1-mean.html
follow me at @kevgk2 on Twitter
check out more rstudio guides and nflscrapR links at the bottom
#legend:
#pbp: play by play
#WP: win probability
#MWP: average win probability
#WPE: win probability expected wins
#pyth: pythagorean wins
#install packages
install.packages("nflscrapr")
install.packages("tidyverse")
install.packages("lubridate")
install.packages("na.tools")
install.packages("ggimage")
#load libraries
library(nflscrapR)
library(tidyverse)
library(lubridate)
library(na.tools)
library(ggimage)
#determining the mean WP for a particular game
#scrape games
week_14_games <- scrape_game_ids(2019, weeks = 14)
#pull play by play for specific game
Gb_Was_PbP <- week_14_games %>%
filter(home_team == "GB") %>%
pull(game_id) %>%
scrape_json_play_by_play()
#omit na and select WPs
Gb_Was_WP<-Gb_Was_PbP %>%
dplyr::select(home_wp, away_wp) %>%
na.omit()
#determing mean and sum, put in displayable table
table(mean(Gb_Was_WP$home_wp),sum(Gb_Was_WP$home_wp))
#Creating WP analysis project
#scrape season play by play data
pbp <- read_csv(url("https://github.com/ryurko/nflscrapR-data/raw/master/play_by_play_data/regular_season/reg_pbp_2019.csv"))
#omit NA values of selection of specific columns. if you omit NA of every column, you lose A LOT of relevant data, since most of the NA data is in classifier variable columns like "if punt"
subpbp<-na.omit(dplyr::select(pbp, game_id, home_team, away_team, game_date, home_wp, away_wp))
#create schedule for later use. this finds every unique game
schedule<-unique(dplyr::select(subpbp, game_id, home_team, away_team, game_date))
#create WP stats per team per game ID, result is an away team MWP and home team MWP
wpreport<- subpbp %>%
group_by(game_id) %>%
summarize(mean_away = mean(away_wp), mean_home = mean(home_wp))
#merge the WP stats to the schedule
wpreport_total<-merge(schedule, wpreport)
#create expected season values for away teams, grouping by team, arranging by descending values
expectation_away<-wpreport_total %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
# repeat for home teams
expectation_home<-wpreport_total %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
#rename columns
colnames(expectation_home) <- c("team","exp1")
colnames(expectation_away) <- c("team","exp2")
#create total expected wins by adding away and home WPE for each team
totalexp<-transform(merge(expectation_home, expectation_away, by = "team"), total_expected_wins = exp1 + exp2)
#load up the data from footballoutsiders.com, mine was saved in a csv file
nfl2019results <- read.csv("~/wp/nfl2019results.csv", header=FALSE)
colnames(nfl2019results)<-c("team","wins","dvoa","pyth")
#merge other expectation values
totalexp<-merge(totalexp, nfl2019results, by = c("team","wins"))
View(totalexp)
#create regression model between actual wins and WPE
wp2019 <- lm(wins ~ wp.expected, data = expwins)
#view regression model
summary(wp2019)
Call:
lm(formula = wins ~ wp.expected, data = expwins)
Residuals:
Min 1Q Median 3Q Max
-4.3807 -0.9011 -0.1003 0.7087 3.1848
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.3637 1.4688 -5.013 2.24e-05
wp.expected 1.9092 0.1806 10.570 1.24e-11
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.5 on 30 degrees of freedom
Multiple R-squared: 0.7883, Adjusted R-squared: 0.7813
F-statistic: 111.7 on 1 and 30 DF, p-value: 1.24e-11
#regression for wins and dvoa wins
dvoa2019 <- lm(wins ~ expwins$`dvoa wins`, data = expwins)
summary(dvoa2019)
Call:
lm(formula = wins ~ dvoa.wins, data = expwins)
Residuals:
Min 1Q Median 3Q Max
-3.1793 -0.3988 -0.0447 0.7547 3.1517
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.28768 0.83078 -0.346 0.732
dvoa.wins 1.02384 0.09825 10.420 1.74e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.517 on 30 degrees of freedom
Multiple R-squared: 0.7835, Adjusted R-squared: 0.7763
F-statistic: 108.6 on 1 and 30 DF, p-value: 1.738e-11
#regression for pythagorean wins
pyth2019 <- lm(wins ~ pyth, data = expwins)
summary(pyth2019)
Call:
lm(formula = wins ~ pyth, data = expwins)
Residuals:
Min 1Q Median 3Q Max
-2.9218 -0.9419 0.2260 0.8704 3.2110
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.3037 0.9335 -0.325 0.747
pyth 1.0299 0.1112 9.264 2.63e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.659 on 30 degrees of freedom
Multiple R-squared: 0.741, Adjusted R-squared: 0.7323
F-statistic: 85.82 on 1 and 30 DF, p-value: 2.631e-10
write.csv(expwins, "expwins")
#this section is about plotting
#plot from expwins data, aes() is just necessary commonplace.
#x variables is expected wins from WP, y is actual wins
#geom_text provides team names
#geom_smooth provides a line, the typical least squares regression line
ggplot(expwins, aes(x=wp.expected, y=wins)) +
geom_text(aes(label=team)) +
geom_smooth(method = "lm")
#create the residuals between actual wins and expected
expwins<-transform(expwins, wpresidual = wins - wp.expected)
#this is for using the NFL logos in plotting
nfl_logos_df <- read_csv("https://raw.githubusercontent.com/statsbylopez/BlogPosts/master/nfl_teamlogos.csv")
#create a new dataframe merging expected wins data and the logos
#I just called it chart b/c I ran out of names
chart <- expwins %>% left_join(nfl_logos_df, by = c("team" = "team_code"))
#create plot of WPE versus actual
#geom image adds the logos
#geom smooth is the least squared line
#geom abline adds a particular line
ggplot(chart, aes(x = wp.expected, y = wins)) +
geom_image(aes(image = url), size = 0.05) +
labs(x = "meanWP Expected Wins",
y = "Actual Wins",
caption = "graph by nflscrapr and @kevgk2",
title = "Wins vs Expectation (meanWP)",
subtitle = "2019") +
theme_bw() +
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14),
plot.caption = element_text(size = 12)) +
geom_smooth(method = "lm") +
geom_abline(slope = 1, intercept = 0)
#creating a histogram of ranking residuals
#this ranks the residual values of WPE
expwins<-arrange(expwins, wpresid)
#this sets the residual values of WPE so when we plot, it does not plot them alphabetically but a set order
expwins$team <- factor(expwins$team,levels=expwins$team)
#graphing the histogram. this uses the color dataframe from the @LeeSharpeNFL guide
ggplot(expwins,aes(x=team,y=wpresid)) +
scale_color_manual(values=expwins$color) +
scale_fill_manual(values=expwins$color) +
geom_bar(stat="identity",aes(color=team,fill=team),show.legend=FALSE) +
xlab("") +
ylab("Wins minus expected") +
labs(title="Wins over/under expectation per meanWP",
caption="graph by nflscrapr and @kevgk2")
#some kind of importing error left a column named "."
expwins<-transform(expwins, dvoaresid = .)
#density plots of residuals for the three win expectation metrics
#xlim provides a specific range of x values
#geom_text makes specific text labels
ggplot(expwins, aes(x=.))+
xlim(-5,5)+
geom_density(color = "green")+
geom_text(label = "dvoa", color = "green", x= 1, y= 0.4)+
theme_light()+
geom_density(aes(x=wpresid), color = "blue") +
geom_text(label = "WP", color = "blue", x= -2, y= 0.15)+
geom_density(aes(x=pythresid), color = "red")+
geom_text(label = "pythag", color = "red", x= 1.25, y= 0.21)+
labs(
title="Smoothed density of residuals for predicted wins vs actual",
caption="by nflscrapr and @kevgk2")
#calculating R^2 from wins = a + b(expected wins) where a = 0, and b = 1
#this lets us interpret the error better than regression
1 - (sum((check$wp.expected - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.608686
1 - (sum((check$dvoa.wins - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.782159
1 - (sum((check$pyth - check$wins)^2)/(sum((check$wins - mean(check$wins))^2)))
[1] 0.7399274
#looking at predictability for first 8 weeks predicting following 9
#same method as above for WPE but filter first and do it twice
#filtering by game date to collect first 8 weeks of games
wpreport_total_week8<-wpreport_total %>%
select(game_id, home_team, away_team, game_date, mean_away, mean_home) %>%
filter(game_date < as.Date("2019-11-03"))
#collecting final 9 weeks
wpreport_total_week17<-wpreport_total %>%
select(game_id, home_team, away_team, game_date, mean_away, mean_home) %>%
filter(game_date >= as.Date("2019-11-03"))
#create expected season values for away teams, grouping by team, arranging by descending values
expectation_away_week8<-wpreport_total_week8 %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
expectation_home_week8<-wpreport_total_week8 %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
colnames(expectation_home_week8) <- c("team","exp1")
colnames(expectation_away_week8) <- c("team","exp2")
#create total expected wins by adding away and home for each team
week_exp<-transform(merge(expectation_home_week8, expectation_away_week8, by = "team"), total_expected_wins_week8 = exp1 + exp2)
expectation_away_week17<-wpreport_total_week17 %>%
group_by(away_team) %>%
summarize(exp = sum(mean_away)) %>%
arrange(desc(exp))
expectation_home_week17<-wpreport_total_week17 %>%
group_by(home_team) %>%
summarize(exp = sum(mean_home)) %>%
arrange(desc(exp))
colnames(expectation_home_week17) <- c("team","exp1")
colnames(expectation_away_week17) <- c("team","exp2")
week_exp17<-transform(merge(expectation_home_week17, expectation_away_week17, by = "team"), total_expected_wins_week17 = exp1 + exp2)
#merge the expected wins for first 8 and last 9 wins
week_exp<-merge(week_exp, select(week_exp17, team, total_expected_wins_week17), by = "team")
#find correlation between expectations
cor(week_exp$total_expected_wins_week8,week_exp$total_expected_wins_week17)
[1] 0.1365074
#this section is about tweaking the expected wins per first 8 weeks model, but instead finds the total MWP for first 8 weeks and following 9 to fix inconsistent bye week issues
awaywp_week8<- subpbp %>%
filter(game_date < as.Date("2019-11-03")) %>%
group_by(away_team) %>%
summarize(sumaway = sum(away_wp), countaway=length(away_wp))
homewp_week8<- subpbp %>%
filter(game_date < as.Date("2019-11-03")) %>%
group_by(home_team) %>%
summarize(sumhome = sum(home_wp), counthome=length(home_wp))
awaywp_week17<- subpbp %>%
filter(game_date >= as.Date("2019-11-03")) %>%
group_by(away_team) %>%
summarize(sumaway = sum(away_wp), countaway=length(away_wp))
homewp_week17<- subpbp %>%
filter(game_date >= as.Date("2019-11-03")) %>%
group_by(home_team) %>%
summarize(sumhome = sum(home_wp), counthome=length(home_wp))
awp_week8<-merge(awaywp_week8, homewp_week8, by.x = "away_team", by.y = "home_team")
awp_week8<-transform(awp_week8, total= (sumaway+sumhome)/(countaway+counthome))
awp_week17<-merge(awaywp_week17, homewp_week17, by.x = "away_team", by.y = "home_team")
awp_week17<-transform(awp_week17, total= (sumaway+sumhome)/(countaway+counthome))
awp_week8<-select(awp_week8, away_team, total)
awp_week17<-select(awp_week17, away_team, total)
colnames(awp_week8)<-c("team","awp_week8")
colnames(awp_week17)<-c("team","awp_week17")
awp_perweek<-merge(awp_week8,awp_week17, by = "team")
cor(awp_perweek$awp_week8, awp_perweek$awp_week17)
[1] 0.2302562
#merge logos for plotting
awp_perweek<-merge(awp_perweek, nfl_logos_df, by.x = "team", by.y = "team_code")
#create a plot of first 8 weeks and last 9
ggplot(awp_perweek, aes(x = awp_week8, y = awp_week17)) +
geom_image(aes(image = url), size = 0.05) +
labs(x = "meanWP through first 8 weeks",
y = "meanWP through first 17 weeks",
caption = "graph by nflscrapr and @kevgk2",
title = "Mean WP for first 8 weeks and following 9",
subtitle = "2019") +
theme_bw() +
theme(axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14),
plot.caption = element_text(size = 12))+
geom_abline(slope = 1, intercept = 0)
https://gist.github.com/guga31bb/5634562c5a2a7b1e9961ac9b6c568701
https://github.com/leesharpe/nfldata/blob/master/RSTUDIO-INTRO.md
https://github.com/leesharpe/nfldata/blob/master/WPCHARTS.md
https://github.com/leesharpe/nfldata/blob/master/COLORS_IN_R.md
https://github.com/maksimhorowitz/nflscrapR/blob/master/R/ep_wp_calculator.R
https://github.com/maksimhorowitz/nflscrapR
https://www.dropbox.com/s/5k2wbnroyn8i1ux/Getting%20Started%20With%20R%20for%20NFL.pdf?dl=0
Comments
Post a Comment