Sports analytics has grown rapidly in the NFL over the last decade. Some sports figures have mixed feelings about the introduction of advanced analytics, but there is no doubt that certain teams have had great success using advanced stats. I will be analyzing the NFL's passer rating, and then using physical attributes to predict it.
To begin, we will analyze a dataset containing the stats of the NFL's starting 2023 quarterbacks. One field in this dataset is passer rating, which is computed from a fixed formula over completions, attempts, yards, touchdowns, and interceptions (ESPN's QBR, a separate metric, also appears in the data). My goal is to analyze how each QB's raw stats correlate with this rating, using a correlation plot along with linear regression.
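For context, the passer-rating formula is public: four per-attempt components, each clamped to [0, 2.375], averaged and scaled. A minimal Python sketch (the `passer_rating` helper is mine, not part of the dataset):

```python
def passer_rating(cmp, att, yds, td, inte):
    """NFL passer rating: four components clamped to [0, 2.375],
    averaged and scaled so a perfect game is 158.3."""
    clamp = lambda v: max(0.0, min(v, 2.375))
    a = clamp((cmp / att - 0.3) * 5)       # completion percentage
    b = clamp((yds / att - 3) * 0.25)      # yards per attempt
    c = clamp((td / att) * 20)             # touchdown rate
    d = clamp(2.375 - (inte / att) * 25)   # interception rate
    return (a + b + c + d) / 6 * 100

# Tua Tagovailoa's 2023 line: 388/560, 4624 yds, 29 TD, 14 INT
print(round(passer_rating(388, 560, 4624, 29, 14), 1))  # → 101.1
```

The result matches the `rate` column for the same player in the dataset below.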
In the second part of this project, I will use a different dataset. I will use multiple linear regression and lasso regression to predict a QB's rating based on physical attributes. This would be helpful for NFL scouts when drafting a QB out of college.
Below are the packages and functions I will be using. Most of the data cleaning will be done with pandas or NumPy. To create, test, and validate models I will use scikit-learn, and I will use NumPy to compute the final statistics.
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import mean, std, absolute
from sklearn import linear_model, preprocessing
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
To make a correlation plot I first need to load and clean the data; only numeric columns can go into the correlation matrix.
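One way to keep only numeric columns, instead of listing the text columns by hand, is pandas' `select_dtypes`. A small sketch on a toy frame (the toy data is illustrative, not from the real CSV):

```python
import pandas as pd

# Toy frame standing in for the real dataset
df = pd.DataFrame({
    "player": ["A", "B"],
    "team": ["MIA", "DET"],
    "att": [560, 605],
    "rate": [101.1, 97.9],
})

# Keep only numeric columns, dropping the string columns automatically
numeric = df.select_dtypes(include="number")
print(list(numeric.columns))  # → ['att', 'rate']
```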
warnings.filterwarnings('ignore')
nfl_df = pd.read_csv("nfl2023.csv")
nfl_df.head()
rank | player | team | age | pod | g | gs | cmp | att | cmppct | ... | aya | ypc | ypg | rate | qbr | sk | yds.1 | spct | nya | anya | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | Tua Tagovailoa | MIA | 25.0 | QB | 17.0 | 17.0 | 388.0 | 560.0 | 69.3 | ... | 8.2 | 11.9 | 272.0 | 101.1 | 60.8 | 29.0 | 171.0 | 4.9 | 7.56 | 7.48 |
1 | 2.0 | Jared Goff | DET | 29.0 | QB | 17.0 | 17.0 | 407.0 | 605.0 | 67.3 | ... | 7.7 | 11.2 | 269.1 | 97.9 | 60.3 | 30.0 | 197.0 | 4.7 | 6.89 | 6.99 |
2 | 3.0 | Dak Prescott | DAL | 30.0 | QB | 17.0 | 17.0 | 410.0 | 590.0 | 69.5 | ... | 8.2 | 11.0 | 265.6 | 105.9 | 72.7 | 39.0 | 255.0 | 6.2 | 6.77 | 7.28 |
3 | 4.0 | Josh Allen | BUF | 27.0 | QB | 17.0 | 17.0 | 385.0 | 579.0 | 66.5 | ... | 7.0 | 11.2 | 253.3 | 92.2 | 69.6 | 24.0 | 152.0 | 4.0 | 6.89 | 6.51 |
4 | 5.0 | Brock Purdy | SFO | 24.0 | QB | 16.0 | 16.0 | 308.0 | 444.0 | 69.4 | ... | 9.9 | 13.9 | 267.5 | 113.0 | 72.8 | 28.0 | 153.0 | 5.9 | 8.74 | 9.01 |
5 rows × 29 columns
# Drop the last n rows (keeping the 29 primary starters),
# then drop the non-numeric columns for the correlation matrix
n = 41
nfl_df.drop(nfl_df.tail(n).index, inplace=True)
corr_df = nfl_df.drop(columns=['rank', 'player', 'team', 'pod'])
corr_df.head()
age | g | gs | cmp | att | cmppct | yds | tf | tdpct | int | ... | aya | ypc | ypg | rate | qbr | sk | yds.1 | spct | nya | anya | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25.0 | 17.0 | 17.0 | 388.0 | 560.0 | 69.3 | 4624.0 | 29.0 | 5.2 | 14.0 | ... | 8.2 | 11.9 | 272.0 | 101.1 | 60.8 | 29.0 | 171.0 | 4.9 | 7.56 | 7.48 |
1 | 29.0 | 17.0 | 17.0 | 407.0 | 605.0 | 67.3 | 4575.0 | 30.0 | 5.0 | 12.0 | ... | 7.7 | 11.2 | 269.1 | 97.9 | 60.3 | 30.0 | 197.0 | 4.7 | 6.89 | 6.99 |
2 | 30.0 | 17.0 | 17.0 | 410.0 | 590.0 | 69.5 | 4516.0 | 36.0 | 6.1 | 9.0 | ... | 8.2 | 11.0 | 265.6 | 105.9 | 72.7 | 39.0 | 255.0 | 6.2 | 6.77 | 7.28 |
3 | 27.0 | 17.0 | 17.0 | 385.0 | 579.0 | 66.5 | 4306.0 | 29.0 | 5.0 | 18.0 | ... | 7.0 | 11.2 | 253.3 | 92.2 | 69.6 | 24.0 | 152.0 | 4.0 | 6.89 | 6.51 |
4 | 24.0 | 16.0 | 16.0 | 308.0 | 444.0 | 69.4 | 4280.0 | 31.0 | 7.0 | 11.0 | ... | 9.9 | 13.9 | 267.5 | 113.0 | 72.8 | 28.0 | 153.0 | 5.9 | 8.74 | 9.01 |
5 rows × 25 columns
corr = corr_df.corr()
corr.style.background_gradient(cmap='coolwarm')
age | g | gs | cmp | att | cmppct | yds | tf | tdpct | int | intpct | 1d | succpct | lng | ya | aya | ypc | ypg | rate | qbr | sk | yds.1 | spct | nya | anya | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
age | 1.000000 | -0.096904 | -0.058288 | 0.092303 | 0.027248 | 0.378987 | 0.068625 | 0.283090 | 0.398776 | -0.201751 | -0.267725 | 0.090304 | 0.250194 | 0.189479 | 0.114578 | 0.241700 | -0.033522 | 0.311876 | 0.373436 | 0.377266 | -0.302338 | -0.366297 | -0.335697 | 0.193378 | 0.293391 |
g | -0.096904 | 1.000000 | 0.947394 | 0.857407 | 0.880886 | 0.162190 | 0.846586 | 0.654199 | 0.274326 | 0.564133 | 0.153288 | 0.814989 | 0.366283 | 0.371712 | 0.354643 | 0.289473 | 0.347475 | 0.228232 | 0.243155 | 0.399136 | 0.398730 | 0.277875 | -0.138835 | 0.348502 | 0.299672 |
gs | -0.058288 | 0.947394 | 1.000000 | 0.898056 | 0.903782 | 0.266359 | 0.886366 | 0.744135 | 0.381754 | 0.564149 | 0.129231 | 0.861190 | 0.449808 | 0.334011 | 0.401916 | 0.356744 | 0.358319 | 0.354868 | 0.337671 | 0.437934 | 0.388259 | 0.295769 | -0.170364 | 0.390225 | 0.360396 |
cmp | 0.092303 | 0.857407 | 0.898056 | 1.000000 | 0.981798 | 0.429316 | 0.933687 | 0.819149 | 0.442877 | 0.600554 | 0.106078 | 0.939518 | 0.569898 | 0.348069 | 0.363722 | 0.347863 | 0.242721 | 0.574668 | 0.395058 | 0.577638 | 0.233116 | 0.171267 | -0.392721 | 0.415506 | 0.397321 |
att | 0.027248 | 0.880886 | 0.903782 | 0.981798 | 1.000000 | 0.254365 | 0.896764 | 0.737374 | 0.319134 | 0.625179 | 0.120960 | 0.897764 | 0.437261 | 0.290577 | 0.255577 | 0.235044 | 0.190616 | 0.479179 | 0.258355 | 0.477424 | 0.343072 | 0.296493 | -0.293298 | 0.294886 | 0.273843 |
cmppct | 0.378987 | 0.162190 | 0.266359 | 0.429316 | 0.254365 | 1.000000 | 0.496097 | 0.678879 | 0.780841 | 0.103741 | -0.024563 | 0.514556 | 0.838139 | 0.351031 | 0.650268 | 0.671652 | 0.339429 | 0.707967 | 0.810791 | 0.708133 | -0.421986 | -0.516058 | -0.614579 | 0.730529 | 0.741264 |
yds | 0.068625 | 0.846586 | 0.886366 | 0.933687 | 0.896764 | 0.496097 | 1.000000 | 0.891506 | 0.602020 | 0.511032 | 0.057922 | 0.987144 | 0.720030 | 0.522083 | 0.652745 | 0.615508 | 0.571344 | 0.701055 | 0.594801 | 0.704422 | 0.121029 | 0.050318 | -0.440615 | 0.673940 | 0.645405 |
tf | 0.283090 | 0.654199 | 0.744135 | 0.819149 | 0.737374 | 0.678879 | 0.891506 | 1.000000 | 0.861829 | 0.337682 | -0.045441 | 0.902285 | 0.799468 | 0.526854 | 0.689922 | 0.725873 | 0.533740 | 0.763021 | 0.791048 | 0.815154 | -0.022791 | -0.114336 | -0.480900 | 0.719189 | 0.756094 |
tdpct | 0.398776 | 0.274326 | 0.381754 | 0.442877 | 0.319134 | 0.780841 | 0.602020 | 0.861829 | 1.000000 | 0.043867 | -0.137667 | 0.606851 | 0.792002 | 0.438851 | 0.789619 | 0.854679 | 0.614041 | 0.761222 | 0.934254 | 0.818219 | -0.224387 | -0.327145 | -0.428565 | 0.794594 | 0.863398 |
int | -0.201751 | 0.564133 | 0.564149 | 0.600554 | 0.625179 | 0.103741 | 0.511032 | 0.337682 | 0.043867 | 1.000000 | 0.828189 | 0.502828 | 0.247263 | -0.023687 | 0.051363 | -0.155769 | 0.025601 | 0.176181 | -0.156891 | 0.120725 | 0.270975 | 0.146509 | -0.143110 | 0.110455 | -0.089974 |
intpct | -0.267725 | 0.153288 | 0.129231 | 0.106078 | 0.120960 | -0.024563 | 0.057922 | -0.045441 | -0.137667 | 0.828189 | 1.000000 | 0.054950 | 0.040843 | -0.201250 | -0.085309 | -0.341080 | -0.077540 | -0.122191 | -0.359339 | -0.174296 | 0.089859 | -0.045769 | 0.001506 | -0.034404 | -0.281243 |
1d | 0.090304 | 0.814989 | 0.861190 | 0.939518 | 0.897764 | 0.514556 | 0.987144 | 0.902285 | 0.606851 | 0.502828 | 0.054950 | 1.000000 | 0.736239 | 0.522984 | 0.617692 | 0.590601 | 0.517996 | 0.714296 | 0.588809 | 0.711426 | 0.069109 | 0.020066 | -0.489252 | 0.653081 | 0.630445 |
succpct | 0.250194 | 0.366283 | 0.449808 | 0.569898 | 0.437261 | 0.838139 | 0.720030 | 0.799468 | 0.792002 | 0.247263 | 0.040843 | 0.736239 | 1.000000 | 0.563832 | 0.814080 | 0.782768 | 0.619845 | 0.809252 | 0.817766 | 0.837726 | -0.460164 | -0.503405 | -0.760726 | 0.899464 | 0.863322 |
lng | 0.189479 | 0.371712 | 0.334011 | 0.348069 | 0.290577 | 0.351031 | 0.522083 | 0.526854 | 0.438851 | -0.023687 | -0.201250 | 0.522984 | 0.563832 | 1.000000 | 0.593199 | 0.603200 | 0.577296 | 0.426060 | 0.537869 | 0.625337 | -0.297472 | -0.340659 | -0.435324 | 0.624214 | 0.628479 |
ya | 0.114578 | 0.354643 | 0.401916 | 0.363722 | 0.255577 | 0.650268 | 0.652745 | 0.689922 | 0.789619 | 0.051363 | -0.085309 | 0.617692 | 0.814080 | 0.593199 | 1.000000 | 0.955494 | 0.932350 | 0.720316 | 0.871648 | 0.729782 | -0.269983 | -0.347630 | -0.422742 | 0.966948 | 0.946365 |
aya | 0.241700 | 0.289473 | 0.356744 | 0.347863 | 0.235044 | 0.671652 | 0.615508 | 0.725873 | 0.854679 | -0.155769 | -0.341080 | 0.590601 | 0.782768 | 0.603200 | 0.955494 | 1.000000 | 0.865259 | 0.749503 | 0.957212 | 0.781493 | -0.276368 | -0.324431 | -0.415558 | 0.919160 | 0.979305 |
ypc | -0.033522 | 0.347475 | 0.358319 | 0.242721 | 0.190616 | 0.339429 | 0.571344 | 0.533740 | 0.614041 | 0.025601 | -0.077540 | 0.517996 | 0.619845 | 0.577296 | 0.932350 | 0.865259 | 1.000000 | 0.573362 | 0.697040 | 0.580806 | -0.153090 | -0.207109 | -0.248473 | 0.859729 | 0.824698 |
ypg | 0.311876 | 0.228232 | 0.354868 | 0.574668 | 0.479179 | 0.707967 | 0.701055 | 0.763021 | 0.761222 | 0.176181 | -0.122191 | 0.714296 | 0.809252 | 0.426060 | 0.720316 | 0.749503 | 0.573362 | 1.000000 | 0.785172 | 0.788274 | -0.270803 | -0.249964 | -0.603812 | 0.765344 | 0.789656 |
rate | 0.373436 | 0.243155 | 0.337671 | 0.395058 | 0.258355 | 0.810791 | 0.594801 | 0.791048 | 0.934254 | -0.156891 | -0.359339 | 0.588809 | 0.817766 | 0.537869 | 0.871648 | 0.957212 | 0.697040 | 0.785172 | 1.000000 | 0.826181 | -0.315711 | -0.374640 | -0.481912 | 0.868464 | 0.958765 |
qbr | 0.377266 | 0.399136 | 0.437934 | 0.577638 | 0.477424 | 0.708133 | 0.704422 | 0.815154 | 0.818219 | 0.120725 | -0.174296 | 0.711426 | 0.837726 | 0.625337 | 0.729782 | 0.781493 | 0.580806 | 0.788274 | 0.826181 | 1.000000 | -0.361001 | -0.412892 | -0.672507 | 0.805898 | 0.842078 |
sk | -0.302338 | 0.398730 | 0.388259 | 0.233116 | 0.343072 | -0.421986 | 0.121029 | -0.022791 | -0.224387 | 0.270975 | 0.089859 | 0.069109 | -0.460164 | -0.297472 | -0.269983 | -0.276368 | -0.153090 | -0.270803 | -0.315711 | -0.361001 | 1.000000 | 0.936531 | 0.780383 | -0.440851 | -0.410559 |
yds.1 | -0.366297 | 0.277875 | 0.295769 | 0.171267 | 0.296493 | -0.516058 | 0.050318 | -0.114336 | -0.327145 | 0.146509 | -0.045769 | 0.020066 | -0.503405 | -0.340659 | -0.347630 | -0.324431 | -0.207109 | -0.249964 | -0.374640 | -0.412892 | 0.936531 | 1.000000 | 0.742603 | -0.518535 | -0.461820 |
spct | -0.335697 | -0.138835 | -0.170364 | -0.392721 | -0.293298 | -0.614579 | -0.440615 | -0.480900 | -0.428565 | -0.143110 | 0.001506 | -0.489252 | -0.760726 | -0.435324 | -0.422742 | -0.415558 | -0.248473 | -0.603812 | -0.481912 | -0.672507 | 0.780383 | 0.742603 | 1.000000 | -0.630212 | -0.583843 |
nya | 0.193378 | 0.348502 | 0.390225 | 0.415506 | 0.294886 | 0.730529 | 0.673940 | 0.719189 | 0.794594 | 0.110455 | -0.034404 | 0.653081 | 0.899464 | 0.624214 | 0.966948 | 0.919160 | 0.859729 | 0.765344 | 0.868464 | 0.805898 | -0.440851 | -0.518535 | -0.630212 | 1.000000 | 0.961000 |
anya | 0.293391 | 0.299672 | 0.360396 | 0.397321 | 0.273843 | 0.741264 | 0.645405 | 0.756094 | 0.863398 | -0.089974 | -0.281243 | 0.630445 | 0.863322 | 0.628479 | 0.946365 | 0.979305 | 0.824698 | 0.789656 | 0.958765 | 0.842078 | -0.410559 | -0.461820 | -0.583843 | 0.961000 | 1.000000 |
Initially, we can see three variables with very strong correlations to rating: touchdown percentage (.93), adjusted yards per attempt (.96), and adjusted net yards per attempt (.96). Like passer rating itself, these three are calculated stats, but their formulas are public: TD% is touchdowns divided by attempts, AY/A is (passing yards + 20·TD − 45·Int) / attempts, and ANY/A additionally subtracts sack yardage from the numerator and adds sacks to the denominator.
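The top correlates can also be pulled out programmatically rather than read off the heat map by eye. A sketch using a small synthetic frame standing in for `corr_df` (the synthetic coefficients are made up for illustration):

```python
import numpy as np
import pandas as pd

# Synthetic stand-in for corr_df; with the real data, use the cleaned 2023 stats
rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(29, 4)),
                  columns=["tdpct", "aya", "anya", "rate"])
df["rate"] = 0.5 * df["tdpct"] + 0.4 * df["anya"] + rng.normal(scale=0.1, size=29)

# Rank features by absolute correlation with the rating
top = df.corr()["rate"].drop("rate").abs().sort_values(ascending=False)
print(top)
```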
Next, I will fit simple linear models of the rating against each of these variables so I can visualize the relationships; the models' $r^{2}$ scores also double-check the values from the correlation plot.
x = corr_df.tdpct.values.reshape(-1, 1)
y = corr_df.rate.values.reshape(-1, 1)
regr = LinearRegression().fit(x, y)

x2 = corr_df.aya.values.reshape(-1, 1)
y2 = corr_df.rate.values.reshape(-1, 1)
regr2 = LinearRegression().fit(x2, y2)

x3 = corr_df.anya.values.reshape(-1, 1)
y3 = corr_df.rate.values.reshape(-1, 1)
regr3 = LinearRegression().fit(x3, y3)
print(regr.score(x,y))
print(regr2.score(x2,y2))
print(regr3.score(x3,y3))
0.872830434973256 0.9162543052759315 0.9192305613686622
The $r^{2}$ values are simply the squares of the correlations (e.g. $0.934^{2} \approx 0.873$), so they are lower than the correlation values but still very strong. I will proceed with these variables for my formula. Below I visualize the model line and data points for each.
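For a single feature, the $r^{2}$ from `LinearRegression.score` is exactly the squared Pearson correlation, which can be verified directly on synthetic data:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Synthetic stand-in data; with the real frame, x would be corr_df.tdpct
rng = np.random.default_rng(1)
x = rng.normal(size=30)
y = 2 * x + rng.normal(scale=0.5, size=30)

r = np.corrcoef(x, y)[0, 1]                       # Pearson correlation
X = x.reshape(-1, 1)
r2 = LinearRegression().fit(X, y).score(X, y)     # model r^2

print(r ** 2, r2)  # the two values agree
```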
# plot it as in the example at http://scikit-learn.org/
plt.scatter(x, y, color='black')
plt.plot(x, regr.predict(x), color='blue', linewidth=3)
plt.title("Quarterback Statistics")
plt.xlabel("Touchdown %")
plt.ylabel("Rating")
plt.show()
plt.scatter(x2, y2, color='black')
plt.plot(x2, regr2.predict(x2), color='blue', linewidth=3)
plt.xlabel("Adjusted Yards per Attempt")
plt.ylabel("Rating")
plt.show()
plt.scatter(x3, y3, color='black')
plt.plot(x3, regr3.predict(x3), color='blue', linewidth=3)
plt.xlabel("Adjusted Net Yards per Attempt")
plt.ylabel("Rating")
plt.show()
All three plots are strongly linear with few outliers.
I'm going to see how close I can get to the official formula using these variables. I am only using three variables, but since each is itself a combination of raw stats, roughly eight underlying stats feed into them. I use simple pandas arithmetic to mutate these columns into a new rating column, multiplying each variable by a numeric coefficient so my scale is close to the official one. The first time I tried this, the difference between my rating and the official rating was about 5 points; by tuning the coefficients I got it down to 3.31.
nfl_df['myrating'] = ((nfl_df['anya']*3) + (nfl_df['aya']*4) + (nfl_df['tdpct']*4.8)) + 23
nfl_df.head()
rank | player | team | age | pod | g | gs | cmp | att | cmppct | ... | ypc | ypg | rate | qbr | sk | yds.1 | spct | nya | anya | myrating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | Tua Tagovailoa | MIA | 25.0 | QB | 17.0 | 17.0 | 388.0 | 560.0 | 69.3 | ... | 11.9 | 272.0 | 101.1 | 60.8 | 29.0 | 171.0 | 4.9 | 7.56 | 7.48 | 103.20 |
1 | 2.0 | Jared Goff | DET | 29.0 | QB | 17.0 | 17.0 | 407.0 | 605.0 | 67.3 | ... | 11.2 | 269.1 | 97.9 | 60.3 | 30.0 | 197.0 | 4.7 | 6.89 | 6.99 | 98.77 |
2 | 3.0 | Dak Prescott | DAL | 30.0 | QB | 17.0 | 17.0 | 410.0 | 590.0 | 69.5 | ... | 11.0 | 265.6 | 105.9 | 72.7 | 39.0 | 255.0 | 6.2 | 6.77 | 7.28 | 106.92 |
3 | 4.0 | Josh Allen | BUF | 27.0 | QB | 17.0 | 17.0 | 385.0 | 579.0 | 66.5 | ... | 11.2 | 253.3 | 92.2 | 69.6 | 24.0 | 152.0 | 4.0 | 6.89 | 6.51 | 94.53 |
4 | 5.0 | Brock Purdy | SFO | 24.0 | QB | 16.0 | 16.0 | 308.0 | 444.0 | 69.4 | ... | 13.9 | 267.5 | 113.0 | 72.8 | 28.0 | 153.0 | 5.9 | 8.74 | 9.01 | 123.23 |
5 rows × 30 columns
I stored the mutated variables in a new column so I can compare my rating against the original. Rather than scanning the columns, it is easier to visualize both ratings together.
plt.rcParams.update({'figure.figsize': (10, 8), 'figure.dpi': 100})
plt.scatter(nfl_df.player, nfl_df.rate, label='official rating')
plt.scatter(nfl_df.player, nfl_df.myrating, label='my rating')
plt.xticks(rotation=90)
plt.legend()
plt.show()
nfl_df['ratingdiff'] = (nfl_df['rate']) - (nfl_df['myrating'])
nfl_df['ratingdiff'] = nfl_df['ratingdiff'].abs()
avg = nfl_df['ratingdiff'].mean()
print(avg)
3.3103448275862077
I took the mean of the absolute difference between the two ratings. By tuning the coefficients, I reduced the average error from about 5 points to 3.31.
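Hand-tuning the coefficients can also be replaced by letting ordinary least squares fit them directly. A sketch on synthetic stand-ins for `anya`, `aya`, and `tdpct`, where the "true" rating is built from the hand-tuned coefficients plus noise:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Synthetic stand-ins for the three features; the target uses the
# hand-tuned coefficients (3, 4, 4.8) and intercept 23 plus noise
rng = np.random.default_rng(5)
X = rng.normal(size=(29, 3))
rate = X @ np.array([3.0, 4.0, 4.8]) + 23 + rng.normal(scale=1.0, size=29)

# OLS recovers the coefficients and intercept instead of hand-tuning
fit = LinearRegression().fit(X, rate)
print(fit.coef_, fit.intercept_)  # close to [3, 4, 4.8] and 23
```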
The next dataset was not available publicly, so I scraped each column from the web individually. The data lists four physical attributes for all starting quarterbacks. I'm going to apply multiple linear regression as well as lasso regression, with the goal of predicting quarterback rating and success from physical attributes. Many things go into a QB besides physical attributes, but this could be a useful tool for scouts analyzing the riskiness of a certain physical profile. For example, many news outlets stated that Kenny Pickett would play poorly due to his hand size. Let's see if that view holds up.
Looking at another correlation plot, we can see that height has almost no correlation with rating and hand size is very low. Weight and age are low compared to our last dataset but significant enough to use for this experiment. I will create a multiple linear regression model with weight and age, then use the model to predict QB success.
measurements = pd.read_csv("nfl2023measure.csv")
measurements.head()
name | height | weight | age | handsize | |
---|---|---|---|---|---|
0 | Tua Tagovailoa | 71 | 227 | 25 | 10.000 |
1 | Jared Goff | 76 | 222 | 29 | 9.000 |
2 | Dak Prescott | 74 | 229 | 30 | 10.000 |
3 | Josh Allen | 77 | 237 | 28 | 10.125 |
4 | Brock Purdy | 73 | 220 | 24 | 9.250 |
# Attach the official passer rating as the target column
measurements['myrating'] = nfl_df['rate']
measurements1 = measurements.drop(columns=['name'])
measurements3 = measurements1.drop(columns=['height', 'handsize'])
measurements1.head()
measure = measurements1.corr()
measure.style.background_gradient(cmap='coolwarm')
height | weight | age | handsize | myrating | |
---|---|---|---|---|---|
height | 1.000000 | 0.208924 | -0.013441 | -0.034121 | 0.033015 |
weight | 0.208924 | 1.000000 | 0.099235 | 0.186289 | 0.340325 |
age | -0.013441 | 0.099235 | 1.000000 | 0.065689 | 0.355507 |
handsize | -0.034121 | 0.186289 | 0.065689 | 1.000000 | 0.153581 |
myrating | 0.033015 | 0.340325 | 0.355507 | 0.153581 | 1.000000 |
x4 = measurements3.drop('myrating',axis= 1)
y4 = measurements3['myrating']
I will split the data into a 70/30 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
x4, y4, test_size=0.3, random_state=101)
model = LinearRegression()
model.fit(X_train,y_train)
LinearRegression()
predictions = model.predict(X_test)
I can use the predict method to generate predictions on the test set. The mean absolute error is approximately 11 points, which is far too poor for useful predictions. Since the model systematically under-predicts, I will add 11 to each result as a crude offset correction.
print(predictions)
[89.86870924 93.25767581 89.51779279 91.83190589 95.14567609 91.95723319 94.54706636 91.83190589 98.76023181]
print(
'mean_squared_error : ', mean_squared_error(y_test, predictions))
print(
'mean_absolute_error : ', mean_absolute_error(y_test, predictions))
mean_squared_error : 141.5633064933608 mean_absolute_error : 11.032362146922777
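Hardcoding a +11 offset is fragile. One alternative sketch, on synthetic stand-in data, is to estimate the offset from residuals on a held-out split (ideally a validation split separate from the final test set):

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Synthetic stand-in: (weight, age) features with a weak linear signal
rng = np.random.default_rng(3)
X = np.column_stack([rng.normal(225, 8, 60), rng.normal(27, 3, 60)])
y = 0.05 * X[:, 0] + 0.5 * X[:, 1] + rng.normal(scale=8, size=60) + 60

X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.3, random_state=0)
model = LinearRegression().fit(X_tr, y_tr)

# Estimate the systematic offset on the held-out split instead of hardcoding it
offset = float(np.mean(y_val - model.predict(X_val)))
calibrated = model.predict(X_val) + offset
print(offset)
```

By construction, the calibrated predictions have zero mean error on the split the offset was estimated from.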
regr = linear_model.LinearRegression()
regr.fit(x4, y4)
LinearRegression()
I'm going to sanity-check the model by inputting real athletes' measurements (weight, age).
predicted1 = regr.predict([[227, 25]])  # Tua Tagovailoa
predicted2 = regr.predict([[222, 29]])  # Jared Goff
predicted3 = regr.predict([[229, 30]])  # Dak Prescott
predicted1 + 11
array([103.23099967])
predicted2 + 11
array([104.95161571])
predicted3 + 11
array([108.00396546])
We can see that with the added offset, these example predictions land much closer to the actual ratings, some within a couple of points.
Unlike the first dataset, these features do not have very high correlations with the rating. The correlations are highest for age and weight, while height has almost no correlation, which is why it was excluded from the model.
I'm going to use scikit-learn for another model. This time I will use lasso regression.
Lasso regression is like linear regression, but it applies "shrinkage": the regression coefficients are penalized and shrunk toward zero.
Linear regression gives you the coefficients exactly as fit to the dataset. Lasso regularizes these coefficients to reduce overfitting so the model generalizes better to new data.
This type of regression is useful when the dataset shows high multicollinearity, or when you want to automate variable elimination and feature selection.
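The shrinkage behavior can be seen on synthetic data where only one of three features matters: lasso drives the irrelevant coefficients to zero while plain least squares leaves them small but nonzero (the data and alpha here are illustrative):

```python
import numpy as np
from sklearn.linear_model import Lasso, LinearRegression

# Synthetic data: only the first of three features drives the target
rng = np.random.default_rng(4)
X = rng.normal(size=(100, 3))
y = 5 * X[:, 0] + rng.normal(scale=0.5, size=100)

ols = LinearRegression().fit(X, y)
lasso = Lasso(alpha=0.5).fit(X, y)
print(ols.coef_)    # all three coefficients nonzero
print(lasso.coef_)  # irrelevant coefficients shrunk to zero
```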
# define model
model = Lasso(alpha=1.0)
# define model evaluation method
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate model
scores = cross_val_score(model, x4, y4, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# force scores to be positive
scores = absolute(scores)
print('Mean MAE: %.3f (%.3f)' % (mean(scores), std(scores)))
Mean MAE: 7.698 (3.260)
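Rather than fixing `alpha=1.0`, scikit-learn's `LassoCV` can choose the penalty strength by cross-validation. A sketch on synthetic data (the stand-in features play the role of the weight/age columns):

```python
import numpy as np
from sklearn.linear_model import LassoCV

# Synthetic stand-in; with the real data, X would be the weight/age columns
rng = np.random.default_rng(6)
X = rng.normal(size=(50, 2))
y = X @ np.array([1.5, 0.8]) + rng.normal(scale=1.0, size=50)

# LassoCV picks alpha from a grid by cross-validation
model = LassoCV(cv=5, random_state=0).fit(X, y)
print(model.alpha_)
```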
# define and fit the model on the full dataset
model = Lasso(alpha=1.0)
model.fit(x4, y4)
# define new data rows: [weight, age]
row1 = [227, 25]
row2 = [222, 29]
row3 = [229, 39]
# make predictions, then scale them up 10% as a rough calibration
yhat1 = model.predict([row1]) * 1.1
yhat2 = model.predict([row2]) * 1.1
yhat3 = model.predict([row3]) * 1.1
# summarize predictions
print('Predicted: %.3f' % yhat1)
print('Predicted: %.3f' % yhat2)
print('Predicted: %.3f' % yhat3)
Predicted: 101.531 Predicted: 103.168 Predicted: 113.887
This model's performance is decent, but even after tuning parameters, the multiple linear regression model is more accurate. In conclusion, I was able to build a formula that matched the official passer rating very closely, with an average error of 3.31 points.
I was also able to build multiple linear regression and lasso regression models to predict an athlete's rating based on physical attributes. The linear model obtained an MAE of about 11, so I can now predict an athlete's rating within that error.