Fixed-Effects Regressions: An Exploration of PGA Tour Panel Data

Image for post
Image for post

Preface

Data Curation

Image for post
Image for post
# Scrape one record per (tournament, year, player row) from the results page.
# NOTE(review): indentation was lost in this listing; the nesting below is
# reconstructed from how each name is used (years are looked up after a
# tournament is selected, rows are looked up after a year is clicked).
results_list = []
for tourney in tourney_list:
    # Pick the tournament in the dropdown, then pause for a fixed 5 seconds
    # (presumably to let the page re-render -- confirm; an explicit wait
    # would be more reliable than a fixed sleep).
    sel = Select(dropdown)
    sel.select_by_visible_text(f'{tourney}')
    time.sleep(5)
    # NOTE(review): the find_element(s)_by_* helpers were removed in
    # Selenium 4.3; keep them only if the pinned Selenium version has them.
    years = driver.find_elements_by_css_selector("text.yearoptions")
    for year in years:
        year.click()
        time.sleep(5)
        graph = driver.find_element_by_css_selector("div.table")
        rows = graph.find_elements_by_class_name("datarow")
        # These are not used in the visible part of the listing, which is
        # cut off below; kept because the remainder may rely on them.
        i = 0
        e = 0
        v = 0
        scorelist = []
        for row in rows:
            player_dict = {}
            player_dict["tournament"] = tourney
            player_dict["year"] = year.text

            # Best-effort lookup: a row without the name cell gets an empty
            # golfer name.  `except Exception` (rather than a bare except)
            # keeps Ctrl-C / SystemExit working during the long scrape.
            # NOTE(review): narrowing further to selenium's
            # NoSuchElementException would be stricter -- confirm.
            try:
                golfer = row.find_element_by_id("col_text1").text
            except Exception:
                golfer = ''
            player_dict["golfer"] = golfer
            # The listing stops here; nothing visible appends player_dict to
            # results_list.

The Data

# Build one 0/1 indicator column per course: start the column at 0, then set
# it to 1 on the rows whose `course` value matches that column's name.
for course in courses:
    column = f'{course}'
    df1[column] = 0
    df1.loc[df1['course'] == column, column] = 1
# Build one 0/1 indicator column per year: start the column at 0, then set
# it to 1 on the rows whose `year` value equals that year.
for year in years:
    df2[year] = 0
    is_this_year = df2['year'] == year
    df2.loc[is_this_year, year] = 1

Models

# Load the panel data set used for the regression.
dta2 = pd.read_csv("panel_data_timeandcourse.csv")

# Give every row a random bucket in [0, 100): buckets 0-79 form the training
# split, 80-89 the validation split and 90-99 the test split.
# NOTE(review): no seed is set in the visible code, so the split (and every
# number derived from it) changes from run to run.
dta2['ML_group'] = np.random.randint(100, size=dta2.shape[0])

# Sorting by bucket places the rows in train, valid, test order.  The later
# `np.concatenate([...train, valid, test...])` assignment to `score_hat` is
# positional and relies on this ordering.
dta2 = dta2.sort_values(by='ML_group')

inx_train2 = dta2.ML_group < 80
inx_valid2 = (dta2.ML_group < 90) & (dta2.ML_group >= 80)
inx_test2 = dta2.ML_group >= 90

Y_train2 = dta2.score[inx_train2].to_list()
Y_valid2 = dta2.score[inx_valid2].to_list()
Y_test2 = dta2.score[inx_test2].to_list()

# Regressors: strokes-gained columns plus the course and year indicators.
feature_cols2 = [
    'sg_putting', 'sg_arg', 'sg_approach', 'sg_tee',
    'Muirfield Village GC', 'Muirfield Village Golf Club', 'TPC Louisiana',
    'Sherwood Country Club', 'Sedgefield CC',
    # TODO: the original listing elides the remaining course and year
    # indicator columns at this point (written as "......."); restore them
    # before running, otherwise the model is fit on an incomplete set.
    '2016', '2017', '2018', '2019', '2020',
    '2021',
]

# All three design matrices share the same columns.  The validation and test
# matrices are referenced by the prediction code but were never defined in
# the listing.
X_train2 = dta2.loc[inx_train2, feature_cols2]
X_valid2 = dta2.loc[inx_valid2, feature_cols2]
X_test2 = dta2.loc[inx_test2, feature_cols2]
from sklearn import linear_model

# Declare the linear regression and train it on the training split.
# `fit` hands back the fitted estimator, which is what `result3` holds.
model = linear_model.LinearRegression()
result3 = model.fit(X_train2, Y_train2)

# Test-split predictions; the value is not stored (it is only displayed when
# this runs as a notebook cell).
result3.predict(X_test2)

# Predict each split in train, valid, test order and store the predictions,
# rounded to whole numbers, as one column.  The assignment is positional, so
# it depends on dta2 already being ordered train -> valid -> test.
split_predictions = [
    result3.predict(features)
    for features in (X_train2, X_valid2, X_test2)
]
stacked_predictions = np.concatenate(split_predictions)
dta2['score_hat'] = stacked_predictions.round().astype(int)
# confusion matrix
# Rows outside the validation split keep the placeholder 0.  The column is
# made object dtype so the string labels written below can be stored in it.
dta2['result'] = 0
dta2['result'] = dta2['result'].astype(object)

# Label every validation row by how far the predicted score is from the
# actual score.
# NOTE: `confusion` must already be defined when this statement executes.
results3 = dta2.loc[inx_valid2].apply(
    lambda x: confusion(x['score'], x['score_hat']), axis=1
)

# Write the labels back with a single .loc call.  The previous chained form
# (`results3 = dta2.loc[inx_valid2].result = ...`) assigned to a temporary
# copy, so dta2['result'] itself was never updated.
dta2.loc[inx_valid2, 'result'] = results3
def confusion(x, y):
    """Return a label describing how far apart two scores are.

    The label depends only on the absolute difference between ``x`` and
    ``y``, so the argument order does not matter.

    Args:
        x: One score (for example the actual score).
        y: The other score (for example the predicted score).

    Returns:
        ``'Exact'`` for no difference, ``'1 off'`` through ``'12 off'`` for
        differences of 1 to 12, then ``'13-15 off'``, ``'16-19 off'``,
        ``'20-25 off'`` and finally ``'26+ off'``.

    Whole-number inputs get the same labels as before.  A fractional
    difference is rounded to the nearest whole number first; previously any
    fractional difference below 16 was reported as ``'13-15 off'``.
    """
    gap = abs(x - y)

    # Written as "not <" so that NaN (for which every comparison is False)
    # and infinite differences land in the catch-all bucket, as they did
    # before, instead of failing in round().
    if not gap < 26:
        return '26+ off'

    whole_gap = int(round(gap))
    if whole_gap == 0:
        return 'Exact'
    if whole_gap <= 12:
        return f'{whole_gap} off'

    # Each range label applies while the difference is below its upper bound.
    range_labels = (
        (16, '13-15 off'),
        (20, '16-19 off'),
        (26, '20-25 off'),
    )
    for upper_bound, label in range_labels:
        if whole_gap < upper_bound:
            return label

    # Reached only when a fractional difference just under 26 rounds up to 26.
    return '26+ off'
In [45]: results3.value_counts(normalize=True)
Out[45]:
1 off 0.290673
2 off 0.219722
Exact 0.152012
3 off 0.137517
4 off 0.082205
5 off 0.050734
6 off 0.025367
7 off 0.016975
8 off 0.007820
9 off 0.005722
10 off 0.003052
11 off 0.002670
13-15 off 0.002479
12 off 0.001907
16-19 off 0.000572
20-25 off 0.000572
In [53]: results3.value_counts(normalize=True)
Out[53]:
1 off 0.197597
2 off 0.175854
3 off 0.139424
Exact 0.108716
4 off 0.104711
5 off 0.082777
6 off 0.054358
7 off 0.046920
8 off 0.026702
9 off 0.021362
10 off 0.015068
13-15 off 0.008964
11 off 0.008201
12 off 0.005913
16-19 off 0.003052
20-25 off 0.000381

Next Steps

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store