library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(sjPlot)
## Learn more about sjPlot with 'browseVignettes("sjPlot")'.
# Read the player stats/attributes CSV into a tibble. The file's first column
# has an empty header, which readr renames to `...1` (see the message below).
players_stats <- readr::read_csv(file = "player_stats_attrs_s2.csv")
## New names:
## • `` -> `...1`
## Rows: 355 Columns: 126
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (13): ...1, batter_name, batter_team, pitcher_name, pitcher_team, field...
## dbl (112): pa, ab, hit, rbi, bb, k, 1b, 2b, 3b, hr, fc, dp, BA, OBP, SLG, OP...
## lgl   (1): is_active
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Normalise the column names, drop every `hm_*` column, and append the derived
# rate and distance columns that the models further down regress on.
players_stats <- players_stats %>%
  clean_names() %>%
  select(-starts_with("hm_")) %>%
  # Batting: walks and strikeouts as a share of plate appearances.
  mutate(
    bb_pa = bb / pa,
    k_pa = k / pa
  ) %>%
  # Batting: share of hits by hit type. Note that `xbh_h` sums doubles and
  # triples only; home runs are kept apart in `hr_h`.
  mutate(
    x1b_h = x1b / hit,
    x2b_h = x2b / hit,
    x3b_h = x3b / hit,
    hr_h = hr / hit,
    xbh_h = (x2b + x3b) / hit
  ) %>%
  # Fielding: `advances` divided by `pa_fielded`. The rates of hit types
  # allowed (per hit, or per ball in play) are currently disabled:
  #   x1b_h_f   = x1b_alwd / hits_alwd,
  #   x1b_bip_f = x1b_alwd / totl_fields,
  #   x2b_bip_f = x2b_alwd / totl_fields,
  #   x3b_bip_f = x3b_alwd / totl_fields,
  mutate(
    adv_fields = advances / pa_fielded
  ) %>%
  # Fielding: Manhattan distance from (location_x, location_y) to each base;
  # the base coordinates used are noted at the end of each line.
  mutate(
    home_dist = abs(location_x) + abs(location_y),            # (0, 0)
    first_dist = abs(location_x - 2) + abs(location_y),       # (2, 0)
    second_dist = abs(location_x - 2) + abs(location_y - 2),  # (2, 2)
    third_dist = abs(location_x) + abs(location_y - 2),       # (0, 2)
    fifth_dist = abs(location_x) + abs(location_y - 4),       # (0, 4)
    zeroth_dist = abs(location_x - 4) + abs(location_y)       # (4, 0)
  )
# Linear model of era on control, stuff and guile; print the fit summary.
# Rows with missing values in any model variable are dropped by lm().
summary(
  lm(era ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = era ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4158 -0.7190 -0.0993 0.6076 3.6638
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.7049 0.3507 24.820 < 2e-16 ***
## control -2.1322 0.3614 -5.900 3.62e-08 ***
## stuff -2.7770 0.3435 -8.084 6.45e-13 ***
## guile -1.4378 0.3725 -3.860 0.000186 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.043 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.524, Adjusted R-squared: 0.5118
## F-statistic: 42.93 on 3 and 117 DF, p-value: < 2.2e-16
# Same three predictors (control, stuff, guile), with so9 as the response.
summary(
  lm(so9 ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = so9 ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1327 -0.5575 -0.0360 0.5998 2.1223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.9473 0.2972 6.551 1.60e-09 ***
## control 1.1947 0.3063 3.900 0.000161 ***
## stuff 4.9937 0.2911 17.152 < 2e-16 ***
## guile 2.5963 0.3157 8.223 3.09e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8841 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.7833, Adjusted R-squared: 0.7778
## F-statistic: 141 on 3 and 117 DF, p-value: < 2.2e-16
# Same three predictors (control, stuff, guile), with bb9 as the response.
summary(
  lm(bb9 ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = bb9 ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.25332 -0.48762 -0.07595 0.37931 2.34787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.7435 0.2302 20.610 <2e-16 ***
## control -5.3139 0.2372 -22.405 <2e-16 ***
## stuff 0.4802 0.2254 2.130 0.0353 *
## guile -0.2774 0.2445 -1.135 0.2589
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6845 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.8165, Adjusted R-squared: 0.8118
## F-statistic: 173.5 on 3 and 117 DF, p-value: < 2.2e-16
# Same three predictors (control, stuff, guile), with h9 as the response.
summary(
  lm(h9 ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = h9 ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7858 -0.8526 -0.1129 0.6175 4.0456
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.4496 0.4279 31.428 < 2e-16 ***
## control -0.9516 0.4410 -2.158 0.032989 *
## stuff -3.8206 0.4192 -9.115 2.64e-15 ***
## guile -1.7309 0.4546 -3.808 0.000225 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.273 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.4928, Adjusted R-squared: 0.4798
## F-statistic: 37.89 on 3 and 117 DF, p-value: < 2.2e-16
# Same three predictors (control, stuff, guile), with hr9 as the response.
summary(
  lm(hr9 ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = hr9 ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78499 -0.21249 -0.04499 0.23443 0.91711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34381 0.11553 20.287 < 2e-16 ***
## control -0.09378 0.11905 -0.788 0.43244
## stuff -0.74634 0.11316 -6.595 1.29e-09 ***
## guile -0.47886 0.12271 -3.902 0.00016 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3436 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.3645, Adjusted R-squared: 0.3482
## F-statistic: 22.37 on 3 and 117 DF, p-value: 1.62e-11
# Same three predictors (control, stuff, guile), with babip as the response.
summary(
  lm(babip ~ control + stuff + guile, data = players_stats)
)
##
## Call:
## lm(formula = babip ~ control + stuff + guile, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.074949 -0.016571 -0.002183 0.016591 0.067342
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.311750 0.008771 35.543 < 2e-16 ***
## control -0.009602 0.009038 -1.062 0.29026
## stuff -0.023954 0.008591 -2.788 0.00619 **
## guile -0.004795 0.009317 -0.515 0.60775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02609 on 117 degrees of freedom
## (234 observations deleted due to missingness)
## Multiple R-squared: 0.07673, Adjusted R-squared: 0.05305
## F-statistic: 3.241 on 3 and 117 DF, p-value: 0.02465
# Scatter plot of babip (y) against stuff (x), one point per row.
ggplot(data = players_stats, mapping = aes(x = stuff, y = babip)) +
  geom_point()
## Warning: Removed 234 rows containing missing values (`geom_point()`).
# Model ba on sight, thwack, ferocity and stealth; keep the summary object in
# `s_ba1` and print it.
s_ba1 <- summary(
  lm(ba ~ sight + thwack + ferocity + stealth, data = players_stats)
)
s_ba1
##
## Call:
## lm(formula = ba ~ sight + thwack + ferocity + stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.115705 -0.018932 -0.001313 0.016254 0.217690
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.115818 0.008097 14.304 < 2e-16 ***
## sight 0.032469 0.008198 3.961 9.98e-05 ***
## thwack 0.077683 0.008216 9.455 < 2e-16 ***
## ferocity 0.112717 0.008200 13.746 < 2e-16 ***
## stealth 0.039579 0.007870 5.029 9.95e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0338 on 229 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.6521, Adjusted R-squared: 0.646
## F-statistic: 107.3 on 4 and 229 DF, p-value: < 2.2e-16
# Extended ba model: the four predictors used for `s_ba1` plus control and
# hustle. The summary is kept in `s_ba2` and printed.
s_ba2 <- summary(
  lm(
    ba ~ sight + thwack + ferocity + control + hustle + stealth,
    data = players_stats
  )
)
s_ba2
##
## Call:
## lm(formula = ba ~ sight + thwack + ferocity + control + hustle +
## stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11091 -0.01784 -0.00089 0.01664 0.21113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.118872 0.009014 13.187 < 2e-16 ***
## sight 0.032956 0.008105 4.066 6.59e-05 ***
## thwack 0.077836 0.008120 9.586 < 2e-16 ***
## ferocity 0.111562 0.008113 13.751 < 2e-16 ***
## control -0.018253 0.007175 -2.544 0.0116 *
## hustle 0.011946 0.007672 1.557 0.1208
## stealth 0.038042 0.007865 4.837 2.43e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0334 on 227 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.6633, Adjusted R-squared: 0.6544
## F-statistic: 74.53 on 6 and 227 DF, p-value: < 2.2e-16
# Model obp on sight, thwack, ferocity and stealth; print the fit summary.
summary(
  lm(obp ~ sight + thwack + ferocity + stealth, data = players_stats)
)
##
## Call:
## lm(formula = obp ~ sight + thwack + ferocity + stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.131952 -0.017655 -0.001154 0.015910 0.195323
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.167010 0.007966 20.966 < 2e-16 ***
## sight 0.037142 0.008065 4.605 6.83e-06 ***
## thwack 0.056419 0.008083 6.980 3.15e-11 ***
## ferocity 0.104686 0.008067 12.977 < 2e-16 ***
## stealth 0.040436 0.007742 5.223 3.96e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03326 on 229 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.6059, Adjusted R-squared: 0.599
## F-statistic: 88 on 4 and 229 DF, p-value: < 2.2e-16
# Model slg on sight, thwack, ferocity and stealth; print the fit summary.
summary(
  lm(slg ~ sight + thwack + ferocity + stealth, data = players_stats)
)
##
## Call:
## lm(formula = slg ~ sight + thwack + ferocity + stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25690 -0.05107 -0.00685 0.04039 0.62843
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09851 0.02013 4.895 1.86e-06 ***
## sight 0.07278 0.02038 3.572 0.000432 ***
## thwack 0.16790 0.02042 8.222 1.50e-14 ***
## ferocity 0.29977 0.02038 14.708 < 2e-16 ***
## stealth 0.21795 0.01956 11.141 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08402 on 229 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.7012, Adjusted R-squared: 0.696
## F-statistic: 134.4 on 4 and 229 DF, p-value: < 2.2e-16
# Model the derived walk rate (bb / pa) on sight and thwack.
summary(
  lm(bb_pa ~ sight + thwack, data = players_stats)
)
##
## Call:
## lm(formula = bb_pa ~ sight + thwack, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.052823 -0.009067 -0.000894 0.008600 0.053601
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.059028 0.002939 20.085 < 2e-16 ***
## sight 0.008546 0.003671 2.328 0.0208 *
## thwack -0.023263 0.003605 -6.454 6.34e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01524 on 231 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.1599, Adjusted R-squared: 0.1526
## F-statistic: 21.98 on 2 and 231 DF, p-value: 1.822e-09
# Model the derived strikeout rate (k / pa) on sight and thwack.
summary(
  lm(k_pa ~ sight + thwack, data = players_stats)
)
##
## Call:
## lm(formula = k_pa ~ sight + thwack, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16082 -0.02313 -0.00209 0.01874 0.31549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.333760 0.008129 41.059 < 2e-16 ***
## sight -0.073341 0.010155 -7.222 7.35e-12 ***
## thwack -0.178307 0.009970 -17.885 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04214 on 231 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.6404, Adjusted R-squared: 0.6373
## F-statistic: 205.7 on 2 and 231 DF, p-value: < 2.2e-16
# Model the share of hits that are home runs (hr / hit) on ferocity alone.
summary(
  lm(hr_h ~ ferocity, data = players_stats)
)
##
## Call:
## lm(formula = hr_h ~ ferocity, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.19180 -0.02814 -0.00445 0.02517 0.32018
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.091880 0.007546 12.177 <2e-16 ***
## ferocity 0.110706 0.011772 9.404 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04982 on 232 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.276, Adjusted R-squared: 0.2729
## F-statistic: 88.44 on 1 and 232 DF, p-value: < 2.2e-16
# Model the share of hits that are triples (x3b / hit) on stealth alone.
summary(
  lm(x3b_h ~ stealth, data = players_stats)
)
##
## Call:
## lm(formula = x3b_h ~ stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11429 -0.01851 -0.00274 0.01558 0.15507
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003854 0.004484 0.86 0.391
## stealth 0.115243 0.007883 14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0342 on 232 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.4795, Adjusted R-squared: 0.4773
## F-statistic: 213.7 on 1 and 232 DF, p-value: < 2.2e-16
# Model the share of hits that are doubles (x2b / hit) on stealth alone.
summary(
  lm(x2b_h ~ stealth, data = players_stats)
)
##
## Call:
## lm(formula = x2b_h ~ stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.21002 -0.04314 -0.00012 0.04176 0.37623
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.198795 0.009342 21.28 <2e-16 ***
## stealth 0.301490 0.016425 18.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07127 on 232 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.5922, Adjusted R-squared: 0.5905
## F-statistic: 336.9 on 1 and 232 DF, p-value: < 2.2e-16
# Model the share of hits that are singles (x1b / hit) on ferocity and stealth.
summary(
  lm(x1b_h ~ ferocity + stealth, data = players_stats)
)
##
## Call:
## lm(formula = x1b_h ~ ferocity + stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29234 -0.04978 -0.00405 0.04612 0.31885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.70166 0.01422 49.357 < 2e-16 ***
## ferocity -0.10450 0.01770 -5.904 1.26e-08 ***
## stealth -0.41608 0.01727 -24.097 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07492 on 231 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.7266, Adjusted R-squared: 0.7243
## F-statistic: 307 on 2 and 231 DF, p-value: < 2.2e-16
# Model the combined doubles-plus-triples share of hits, (x2b + x3b) / hit,
# on stealth alone.
summary(
  lm(xbh_h ~ stealth, data = players_stats)
)
##
## Call:
## lm(formula = xbh_h ~ stealth, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31631 -0.04476 -0.00328 0.03960 0.28901
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.202650 0.009462 21.42 <2e-16 ***
## stealth 0.416733 0.016635 25.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07218 on 232 degrees of freedom
## (121 observations deleted due to missingness)
## Multiple R-squared: 0.7301, Adjusted R-squared: 0.7289
## F-statistic: 627.6 on 1 and 232 DF, p-value: < 2.2e-16
# Scatter plot of der (y) against magnet (x), one point per row.
ggplot(data = players_stats, mapping = aes(x = magnet, y = der)) +
  geom_point()
## Warning: Removed 122 rows containing missing values (`geom_point()`).
# Model pa_fielded on reach, magnet and reflex; print the fit summary.
summary(
  lm(pa_fielded ~ reach + magnet + reflex, data = players_stats)
)
##
## Call:
## lm(formula = pa_fielded ~ reach + magnet + reflex, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -346.80 -47.49 8.51 50.86 263.07
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 110.038 18.809 5.850 1.68e-08 ***
## reach 286.411 21.652 13.228 < 2e-16 ***
## magnet 2.707 21.043 0.129 0.898
## reflex 2.656 19.610 0.135 0.892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 94.59 on 229 degrees of freedom
## (122 observations deleted due to missingness)
## Multiple R-squared: 0.4354, Adjusted R-squared: 0.428
## F-statistic: 58.86 on 3 and 229 DF, p-value: < 2.2e-16
# Model der on magnet alone (the relationship plotted just above).
summary(
  lm(der ~ magnet, data = players_stats)
)
##
## Call:
## lm(formula = der ~ magnet, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32694 -0.02639 0.00343 0.02601 0.11664
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.621676 0.006489 95.81 <2e-16 ***
## magnet 0.164328 0.010736 15.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04854 on 231 degrees of freedom
## (122 observations deleted due to missingness)
## Multiple R-squared: 0.5035, Adjusted R-squared: 0.5014
## F-statistic: 234.3 on 1 and 231 DF, p-value: < 2.2e-16
# Model dp_f on reflex and the derived Manhattan distance to first base.
summary(
  lm(dp_f ~ reflex + first_dist, data = players_stats)
)
##
## Call:
## lm(formula = dp_f ~ reflex + first_dist, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.492 -3.064 -0.960 2.015 37.735
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.9773 1.0428 8.609 1.18e-15 ***
## reflex 5.3504 1.1357 4.711 4.26e-06 ***
## first_dist -2.0296 0.1825 -11.121 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.473 on 230 degrees of freedom
## (122 observations deleted due to missingness)
## Multiple R-squared: 0.4025, Adjusted R-squared: 0.3973
## F-statistic: 77.46 on 2 and 230 DF, p-value: < 2.2e-16
# Model adv_fields (advances / pa_fielded) on reflex and the derived Manhattan
# distance to home.
summary(
  lm(adv_fields ~ reflex + home_dist, data = players_stats)
)
##
## Call:
## lm(formula = adv_fields ~ reflex + home_dist, data = players_stats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033770 -0.008927 -0.001137 0.008578 0.056975
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0504828 0.0026818 18.824 < 2e-16 ***
## reflex -0.0132374 0.0028517 -4.642 5.80e-06 ***
## home_dist -0.0029512 0.0003708 -7.958 7.93e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01374 on 230 degrees of freedom
## (122 observations deleted due to missingness)
## Multiple R-squared: 0.2563, Adjusted R-squared: 0.2498
## F-statistic: 39.63 on 2 and 230 DF, p-value: 1.625e-15