Week 10 figures - Lectures 17 and 18

generalized linear model
logistic regression
Author
Affiliation
Lecture date

March 10, 2026

Modified

March 12, 2026

Code
# cleaning
library(tidyverse)

# visualization
theme_set(theme_bw() +
            theme(panel.grid = element_blank(),
                  axis.text = element_text(size = 18),
                  axis.title = element_text(size = 18),
                  text = element_text(family = "Lato"),
                  strip.text = element_text(size = 16),
                  strip.background = element_rect(fill = "#FFFFFF")))
library(patchwork)
library(ggeffects)
library(flextable)
library(GGally)
library(equatiomatic)

# data
library(palmerpenguins)

# analysis
library(car)
library(performance)
library(broom)
library(DHARMa)
library(MuMIn)
library(lmtest)
library(emmeans)

bats <- read_csv(here::here("data", 
                            "Cleaned Inf MYLU Band Recap Data for Ecotraps Analysis.csv")) 

math notation

simple linear regression

\[ E[y_i] = a + bx_i \]

\[ var[y_i] = s^2 \]

generalized form:

\[ E[y_i] = a + bx_i \]

\[ var[y_i] = v(E[y_i]) \]

GLM structure using Gaussian example

random component

\[ Y_i \sim N(\mu_i, \sigma^2) \]

\[ Y_i \sim Normal(\mu_i, \sigma^2) \]

systematic component

\[ \eta_i = \sum^{p-1}_{n = 0}\beta_jx_{ij} \]

\[ \mu_i = \beta_0 + \beta x_i \]

GLM structure using binary example

random

\[ E(Y) = p \]

systematic

inverse logit

\[ \frac{1}{1+e^{-x}} \]

model equation

\[ log(\frac{p}{1-p}) = 13.0974 - 0.6741 \times distance \]

bat equations

\[ log(\frac{p}{1-p}) = 5.9443 - 0.7178x \]

\[ log(\frac{p}{1-p}) = 0.9239 - 0.2039x \]

binomial/bernoulli example

Code
set.seed(666)
lizard <- tibble(
  pushup = c(rep(1, 30), rep(0, 30)),
  distance = c(rnorm(n = 20, mean = 10, sd = 2), 
               rnorm(n = 20, mean = 20, sd = 2), 
               rnorm(n = 20, mean = 30, sd = 2))
) %>% 
  mutate(distance = round(distance, 1))

slice_sample(lizard, n = 10)
# A tibble: 10 × 2
   pushup distance
    <dbl>    <dbl>
 1      1      9.3
 2      1     11.7
 3      0     28.6
 4      0     33.7
 5      0     20.5
 6      0     28.8
 7      0     18.8
 8      1     14.3
 9      1     17.7
10      1     10.7
Code
ggplot(lizard,
       aes(x = distance,
           y = pushup)) +
  geom_point(size = 3,
             shape = 21) +
  scale_y_continuous(limits = c(0, 1),
                     breaks = c(0, 1)) +
  geom_smooth(method = "glm",
              method.args = list(family = "binomial"),
              se = FALSE,
              linewidth = 1)

Code
ggplot(lizard,
       aes(x = distance,
           y = pushup)) +
  geom_point(size = 3,
             shape = 21) +
  scale_y_continuous(limits = c(0, 1),
                     breaks = c(0, 1)) +
  geom_smooth(method = "lm",
              se = FALSE,
              linewidth = 1)

Code
liz_mod <- glm(pushup ~ distance,
               data = lizard,
               family = "binomial")

summary(liz_mod)

Call:
glm(formula = pushup ~ distance, family = "binomial", data = lizard)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept)  13.0974     4.7342   2.767  0.00567 **
distance     -0.6741     0.2458  -2.743  0.00609 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 83.178  on 59  degrees of freedom
Residual deviance: 22.717  on 58  degrees of freedom
AIC: 26.717

Number of Fisher Scoring iterations: 8
Code
plogis(13.0974)
[1] 0.9999979
Code
plot(
  simulateResiduals(liz_mod)
)

Code
confint(liz_mod)
                2.5 %     97.5 %
(Intercept)  6.572132 25.5395134
distance    -1.319798 -0.3373257
Code
mod_preds <- ggpredict(liz_mod,
                       terms = "distance [3:40 by = 1]") 

ggplot(lizard,
       aes(x = distance,
           y = pushup)) +
  geom_point(size = 3,
             alpha = 0.4) +
  geom_ribbon(data = mod_preds,
              aes(x = x,
                  y = predicted,
                  ymin = conf.low,
                  ymax = conf.high),
              alpha = 0.4) +
  geom_line(data = mod_preds,
            aes(x = x,
                y = predicted)) +
  scale_y_continuous(limits = c(0, 1),
                     breaks = c(0, 1)) 

Code
# what is the probability of a pushup at 20cm?
ggpredict(liz_mod,
          terms = "distance [0]")
# Predicted probabilities of pushup

distance | Predicted |     95% CI
---------------------------------
       0 |      1.00 | 0.98, 1.00
Code
# what is the probability of a pushup at 20cm?
ggpredict(liz_mod,
          terms = "distance [20]")
# Predicted probabilities of pushup

distance | Predicted |     95% CI
---------------------------------
      20 |      0.41 | 0.18, 0.67
Code
# what is the probability of a pushup at 10cm?
ggpredict(liz_mod,
          terms = "distance [10]")
# Predicted probabilities of pushup

distance | Predicted |     95% CI
---------------------------------
      10 |      1.00 | 0.86, 1.00
Code
# what is the probability of a pushup at 30cm?
ggpredict(liz_mod,
          terms = "distance [30]")
# Predicted probabilities of pushup

distance | Predicted |     95% CI
---------------------------------
      30 |      0.00 | 0.00, 0.14
Code
# what is the probability of a pushup at 20cm?
predict(liz_mod, 
        newdata = data.frame(distance = 20), 
        type = "response")
       1 
0.405084 
Code
r.squaredLR(liz_mod)
[1] 0.6349364
attr(,"adj.r.squared")
[1] 0.8465819
Code
gtsummary::tbl_regression(liz_mod)
Characteristic log(OR) 95% CI p-value
distance -0.67 -1.3, -0.34 0.006
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
gtsummary::tbl_regression(liz_mod,
                          exponentiate = TRUE)
Characteristic OR 95% CI p-value
distance 0.51 0.27, 0.71 0.006
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
as_flextable(liz_mod)

Estimate

Standard Error

z value

Pr(>|z|)

(Intercept)

13.097

4.734

2.767

0.0057

**

distance

-0.674

0.246

-2.743

0.0061

**

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 83.18 on 59 degrees of freedom

Residual deviance: 22.72 on 58 degrees of freedom

negative binomial example

Code
set.seed(666)
nbinom_df <- bind_cols(
  size1 = rnbinom(mu = 10, size = 1, n = 100),
  size10 = rnbinom(mu = 10, size = 10, n = 100),
  size100 = rnbinom(mu = 10, size = 100, n = 100)
)

ggplot(data.frame(x = 1:20), aes(x)) +
  stat_function(geom = "point", n = 20, fun = dnbinom, args = list(mu = 4, x = 5), size = 2) +
  stat_function(geom = "line", n = 20, fun = dnbinom, args = list(mu = 4, x = 5))

Code
size1 <- ggplot(nbinom_df, aes(x = size1)) +
  geom_histogram(bins = 8, fill = "cornflowerblue", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(mu~"= 10, k = 1")) +
  theme(axis.title.x = element_blank())

size10 <- ggplot(nbinom_df, aes(x = size10)) +
  geom_histogram(bins = 8, fill = "cornflowerblue", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(mu~"= 10, k = 10")) +
  theme(axis.title.x = element_blank())

size100 <- ggplot(nbinom_df, aes(x = size100)) +
  geom_histogram(bins = 8, fill = "cornflowerblue", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(mu~"= 10, k = 100")) +
  theme(axis.title.x = element_blank())

size1 + size10 + size100

poisson example

Code
set.seed(666)
pois_df <- bind_cols(
  lambda1 = rpois(lambda = 1, n = 100),
  lambda5 = rpois(lambda = 5, n = 100),
  lambda20 = rpois(lambda = 20, n = 100)
)

lambda1 <- ggplot(pois_df, aes(x = lambda1)) +
  geom_histogram(bins = 8, fill = "darkorange", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(lambda~"= 1")) +
  theme(axis.title.x = element_blank())

lambda5 <- ggplot(pois_df, aes(x = lambda5)) +
  geom_histogram(bins = 8, fill = "darkorange", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(lambda~"= 5")) +
  theme(axis.title.x = element_blank())

lambda20 <- ggplot(pois_df, aes(x = lambda20)) +
  geom_histogram(bins = 8, fill = "darkorange", color = "grey8") +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 40)) +
  theme_classic() +
  labs(title = expression(lambda~"= 20")) +
  theme(axis.title.x = element_blank())

lambda1 + lambda5 + lambda20

bat example

cleaning and displaying rows

Code
bats_clean <- bats |> 
  mutate(date.x = mdy(date.x)) |> 
  # create categories for disease load based on log-transformed values
  mutate(disease_load = case_when(
    logearlyloads < -3 ~ "low",
    between(logearlyloads, -3, -2) ~ "moderate",
    between(logearlyloads, -2, 0) ~ "high",
  ),
  # set factor levels for disease load: low --> high
  disease_load = fct_relevel(disease_load, "low", "moderate", "high")) |> 
  # rename columns for clearer interpretation
  rename(recaptured_yn = EverRecapturedYN.x,
         temp = earlytemp) |> 
  # select columns of interest
  select(recaptured_yn, temp, disease_load) |> 
  filter(disease_load != "high")

slice_sample(
  bats_clean,
  n = 10
)
# A tibble: 10 × 3
   recaptured_yn  temp disease_load
           <dbl> <dbl> <fct>       
 1             0   8.2 low         
 2             0   8.3 moderate    
 3             1   7.4 low         
 4             1   7.6 low         
 5             0   9.7 low         
 6             0   9.6 low         
 7             0   9.2 low         
 8             0   8.2 low         
 9             1   5.4 low         
10             0   8.2 low         

fitting model and looking at diagnostics

Code
bat_mod <- glm(recaptured_yn ~ temp * disease_load,
               data = bats_clean,
               family = "binomial")

plot(
  simulateResiduals(bat_mod)
)

Code
summary(bat_mod)

Call:
glm(formula = recaptured_yn ~ temp * disease_load, family = "binomial", 
    data = bats_clean)

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)    
(Intercept)                 5.9443     1.0358   5.739 9.52e-09 ***
temp                       -0.7178     0.1257  -5.713 1.11e-08 ***
disease_loadmoderate       -5.0204     1.6304  -3.079  0.00208 ** 
temp:disease_loadmoderate   0.5139     0.2010   2.557  0.01055 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 328.45  on 236  degrees of freedom
Residual deviance: 275.56  on 233  degrees of freedom
  (2 observations deleted due to missingness)
AIC: 283.56

Number of Fisher Scoring iterations: 4

generating predictions

Code
model_preds <- ggpredict(bat_mod, 
                         terms = c("temp[all]", 
                                   "disease_load")) |> 
  rename(disease_load = group)

model_preds_with0 <- ggpredict(bat_mod, 
                         terms = c("temp[0:12.3 by = 0.1]", 
                                   "disease_load")) |> 
  rename(disease_load = group)

ggpredict(bat_mod,
          terms = c("temp [0]",
                    "disease_load"))
# Predicted probabilities of recaptured_yn

disease_load: low

temp | Predicted |     95% CI
-----------------------------
   0 |      1.00 | 0.98, 1.00

disease_load: moderate

temp | Predicted |     95% CI
-----------------------------
   0 |      0.72 | 0.18, 0.97
Code
ggpredict(bat_mod,
          terms = c("temp [4.8]",
                    "disease_load"))
# Predicted probabilities of recaptured_yn

disease_load: low

temp | Predicted |     95% CI
-----------------------------
4.80 |      0.92 | 0.83, 0.97

disease_load: moderate

temp | Predicted |     95% CI
-----------------------------
4.80 |      0.49 | 0.24, 0.74
Code
ggpredict(bat_mod,
          terms = c("temp [8.0]",
                    "disease_load"))
# Predicted probabilities of recaptured_yn

disease_load: low

temp | Predicted |     95% CI
-----------------------------
   8 |      0.55 | 0.47, 0.63

disease_load: moderate

temp | Predicted |     95% CI
-----------------------------
   8 |      0.33 | 0.22, 0.47
Code
ggpredict(bat_mod,
          terms = c("temp [12.3]",
                    "disease_load"))
# Predicted probabilities of recaptured_yn

disease_load: low

 temp | Predicted |     95% CI
------------------------------
12.30 |      0.05 | 0.02, 0.14

disease_load: moderate

 temp | Predicted |     95% CI
------------------------------
12.30 |      0.17 | 0.04, 0.48

generating effects

Code
emtrends(bat_mod,
         specs = "disease_load",
         var = "temp")
 disease_load temp.trend    SE  df asymp.LCL asymp.UCL
 low              -0.718 0.126 Inf    -0.964    -0.472
 moderate         -0.204 0.157 Inf    -0.511     0.104

Confidence level used: 0.95 
Code
# moderate

plotting

Code
ggplot(data = bats_clean,
       aes(x = temp,
           y = recaptured_yn)) +
  geom_point(aes(color = disease_load),
             shape = 21) + 
  scale_color_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  scale_x_continuous(limits = c(4, 12.5),
                     breaks = c(4, 6, 8, 10, 12)) +
  facet_wrap(~ disease_load) +
  theme(legend.position = "none")

Code
ggplot(data = bats_clean,
       aes(x = temp,
           y = recaptured_yn)) +
  geom_point(aes(color = disease_load),
             shape = 21) +
  geom_line(data = model_preds,
            aes(x = x,
                y = predicted,
                color = disease_load),
            linewidth = 2) +
  geom_ribbon(data = model_preds,
              aes(x = x,
                  y = predicted,
                  ymin = conf.low,
                  ymax = conf.high,
                  fill = disease_load),
              alpha = 0.1) +
  facet_wrap(~ disease_load,
             labeller = labeller(
               disease_load = c(low = "Low disease load", 
                                moderate = "Moderate disease load")
             )) + 
  scale_color_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  scale_x_continuous(limits = c(4, 12.5),
                     breaks = c(4, 6, 8, 10, 12)) +
  labs(x = "Temperature (\U00B0 C)",
       y = "Probability of recapture success") +
  theme(legend.position = "none")

Code
ggplot(data = model_preds_with0,
       aes(x = x,
           y = predicted)) +
  geom_ribbon(aes(x = x,
                  y = predicted,
                  ymin = conf.low,
                  ymax = conf.high,
                  fill = disease_load),
              alpha = 0.1) +
  geom_line(aes(color = disease_load),
            linewidth = 2) + 
  scale_color_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  scale_fill_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  facet_wrap(~ disease_load) +
  scale_x_continuous(limits = c(0, 12.5),
                     breaks = c(0, 2, 4, 6, 8, 10, 12)) +
  theme(legend.position = "none")

Code
ggplot(data = model_preds_with0,
       aes(x = x,
           y = predicted)) +
  geom_point(data = bats_clean,
             aes(x = temp,
                 y = recaptured_yn,
                 color = disease_load),
             shape = 21) +
  geom_line(aes(color = disease_load),
            linewidth = 2) + 
  scale_color_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  scale_fill_manual(values = c(
    "low" = "tomato2",
    "moderate" = "turquoise4"
  )) +
  facet_wrap(~ disease_load) +
  scale_x_continuous(limits = c(0, 12.5),
                     breaks = c(0, 2, 4, 6, 8, 10, 12)) +
  theme(legend.position = "none")

Code
as_flextable(bat_mod)

Estimate

Standard Error

z value

Pr(>|z|)

(Intercept)

5.944

1.036

5.739

0.0000

***

temp

-0.718

0.126

-5.713

0.0000

***

disease_loadmoderate

-5.020

1.630

-3.079

0.0021

**

temp:disease_loadmoderate

0.514

0.201

2.557

0.0106

*

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 328.4 on 236 degrees of freedom

Residual deviance: 275.6 on 233 degrees of freedom

(2 observations deleted due to missingness)

Citation

BibTeX citation:
@online{bui2026,
  author = {Bui, An},
  title = {Week 10 Figures - {Lectures} 17 and 18},
  date = {2026-03-10},
  url = {https://winter-2026.envs-193ds.com/lecture/lecture_week-10.html},
  langid = {en}
}
For attribution, please cite this work as:
Bui, An. 2026. “Week 10 Figures - Lectures 17 and 18.” March 10, 2026. https://winter-2026.envs-193ds.com/lecture/lecture_week-10.html.