Week 8 figures - Lectures 14 and 15

multiple linear regression
AIC
Author
Affiliation
Lecture date

February 24, 2026

Modified

March 12, 2026

Code
# cleaning
library(tidyverse)
library(readxl)
library(here)
library(janitor)

# visualization
theme_set(theme_classic() +
            theme(panel.grid = element_blank(),
                  axis.text = element_text(size = 18),
                  axis.title = element_text(size = 18),
                  text = element_text(family = "Lato")))
library(patchwork)
library(ggeffects)
library(flextable)
library(GGally)

# data
library(palmerpenguins)

# analysis
library(car)
library(performance)
library(broom)
library(DHARMa)
library(MuMIn)
library(gtsummary)
library(emmeans)

drought_exp <- read_xlsx(
  # file path
  here("data", "Valliere_etal_EcoApps_Data.xlsx"),
  # specifying which sheet you want to read in
  sheet = "First Harvest"
)

multiple linear regression equation

\[ \begin{align} y &= \beta_0 + \beta_1x_1 + \beta_2x_2 + ... \beta_kx_k + \epsilon \end{align} \]

formulas

sum of squares for linear regression

regression (or model)

\[ SS_{reg} = \sum_{i = 1}^{n}(\hat{y} - \bar{y})^2 \]

error

\[ SS_{err} = \sum_{i = 1}^{n}(y_i - \hat{y})^2 \]

total

\[ SS_{tot} = \sum_{i = 1}^n(y_i - \bar{y}) \]

mean square

regression

\[ MS_{reg} = \frac{SS_{reg}}{1} \]

error

\[ MS_{err} = \frac{SS_{err}}{n - 2} \]

F-statistic

\[ F = \frac{MS_{reg}}{MS_{err}} \]

soil example

Code
set.seed(666)
# sample size
n <- 64

soil_df <- tibble(
  # compaction
  force_mpa = round(rnorm(n = n, mean = 0.7, sd = 0.03), 3),
  # root length
  root_mm = round(rnorm(n = n, mean = -1, sd = 0.02)*force_mpa, 3) + 5,
  # soil salinity
  salinity = round(rnorm(n = n, mean = 3, sd = 0.3)*force_mpa, 3)
) 

ggplot(data = soil_df,
       aes(x = force_mpa,
           y = root_mm)) +
  geom_point()

Code
ggplot(data = soil_df,
       aes(x = force_mpa,
           y = salinity)) +
  geom_point()

Code
soil_lm <- lm(
  root_mm ~ force_mpa,
  data = soil_df
)

par(mfrow = c(2, 2))
plot(soil_lm)

Code
summary(soil_lm)

Call:
lm(formula = root_mm ~ force_mpa, data = soil_df)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.033749 -0.009088  0.000251  0.008962  0.028832 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.9569     0.0357   138.9   <2e-16 ***
force_mpa    -0.9400     0.0511   -18.4   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.01338 on 62 degrees of freedom
Multiple R-squared:  0.8451,    Adjusted R-squared:  0.8427 
F-statistic: 338.4 on 1 and 62 DF,  p-value: < 2.2e-16
Code
cor.test(soil_df$root_mm, soil_df$force_mpa,
         method = "pearson")

    Pearson's product-moment correlation

data:  soil_df$root_mm and soil_df$force_mpa
t = -18.395, df = 62, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9503672 -0.8701422
sample estimates:
       cor 
-0.9193192 
Code
cor.test(soil_df$root_mm, soil_df$force_mpa,
         method = "spearman")

    Spearman's rank correlation rho

data:  soil_df$root_mm and soil_df$force_mpa
S = 83662, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
-0.9153354 
Code
cor.test(soil_df$salinity, soil_df$force_mpa,
         method = "pearson")

    Pearson's product-moment correlation

data:  soil_df$salinity and soil_df$force_mpa
t = 2.5713, df = 62, p-value = 0.01254
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.06995477 0.51680060
sample estimates:
      cor 
0.3104261 
Code
cor.test(soil_df$salinity, soil_df$force_mpa,
         method = "spearman")

    Spearman's rank correlation rho

data:  soil_df$salinity and soil_df$force_mpa
S = 29861, p-value = 0.01087
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.3163656 
Code
model_preds <- ggpredict(
  soil_lm,
  terms = "force_mpa [0.63:0.77 by = 0.01]"
) 

ggpredict(
  soil_lm,
  terms = "force_mpa [0.63:0.77 by = 0.01]") |> 
  plot(show_data = TRUE)

Code
flextable::as_flextable(soil_lm) 

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

4.957

0.036

138.866

0.0000

***

force_mpa

-0.940

0.051

-18.395

0.0000

***

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.01338 on 62 degrees of freedom

Multiple R-squared: 0.8451, Adjusted R-squared: 0.8427

F-statistic: 338.4 on 62 and 1 DF, p-value: 0.0000

Code
flextable::as_flextable(soil_lm) %>% 
  set_formatter(
    # special function to represent p < 0.001
    values = list("p.value" = function(x){ 
      z <- scales::label_pvalue()(x)
      z[!is.finite(x)] <- ""
      z
    })
  )

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

4.957

0.036

138.866

<0.001

***

force_mpa

-0.940

0.051

-18.395

<0.001

***

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.01338 on 62 degrees of freedom

Multiple R-squared: 0.8451, Adjusted R-squared: 0.8427

F-statistic: 338.4 on 62 and 1 DF, p-value: 0.0000

plant example

Code
set.seed(666)
# sample size
n <- 64
plant_df <- tibble(
  # predictor variables
  temperature = round(rnorm(n = n, mean = 28, sd = 1), digits = 1),
  light = round(rnorm(n = n, mean = 1, sd = 0.2), digits = 1),
  ph = rnorm(n = n, mean = 7, sd = 0.01),
  
  # response: growth in cm/week
  growth = light*rnorm(n = n, mean = 0.3, sd = 0.1) + temperature/round(rnorm(n = n, mean = 5, sd = 0.1))
) 

ggplot(data = plant_df,
       aes(x = temperature,
           y = growth)) +
  geom_point()

Code
ggplot(data = plant_df,
       aes(x = ph,
           y = light)) +
  geom_point()

Code
ggplot(data = plant_df,
       aes(x = light,
           y = growth)) +
  geom_point()

Code
plant_lm <- lm(growth ~ temperature + ph + light,
               data = plant_df)
simulateResiduals(plant_lm) %>% plot()

Code
summary(plant_lm)

Call:
lm(formula = growth ~ temperature + ph + light, data = plant_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.23209 -0.06571  0.01010  0.06173  0.19950 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -6.67140    6.80194  -0.981    0.331    
temperature  0.19626    0.01058  18.558  < 2e-16 ***
ph           0.96241    0.96806   0.994    0.324    
light        0.35196    0.05939   5.926 1.63e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0911 on 60 degrees of freedom
Multiple R-squared:  0.8759,    Adjusted R-squared:  0.8696 
F-statistic: 141.1 on 3 and 60 DF,  p-value: < 2.2e-16
Code
ggpredict(plant_lm,
          terms = c("temperature")) %>% 
  plot(show_data = TRUE)

Code
ggpredict(plant_lm,
          terms = c("ph")) %>% 
  plot(show_data = TRUE)

Code
ggpredict(plant_lm,
          terms = c("light")) %>% 
  plot(show_data = TRUE)

Code
plant_model <- lm(growth ~ light + temperature, 
                  data = plant_df)
Code
simulateResiduals(plant_model,
                  plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0 0.756 0.792 0.816 0.06 0.132 0.408 0.708 0.988 0.892 0.452 0.876 0.644 0.512 0.984 0.904 0.936 0.56 0.58 0.724 ...
Code
check_model(plant_model)

Code
pairs(plant_df, upper.panel = NULL)

Code
ggpairs(plant_df)

Code
cor(plant_df)
            temperature       light          ph      growth
temperature  1.00000000  0.15540780 -0.06657697  0.89551840
light        0.15540780  1.00000000 -0.04769987  0.40397013
ph          -0.06657697 -0.04769987  1.00000000 -0.02466729
growth       0.89551840  0.40397013 -0.02466729  1.00000000
Code
vif(plant_model)
      light temperature 
   1.024749    1.024749 
Code
summary(plant_model)

Call:
lm(formula = growth ~ light + temperature, data = plant_df)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.227058 -0.071297  0.006266  0.063745  0.197573 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.08464    0.29166   0.290    0.773    
light        0.34972    0.05934   5.894 1.76e-07 ***
temperature  0.19563    0.01056  18.534  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.09109 on 61 degrees of freedom
Multiple R-squared:  0.8738,    Adjusted R-squared:  0.8697 
F-statistic: 211.2 on 2 and 61 DF,  p-value: < 2.2e-16
Code
# anova(plant_model)

For example, temperature F-value:

Code
2.85039/0.0083
[1] 343.4205

\[ \begin{align} F &= \frac{2.85039}{0.00830} \\ &= 343.4205 \end{align} \]

dummy variable examples

Code
tibble(
  herbivores = sample(x = c("absent", "present"), size = 10, replace = TRUE, prob = c(0.5, 0.5))
)
# A tibble: 10 × 1
   herbivores
   <chr>     
 1 present   
 2 present   
 3 absent    
 4 present   
 5 absent    
 6 absent    
 7 absent    
 8 absent    
 9 absent    
10 present   
Code
df <- tibble(
  fertilizer = sample(x = c("low", "medium", "high"), size = 10, replace = TRUE, prob = c(0.3, 0.3, 0.3))
) |> 
  mutate(fertilizer = as_factor(fertilizer),
         fertilizer = fct_relevel(fertilizer, "low", "medium", "high"))

str(df)
tibble [10 × 1] (S3: tbl_df/tbl/data.frame)
 $ fertilizer: Factor w/ 3 levels "low","medium",..: 2 2 3 1 2 3 1 1 2 2
Code
df <- tibble(
  year = sample(x = c("1st", "2nd", "3rd", "4th", "5th"), size = 20, replace = TRUE, prob = c(0.2, 0.2, 0.2, 0.2, 0.2))
) %>% 
  mutate(year = as_factor(year),
         year = fct_relevel(year, "1st", "2nd", "3rd", "4th", "5th"))

df 
# A tibble: 20 × 1
   year 
   <fct>
 1 3rd  
 2 2nd  
 3 3rd  
 4 4th  
 5 3rd  
 6 5th  
 7 2nd  
 8 3rd  
 9 4th  
10 1st  
11 5th  
12 5th  
13 4th  
14 5th  
15 4th  
16 4th  
17 4th  
18 1st  
19 5th  
20 3rd  
Code
str(df)
tibble [20 × 1] (S3: tbl_df/tbl/data.frame)
 $ year: Factor w/ 5 levels "1st","2nd","3rd",..: 3 2 3 4 3 5 2 3 4 1 ...

plant growth dummy variable

Code
# humidity units: %
# plant growth: cm / week

low_col <- "#2176ae"
medium_col <- "#fbb13c"
high_col <- "#fe6847"

n <- 30
set.seed(10)
plant_df <- tibble(
  humidity = round(rnorm(n = n, mean = 60, sd = 15), 0),
  low = humidity*rnorm(n = n, mean = 0.025, sd = 0.012) + 1,
  medium = humidity*rnorm(n = n, mean = 0.025, sd = 0.015) + 1.5,
  high = humidity*rnorm(n = n, mean = 0.025, sd = 0.017) + 2
) %>% 
  pivot_longer(cols = low:high,
               names_to = "fertilizer", 
               values_to = "growth") %>% 
  mutate(fertilizer = fct_relevel(fertilizer, "low", "medium", "high"))

# gives you a random sample
sample_n(plant_df, 10)
# A tibble: 10 × 3
   humidity fertilizer growth
      <dbl> <fct>       <dbl>
 1       71 high         3.01
 2       42 high         1.90
 3       58 medium       1.92
 4       50 low          3.07
 5       56 low          1.96
 6       61 low          2.50
 7       47 medium       4.24
 8       54 low          2.86
 9       77 low          3.93
10       39 medium       1.99
Code
# look at the structure
str(plant_df)
tibble [90 × 3] (S3: tbl_df/tbl/data.frame)
 $ humidity  : num [1:90] 60 60 60 57 57 57 39 39 39 51 ...
 $ fertilizer: Factor w/ 3 levels "low","medium",..: 1 2 3 1 2 3 1 2 3 1 ...
 $ growth    : num [1:90] 1.17 1.89 3.08 2.37 2.53 ...
Code
ggplot(data = plant_df,
       aes(x = humidity,
           y = growth)) +
  geom_point() 

Code
ggplot(data = plant_df,
       aes(x = fertilizer,
           y = growth,
           color = fertilizer)) +
  geom_jitter(width = 0.2,
              height = 0,
              alpha = 0.3) +
  stat_summary(fun.data = mean_cl_normal,
               geom = "pointrange",
               size = 1,
               linewidth = 1) +
  scale_color_manual(values = c(low_col, medium_col, high_col)) +
  theme(legend.position = "none")

Code
plant_df %>% 
  group_by(fertilizer) %>% 
  summarize(mean = mean(growth))
# A tibble: 3 × 2
  fertilizer  mean
  <fct>      <dbl>
1 low         2.25
2 medium      2.82
3 high        3.51
Code
plant_df %>% 
  summarize(mean = mean(growth))
# A tibble: 1 × 1
   mean
  <dbl>
1  2.86
Code
plant_lm <- lm(growth ~ humidity + fertilizer,
               data = plant_df)

# am i broken because i can't look at anything other than dharma residuals
simulateResiduals(plant_lm) %>% plot()

Code
par(mfrow = c(2, 2))
plot(plant_lm)

Code
ggpredict(plant_lm,
          terms = c("humidity",
                    "fertilizer")) %>% 
  plot(show_data = TRUE) +
  scale_color_manual(values = c(low_col, medium_col, high_col)) +
  scale_fill_manual(values = c(low_col, medium_col, high_col)) +
  theme_classic()

Code
ggpredict(plant_lm,
          terms = c("humidity")) %>% 
  plot(show_data = TRUE) +
  theme_classic()

Code
ggpredict(lm(growth ~ humidity, data = plant_df),
          terms = "humidity") %>% 
  plot(show_data = TRUE) +
  theme_classic()

Code
summary(plant_lm)

Call:
lm(formula = growth ~ humidity + fertilizer, data = plant_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7361 -0.5220  0.1129  0.5353  1.6649 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      1.280788   0.390802   3.277  0.00151 ** 
humidity         0.017697   0.006615   2.675  0.00894 ** 
fertilizermedium 0.573791   0.207205   2.769  0.00688 ** 
fertilizerhigh   1.264891   0.207205   6.105 2.88e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.8025 on 86 degrees of freedom
Multiple R-squared:  0.3411,    Adjusted R-squared:  0.3182 
F-statistic: 14.84 on 3 and 86 DF,  p-value: 7.19e-08
Code
summary(lm(growth ~ humidity, data = plant_df))

Call:
lm(formula = growth ~ humidity, data = plant_df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.7902 -0.7698 -0.0693  0.5993  1.9402 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.893683   0.440513   4.299 4.42e-05 ***
humidity    0.017697   0.007833   2.259   0.0263 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9502 on 88 degrees of freedom
Multiple R-squared:  0.05483,   Adjusted R-squared:  0.04409 
F-statistic: 5.105 on 1 and 88 DF,  p-value: 0.02633
Code
tbl_regression(plant_lm,
               label = list(humidity = "Humidity (%)",
                            fertilizer = "Fertilizer treatment")) |> 
  as_flex_table() 

Characteristic

Beta

95% CI

p-value

Humidity (%)

0.02

0.00, 0.03

0.009

Fertilizer treatment

low

medium

0.57

0.16, 0.99

0.007

high

1.3

0.85, 1.7

<0.001

Abbreviation: CI = Confidence Interval

Code
flextable::as_flextable(plant_lm)

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

1.281

0.391

3.277

0.0015

**

humidity

0.018

0.007

2.675

0.0089

**

fertilizermedium

0.574

0.207

2.769

0.0069

**

fertilizerhigh

1.265

0.207

6.105

0.0000

***

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.8025 on 86 degrees of freedom

Multiple R-squared: 0.3411, Adjusted R-squared: 0.3182

F-statistic: 14.84 on 86 and 3 DF, p-value: 0.0000

frog example

generating data

Code
set.seed(666)
frog_n <- 87

frog_df <- tibble(
  weight = (round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)),
  blue = round(rnorm(n = frog_n, mean = 2.5, sd = 0.09)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2),
  green = round(rnorm(n = frog_n, mean = 3, sd = 0.05)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2),
  red = round(rnorm(n = frog_n, mean = 2.3, sd = 0.05)*(round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2)), 2)
) %>% 
  pivot_longer(cols = blue:red,
               names_to = "color",
               values_to = "toxicity")

df <- cbind(
  # predictor variables
  # color = sample(x = c("blue", "green", "red"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3)),
  weight = (round(rnorm(n = frog_n, mean = 3, sd = 0.1), 2))
  #pattern = sample(x = c("striped", "spotted", "none"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3))
) %>%
  as_tibble() %>%
  mutate(toxicity = round(rnorm(n = frog_n, mean = 1.5, sd = 0.06)*weight, 3)) %>%
  mutate(color = case_when(
    between(toxicity, 2.5, 4.4) ~ "blue",
    between(toxicity, 4.4, 4.6) ~ "green",
    between(toxicity, 4.6, 5.5) ~ "red"
  )) %>% 
  mutate(toxicity = case_when(
    color == "blue" ~ round(rnorm(n = frog_n, mean = 2.5, sd = 0.09)*weight, 3),
    color == "green" ~ round(rnorm(n = frog_n, mean = 3, sd = 0.05)*weight, 3),
    color == "red" ~ round(rnorm(n = frog_n, mean = 2.3, sd = 0.05)*weight, 3)
  )) %>% 
  mutate(color = as_factor(color),
         color = fct_relevel(color, "red", "blue", "green"))


# df <- cbind(
#   color = sample(x = c("blue", "green", "red"), size = frog_n, replace = TRUE, prob = c(0.3, 0.3, 0.3))
# ) %>% 
#   as_tibble() %>% 
#   mutate(weight = (round(rnorm(n = frog_n, mean = 3.5, sd = 0.1), 2))) %>% 
#   mutate(toxicity = case_when(
#     color == "blue" ~ round(rnorm(n = frog_n, mean = 3, sd = 0.4)*weight, 2),
#     color == "green" ~ round(rnorm(n = frog_n, mean = 2, sd = 0.5)*weight, 2),
#     color == "red" ~ round(rnorm(n = frog_n, mean = 1, sd = 0.03)*weight, 2)
#   ))

plotting data

Code
blue_col <- "cornflowerblue"
green_col <- "darkgreen"
red_col <- "maroon"

striped_col <- "grey1"
spotted_col <- "grey50"
none_col <- "grey80"

ggplot(data = frog_df, aes(x = color, y = toxicity, color = color, fill = color)) +
  geom_jitter(width = 0.2, height = 0, alpha = 0.3) +
  scale_color_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  scale_fill_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  stat_summary(geom = "pointrange", 
               fun = mean, 
               fun.min = function(x) mean(x) - sd(x), 
               fun.max = function(x) mean(x) + sd(x), 
               shape = 21, 
               size = 1) +
 #  geom_point(position = position_jitter(width = 0.2, height = 0, seed = 666), alpha = 0.3) +
  labs(title = "Color") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        text = element_text(size = 22))

Code
ggplot(data = frog_df, aes(x = weight, y = toxicity)) +
  geom_point() +
  # geom_smooth(method = "lm") +
  labs(title = "Weight") +
  theme(legend.position = "none",
        axis.title.x = element_blank(),
        text = element_text(size = 22))

Code
ggplot(data = frog_df, aes(x = weight, y = toxicity, color = color)) +
  geom_point() +
  scale_color_manual(values = c("blue" = blue_col, "green" = green_col, "red" = red_col)) +
  geom_smooth(method = "lm") +
  labs(title = "Weight")

Code
head(df, 10)
# A tibble: 10 × 3
   weight toxicity color
    <dbl>    <dbl> <fct>
 1   3.18     8.29 blue 
 2   3.02     9.24 green
 3   3.01     8.75 green
 4   3.07     7.24 red  
 5   2.98     7.47 blue 
 6   2.94     8.89 green
 7   2.95     7.26 blue 
 8   2.91     7.25 blue 
 9   2.95     6.61 red  
10   3.06     8.96 green

model

Code
model1 <- lm(toxicity ~ weight + color, 
             data = frog_df)

model2 <- lm(toxicity ~ weight * color, 
             data = frog_df)

simulateResiduals(model1, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.804 0.556 0.336 0.376 0.5 0.56 0.076 0.74 0.308 0.008 0.416 0.828 0.684 0.204 0.272 0.66 0.172 0.208 0.604 0.708 ...
Code
simulateResiduals(model2, plot = TRUE)

Object of Class DHARMa with simulated residuals based on 250 simulations with refit = FALSE . See ?DHARMa::simulateResiduals for help. 
 
Scaled residual values: 0.82 0.536 0.336 0.42 0.436 0.572 0.076 0.74 0.3 0.012 0.368 0.84 0.628 0.256 0.244 0.692 0.144 0.212 0.58 0.752 ...
Code
check_model(model2)

Code
testOutliers(model2)


    DHARMa outlier test based on exact binomial test with approximate
    expectations

data:  model2
outliers at both margin(s) = 1, observations = 261, p-value = 0.7287
alternative hypothesis: true probability of success is not equal to 0.007968127
95 percent confidence interval:
 9.699839e-05 2.116127e-02
sample estimates:
frequency of outliers (expected: 0.00796812749003984 ) 
                                           0.003831418 

diagnostics

Code
par(mfrow = c(2, 2))
plot(model1)

Code
plot(model2)

model summary

Code
summary(model1)

Call:
lm(formula = toxicity ~ weight + color, data = frog_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.90113 -0.21865 -0.00068  0.22588  0.90229 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.71921    0.58381  13.222   <2e-16 ***
weight      -0.07811    0.19467  -0.401    0.689    
colorgreen   1.48908    0.05049  29.490   <2e-16 ***
colorred    -0.56874    0.05049 -11.263   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.333 on 257 degrees of freedom
Multiple R-squared:  0.8733,    Adjusted R-squared:  0.8718 
F-statistic: 590.6 on 3 and 257 DF,  p-value: < 2.2e-16
Code
summary(model2)

Call:
lm(formula = toxicity ~ weight * color, data = frog_df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.91522 -0.22141 -0.00686  0.21240  0.87930 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)         8.2941     1.0119   8.196 1.21e-14 ***
weight             -0.2702     0.3379  -0.800    0.425    
colorgreen          0.1204     1.4311   0.084    0.933    
colorred           -0.9248     1.4311  -0.646    0.519    
weight:colorgreen   0.4573     0.4778   0.957    0.339    
weight:colorred     0.1189     0.4778   0.249    0.804    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3337 on 255 degrees of freedom
Multiple R-squared:  0.8738,    Adjusted R-squared:  0.8713 
F-statistic: 353.1 on 5 and 255 DF,  p-value: < 2.2e-16
Code
model.sel(model1, model2)
Model selection table 
       (Int) clr   wgh clr:wgh df  logLik  AICc delta
model1 7.719   + 0.832          5 -81.355 172.9   0.0
model2 8.294   + 0.168       +  7 -80.851 176.1   3.2
Models ranked by AICc(x) 
Code
emmeans(model2,
        specs = c("weight", "color"))
 weight color emmean     SE  df lower.CL upper.CL
   2.99 blue    7.49 0.0358 255     7.41     7.56
   2.99 green   8.97 0.0358 255     8.90     9.04
   2.99 red     6.92 0.0358 255     6.85     6.99

Confidence level used: 0.95 
Code
ggpredict(model1,
          terms = c("weight [2:4 by = 0.01]", 
                    "color")) %>% 
  plot(show_data = TRUE,
       limit_range = TRUE) +
  scale_color_manual(values = c("green" = green_col, 
                                "red" = red_col, 
                                "blue" = blue_col)) +
  scale_fill_manual(values = c("green" = green_col, 
                               "red" = red_col, 
                               "blue" = blue_col)) +
  theme_classic()

Code
ggpredict(model2,
          terms = c("weight [2:4 by = 0.01]", 
                    "color")) %>% 
  plot(show_data = TRUE,
       limit_range = TRUE) +
  scale_color_manual(values = c("green" = green_col, 
                                "red" = red_col, 
                                "blue" = blue_col)) +
  scale_fill_manual(values = c("green" = green_col, 
                               "red" = red_col, 
                               "blue" = blue_col)) +
  theme_classic()

\[ \hat{y}_h \pm t_{(1-\alpha/2, n-2)}*\sqrt{MSE*(\frac{1}{n}+\frac{(x_h-\bar{x})^2}{\sum(x_i-\bar{x})^2})} \]

\[ MSE = \frac{\sum(y_i-\hat{y})^2}{n} \]

Code
tidy(model2, conf.int = TRUE, conf.level = 0.95)
# A tibble: 6 × 7
  term              estimate std.error statistic  p.value conf.low conf.high
  <chr>                <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
1 (Intercept)          8.29      1.01     8.20   1.21e-14    6.30     10.3  
2 weight              -0.270     0.338   -0.800  4.25e- 1   -0.936     0.395
3 colorgreen           0.120     1.43     0.0841 9.33e- 1   -2.70      2.94 
4 colorred            -0.925     1.43    -0.646  5.19e- 1   -3.74      1.89 
5 weight:colorgreen    0.457     0.478    0.957  3.39e- 1   -0.484     1.40 
6 weight:colorred      0.119     0.478    0.249  8.04e- 1   -0.822     1.06 
Code
model_summary <- summary(model2)
c("lower" = model_summary$coef[2,1] - qt(0.975, df = model_summary$df[2]) * model_summary$coef[2, 2],
  "upper" = model_summary$coef[2,1] + qt(0.975, df = model_summary$df[2]) * model_summary$coef[2, 2])
     lower      upper 
-0.9355103  0.3951563 

Confidence interval for a single coefficient: in words: estimate plus or minus the t-value at your confidence level * standard error

Valliere et al.

Code
drought_exp_clean <- drought_exp |> 
  clean_names() |> 
  mutate(water = case_when(
    water == "WW" ~ "Well watered",
    water == "DS" ~ "Drought stressed"
  )) |> 
  rename(treatment = water,
         species_name = species) |> 
  mutate(treatment = as_factor(treatment))


str(drought_exp_clean)
tibble [70 × 13] (S3: tbl_df/tbl/data.frame)
 $ species_name     : chr [1:70] "ENCCAL" "ENCCAL" "ENCCAL" "ENCCAL" ...
 $ treatment        : Factor w/ 2 levels "Well watered",..: 1 1 1 1 1 2 2 2 2 2 ...
 $ rep_number       : num [1:70] 1 2 3 4 5 1 2 3 4 5 ...
 $ height_cm        : num [1:70] 5.8 4.9 8.4 6.5 7.1 3.2 4.4 4.2 4.5 3.9 ...
 $ leaf_number      : num [1:70] 11 8 11 12 10 7 7 10 8 6 ...
 $ leaf_dry_weight_g: num [1:70] 0.0294 0.0185 0.0177 0.0178 0.0164 0.017 0.0193 0.0153 0.0159 0.0133 ...
 $ leaf_area_cm2    : num [1:70] 5.01 3.98 3.69 3.84 3.63 3.06 3.1 2.94 2.73 2.61 ...
 $ sla              : num [1:70] 170 215 209 216 222 ...
 $ total_la         : num [1:70] 55.1 31.8 40.6 46.1 36.3 ...
 $ shoot_g          : num [1:70] 0.253 0.164 0.241 0.213 0.232 ...
 $ root_g           : num [1:70] 0.202 0.165 0.209 0.146 0.12 ...
 $ total_g          : num [1:70] 0.455 0.329 0.45 0.359 0.352 ...
 $ r_s              : num [1:70] 0.8 1 0.9 0.7 0.5 0.8 1.2 3.1 0.9 1.2 ...
Code
pull(drought_exp_clean, treatment)
 [1] Well watered     Well watered     Well watered     Well watered    
 [5] Well watered     Drought stressed Drought stressed Drought stressed
 [9] Drought stressed Drought stressed Well watered     Well watered    
[13] Well watered     Well watered     Well watered     Drought stressed
[17] Drought stressed Drought stressed Drought stressed Drought stressed
[21] Well watered     Well watered     Well watered     Well watered    
[25] Well watered     Drought stressed Drought stressed Drought stressed
[29] Drought stressed Drought stressed Well watered     Well watered    
[33] Well watered     Well watered     Well watered     Drought stressed
[37] Drought stressed Drought stressed Drought stressed Drought stressed
[41] Well watered     Well watered     Well watered     Well watered    
[45] Well watered     Drought stressed Drought stressed Drought stressed
[49] Drought stressed Drought stressed Well watered     Well watered    
[53] Well watered     Well watered     Well watered     Drought stressed
[57] Drought stressed Drought stressed Drought stressed Drought stressed
[61] Well watered     Well watered     Well watered     Well watered    
[65] Well watered     Drought stressed Drought stressed Drought stressed
[69] Drought stressed Drought stressed
Levels: Well watered Drought stressed
Code
slice_sample(drought_exp_clean,
             n = 10)
# A tibble: 10 × 13
   species_name treatment     rep_number height_cm leaf_number leaf_dry_weight_g
   <chr>        <fct>              <dbl>     <dbl>       <dbl>             <dbl>
 1 SALLEU       Well watered           3       3.1           6            0.0343
 2 ENCCAL       Well watered           1       5.8          11            0.0294
 3 PENCEN       Drought stre…          2       1.9           6            0.0064
 4 SALLEU       Drought stre…          3       1.9           4            0.0304
 5 STIPUL       Drought stre…          4       9.7          16            0.0067
 6 GRICAM       Well watered           4       7.1           8            0.0451
 7 GRICAM       Drought stre…          5       4.3           6            0.0217
 8 SALLEU       Drought stre…          5       2.8           6            0.0296
 9 PENCEN       Drought stre…          4       2.1           5            0.0074
10 LOTSCO       Drought stre…          2       5.2          21            0.0033
# ℹ 7 more variables: leaf_area_cm2 <dbl>, sla <dbl>, total_la <dbl>,
#   shoot_g <dbl>, root_g <dbl>, total_g <dbl>, r_s <dbl>
Code
ggplot(data = drought_exp_clean,
       aes(x = sla,
           y = total_g)) +
  geom_point(color = "darkgreen")

Code
ggplot(data = drought_exp_clean,
       aes(x = reorder(species_name, -sla),
           y = sla,
           color = species_name)) +
  geom_jitter(height = 0,
              width = 0.2) +
  theme(legend.position = "none")

Code
ggplot(data = drought_exp_clean,
       aes(x = treatment,
           y = total_g,
           color = treatment)) +
  geom_jitter(height = 0,
              width = 0.2) +
  scale_color_manual(values = c(
    "Well watered" = "lightseagreen",
    "Drought stressed" = "orangered3"
  )) +
  theme(legend.position = "none")

Code
mod <- lm(
  total_g ~ sla + treatment,
  data = drought_exp_clean
)

model0 <- lm(
  total_g ~ 1,
  data = drought_exp_clean
)

model1 <- lm(
  total_g ~ sla + treatment + species_name,
  data = drought_exp_clean
)

model4 <- lm(
  total_g ~ treatment + species_name,
  data = drought_exp_clean
)

par(mfrow = c(2, 2))
plot(mod)

Code
summary(mod)

Call:
lm(formula = total_g ~ sla + treatment, data = drought_exp_clean)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.231899 -0.082814 -0.004803  0.081373  0.287369 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)   
(Intercept)                0.1362657  0.0619544   2.199  0.03130 * 
sla                        0.0012807  0.0003736   3.428  0.00104 **
treatmentDrought stressed -0.0895679  0.0291549  -3.072  0.00307 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1173 on 67 degrees of freedom
Multiple R-squared:  0.3031,    Adjusted R-squared:  0.2823 
F-statistic: 14.57 on 2 and 67 DF,  p-value: 5.566e-06
Code
ggpredict(
  mod,
  terms = c("sla")
) |> 
  plot(show_data = TRUE)

Code
preds <- ggpredict(
  mod,
  terms = c("sla", "treatment")
) |> 
  rename(treatment = group)

ggpredict(
  mod,
  terms = c("sla[0]", "treatment"))
# Predicted values of total_g

treatment: Well watered

sla | Predicted |     95% CI
----------------------------
  0 |      0.14 | 0.01, 0.26

treatment: Drought stressed

sla | Predicted |      95% CI
-----------------------------
  0 |      0.05 | -0.06, 0.16
Code
ggpredict(
  mod,
  terms = c("sla", "treatment")
) |> 
  plot(show_data = TRUE) +
  scale_color_manual(values = c(
    "Well watered" = "lightseagreen",
    "Drought stressed" = "orangered3"
  )) +
  scale_fill_manual(values = c(
    "Well watered" = "lightseagreen",
    "Drought stressed" = "orangered3"
  )) 

Code
as_flextable(mod)

Estimate

Standard Error

t value

Pr(>|t|)

(Intercept)

0.136

0.062

2.199

0.0313

*

sla

0.001

0.000

3.428

0.0010

**

treatmentDrought stressed

-0.090

0.029

-3.072

0.0031

**

Signif. codes: 0 <= '***' < 0.001 < '**' < 0.01 < '*' < 0.05

Residual standard error: 0.1173 on 67 degrees of freedom

Multiple R-squared: 0.3031, Adjusted R-squared: 0.2823

F-statistic: 14.57 on 67 and 2 DF, p-value: 0.0000

Code
tbl_regression(mod)
Characteristic Beta 95% CI p-value
sla 0.00 0.00, 0.00 0.001
treatment


    Well watered
    Drought stressed -0.09 -0.15, -0.03 0.003
Abbreviation: CI = Confidence Interval
Code
ggplot(data = drought_exp_clean,
       aes(x = sla,
           y = total_g)) +
  geom_point(aes(color = treatment)) +
  geom_ribbon(data = preds,
              aes(x = x,
                  y = predicted,
                  ymin = conf.low,
                  ymax = conf.high,
                  fill = treatment),
              alpha = 0.3) +
  geom_line(data = preds,
              aes(x = x,
                  y = predicted,
                  color = treatment)) +
  scale_color_manual(values = c(
    "Well watered" = "lightseagreen",
    "Drought stressed" = "orangered3"
  )) +
  scale_fill_manual(values = c(
    "Well watered" = "lightseagreen",
    "Drought stressed" = "orangered3"
  )) +
  labs(x = "Specific leaf area (cm\U00B2/g)",
       y = "Total mass (g)",
       color = "Treatment",
       fill = "Treatment")

Citation

BibTeX citation:
@online{bui2026,
  author = {Bui, An},
  title = {Week 8 Figures - {Lectures} 14 and 15},
  date = {2026-02-24},
  url = {https://winter-2026.envs-193ds.com/lecture/lecture_week-08.html},
  langid = {en}
}
For attribution, please cite this work as:
Bui, An. 2026. “Week 8 Figures - Lectures 14 and 15.” February 24, 2026. https://winter-2026.envs-193ds.com/lecture/lecture_week-08.html.