anova.gkwfit {gkwreg} | R Documentation |
Compare Fitted gkwfit Models using Likelihood Ratio Tests
Description
Computes Likelihood Ratio Tests (LRT) to compare two or more nested models
fitted using gkwfit
. It produces a table summarizing the models
and the test statistics.
Usage
## S3 method for class 'gkwfit'
anova(object, ...)
Arguments
object |
An object of class |
... |
One or more additional objects of class |
Details
This function performs pairwise likelihood ratio tests between consecutively ordered models (ordered by their degrees of freedom). It assumes the models are nested and are fitted to the same dataset. A warning is issued if the number of observations differs between models.
The Likelihood Ratio statistic is calculated as LR = 2 \times (\log L_{complex} - \log L_{simple})
.
This statistic is compared to a Chi-squared distribution with degrees of freedom
equal to the difference in the number of parameters between the two models
(\Delta df = df_{complex} - df_{simple}
).
The output table includes the number of parameters (N.Par
), AIC, BIC, log-likelihood (LogLik
),
the test description (Test
), the LR statistic (LR stat.
), and the p-value (Pr(>Chi)
).
Models are ordered by increasing complexity (number of parameters).
Warnings are issued if models do not appear correctly nested based on degrees of freedom or if the log-likelihood decreases for a more complex model, as the LRT results may not be meaningful in such cases.
The function relies on a working logLik.gkwfit
method to extract
necessary information (log-likelihood, df, nobs).
Value
An object of class c("anova.gkwfit", "anova", "data.frame")
.
This data frame contains rows for each model and columns summarizing the fit
and the pairwise likelihood ratio tests. It includes:
N.Par |
Number of estimated parameters (degrees of freedom). |
AIC |
Akaike Information Criterion. |
BIC |
Bayesian Information Criterion. |
LogLik |
Log-likelihood value. |
Test |
Description of the pairwise comparison (e.g., "1 vs 2"). |
LR stat. |
Likelihood Ratio test statistic. |
Pr(>Chi) |
P-value from the Chi-squared test. |
The table is printed using a method that mimics print.anova
.
Author(s)
Lopes, J. E.
See Also
gkwfit
, logLik.gkwfit
, AIC.gkwfit
, BIC.gkwfit
, anova
Examples
## Not run:
# Load required packages
library(ggplot2)
library(patchwork)
library(betareg)
# Generate data from GKw distribution
set.seed(2203)
n <- 1000
y <- rgkw(n, alpha = 2, beta = 3, gamma = 1.5, delta = 0.2, lambda = 1.2)
# Fit models from GKw family respecting their parameter structures
# Full GKw model: 5 parameters (alpha, beta, gamma, delta, lambda)
fit_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
# BKw model: 4 parameters (alpha, beta, gamma, delta)
fit_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
# KKw model: 4 parameters (alpha, beta, delta, lambda)
fit_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
# EKw model: 3 parameters (alpha, beta, lambda)
fit_ekw <- gkwfit(data = y, family = "ekw", plot = FALSE)
# Mc model: 3 parameters (gamma, delta, lambda)
fit_mc <- gkwfit(data = y, family = "mc", plot = FALSE)
# Kw model: 2 parameters (alpha, beta)
fit_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
# Beta model: 2 parameters (gamma, delta)
fit_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
# Test 1: BKw vs GKw (testing lambda)
# H0: lambda=1 (BKw) vs H1: lambda!=1 (GKw)
cat("=== Testing BKw vs GKw (adding lambda parameter) ===\n")
test_bkw_gkw <- anova(fit_bkw, fit_gkw)
print(test_bkw_gkw)
# Test 2: KKw vs GKw (testing gamma)
# H0: gamma=1 (KKw) vs H1: gamma!=1 (GKw)
cat("\n=== Testing KKw vs GKw (adding gamma parameter) ===\n")
test_kkw_gkw <- anova(fit_kkw, fit_gkw)
print(test_kkw_gkw)
# Test 3: Kw vs EKw (testing lambda)
# H0: lambda=1 (Kw) vs H1: lambda!=1 (EKw)
cat("\n=== Testing Kw vs EKw (adding lambda parameter) ===\n")
test_kw_ekw <- anova(fit_kw, fit_ekw)
print(test_kw_ekw)
# Test 4: Beta vs Mc (testing lambda)
# H0: lambda=1 (Beta) vs H1: lambda!=1 (Mc)
cat("\n=== Testing Beta vs Mc (adding lambda parameter) ===\n")
test_beta_mc <- anova(fit_beta, fit_mc)
print(test_beta_mc)
# Visualize model comparison
# Create dataframe summarizing all models
models_df <- data.frame(
Model = c("GKw", "BKw", "KKw", "EKw", "Mc", "Kw", "Beta"),
Parameters = c(
paste("alpha,beta,gamma,delta,lambda"),
paste("alpha,beta,gamma,delta"),
paste("alpha,beta,delta,lambda"),
paste("alpha,beta,lambda"),
paste("gamma,delta,lambda"),
paste("alpha,beta"),
paste("gamma,delta")
),
Param_count = c(5, 4, 4, 3, 3, 2, 2),
LogLik = c(
as.numeric(logLik(fit_gkw)),
as.numeric(logLik(fit_bkw)),
as.numeric(logLik(fit_kkw)),
as.numeric(logLik(fit_ekw)),
as.numeric(logLik(fit_mc)),
as.numeric(logLik(fit_kw)),
as.numeric(logLik(fit_beta))
),
AIC = c(
fit_gkw$AIC,
fit_bkw$AIC,
fit_kkw$AIC,
fit_ekw$AIC,
fit_mc$AIC,
fit_kw$AIC,
fit_beta$AIC
),
BIC = c(
fit_gkw$BIC,
fit_bkw$BIC,
fit_kkw$BIC,
fit_ekw$BIC,
fit_mc$BIC,
fit_kw$BIC,
fit_beta$BIC
)
)
# Sort by AIC
models_df <- models_df[order(models_df$AIC), ]
print(models_df)
# Create comprehensive visualization
# Plot showing model hierarchy and information criteria
p1 <- ggplot(models_df, aes(x = Param_count, y = LogLik, label = Model)) +
geom_point(size = 3) +
geom_text(vjust = -0.8) +
labs(
title = "Log-likelihood vs Model Complexity",
x = "Number of Parameters",
y = "Log-likelihood"
) +
theme_minimal()
# Create information criteria comparison
models_df_long <- tidyr::pivot_longer(
models_df,
cols = c("AIC", "BIC"),
names_to = "Criterion",
values_to = "Value"
)
p2 <- ggplot(models_df_long, aes(x = reorder(Model, -Value), y = Value, fill = Criterion)) +
geom_bar(stat = "identity", position = "dodge") +
labs(
title = "Information Criteria Comparison",
x = "Model",
y = "Value (lower is better)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print plots
print(p1 + p2)
# ============================================================================
# Manual LR tests to demonstrate underlying calculations
# ============================================================================
# Function to perform manual likelihood ratio test
manual_lr_test <- function(model_restricted, model_full, alpha = 0.05) {
# Extract log-likelihoods
ll_restricted <- as.numeric(logLik(model_restricted))
ll_full <- as.numeric(logLik(model_full))
# Calculate test statistic
lr_stat <- -2 * (ll_restricted - ll_full)
# Calculate degrees of freedom (parameter difference)
df <- length(coef(model_full)) - length(coef(model_restricted))
# Calculate p-value
p_value <- pchisq(lr_stat, df = df, lower.tail = FALSE)
# Return results
list(
lr_statistic = lr_stat,
df = df,
p_value = p_value,
significant = p_value < alpha,
critical_value = qchisq(1 - alpha, df = df)
)
}
# Example: Manual LR test for BKw vs GKw (testing lambda parameter)
cat("\n=== Manual LR test: BKw vs GKw ===\n")
lr_bkw_gkw <- manual_lr_test(fit_bkw, fit_gkw)
cat("LR statistic:", lr_bkw_gkw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_bkw_gkw$df, "\n")
cat("P-value:", lr_bkw_gkw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_bkw_gkw$critical_value, "\n")
cat("Decision:", ifelse(lr_bkw_gkw$significant,
"Reject H0: Lambda is significantly different from 1",
"Fail to reject H0: Lambda is not significantly different from 1"
), "\n")
# Example: Manual LR test for Kw vs EKw (testing lambda parameter)
cat("\n=== Manual LR test: Kw vs EKw ===\n")
lr_kw_ekw <- manual_lr_test(fit_kw, fit_ekw)
cat("LR statistic:", lr_kw_ekw$lr_statistic, "\n")
cat("Degrees of freedom:", lr_kw_ekw$df, "\n")
cat("P-value:", lr_kw_ekw$p_value, "\n")
cat("Critical value (alpha=0.05):", lr_kw_ekw$critical_value, "\n")
cat("Decision:", ifelse(lr_kw_ekw$significant,
"Reject H0: Lambda is significantly different from 1",
"Fail to reject H0: Lambda is not significantly different from 1"
), "\n")
# ============================================================================
# Real data application with correct model nesting
# ============================================================================
if (requireNamespace("betareg", quietly = TRUE)) {
data("ReadingSkills", package = "betareg")
y <- ReadingSkills$accuracy
# Fit models
rs_gkw <- gkwfit(data = y, family = "gkw", plot = FALSE)
rs_bkw <- gkwfit(data = y, family = "bkw", plot = FALSE)
rs_kkw <- gkwfit(data = y, family = "kkw", plot = FALSE)
rs_kw <- gkwfit(data = y, family = "kw", plot = FALSE)
rs_beta <- gkwfit(data = y, family = "beta", plot = FALSE)
# Test nested models
cat("\n=== Real data: Testing BKw vs GKw (adding lambda) ===\n")
rs_test_bkw_gkw <- anova(rs_bkw, rs_gkw)
print(rs_test_bkw_gkw)
cat("\n=== Real data: Testing Kw vs KKw (adding delta and lambda) ===\n")
rs_test_kw_kkw <- anova(rs_kw, rs_kkw)
print(rs_test_kw_kkw)
# Compare non-nested models with information criteria
cat("\n=== Real data: Comparing non-nested Beta vs Kw ===\n")
rs_compare_beta_kw <- anova(rs_beta, rs_kw)
print(rs_compare_beta_kw)
# Summarize all models
cat("\n=== Real data: Model comparison summary ===\n")
models_rs <- c("GKw", "BKw", "KKw", "Kw", "Beta")
aic_values <- c(rs_gkw$AIC, rs_bkw$AIC, rs_kkw$AIC, rs_kw$AIC, rs_beta$AIC)
bic_values <- c(rs_gkw$BIC, rs_bkw$BIC, rs_kkw$BIC, rs_kw$BIC, rs_beta$BIC)
loglik_values <- c(
as.numeric(logLik(rs_gkw)),
as.numeric(logLik(rs_bkw)),
as.numeric(logLik(rs_kkw)),
as.numeric(logLik(rs_kw)),
as.numeric(logLik(rs_beta))
)
df_rs <- data.frame(
Model = models_rs,
LogLik = loglik_values,
AIC = aic_values,
BIC = bic_values
)
df_rs <- df_rs[order(df_rs$AIC), ]
print(df_rs)
# Determine the best model for the data
best_model <- df_rs$Model[which.min(df_rs$AIC)]
cat("\nBest model based on AIC:", best_model, "\n")
}
## End(Not run)