差异分析是统计学中最基础也是最重要的分析方法之一。我们运用多种统计检验方法,包括t检验、方差分析、非参数检验等,精确识别组间差异、分布差异和比例差异,为科学决策提供可靠依据。
某制药公司开发了一种新型降压药,需要通过临床试验验证其疗效是否显著优于现有标准治疗方案。研究涉及多个中心、不同人群,需要进行全面的差异分析。
| 数据类型 | 组数 | 分布假设 | 推荐方法 | 备选方法 |
|---|---|---|---|---|
| 连续变量 | 2组 | 正态分布 | 独立样本t检验 | Welch t检验 |
| 连续变量 | 2组 | 非正态分布 | Mann-Whitney U检验 | Permutation检验 |
| 连续变量 | 多组 | 正态分布 | 单因素ANOVA | Welch ANOVA |
| 连续变量 | 多组 | 非正态分布 | Kruskal-Wallis检验 | 多重比较 |
| 分类变量 | 2组 | - | 卡方检验 | Fisher精确检验 |
# Load required packages
library(dplyr)
library(ggplot2)
library(car)
library(effsize)
library(broom)
library(pwr)
library(emmeans)
# Import the raw trial export
clinical_data <- read.csv("clinical_trial_data.csv")

# Derive the analysis variables. Reductions are baseline minus the
# `week12_*` reading, so a positive value means the pressure dropped.
# The treatment factor lists "control" first; any other label in that
# column becomes NA.
clinical_data <- mutate(
  clinical_data,
  sbp_reduction = baseline_sbp - week12_sbp,
  dbp_reduction = baseline_dbp - week12_dbp,
  age_group = factor(age_group),
  gender = factor(gender),
  treatment_group = factor(
    treatment_group,
    levels = c("control", "new_drug")
  )
)
# Per-arm summary of the blood-pressure reductions. Missing values are
# skipped by the mean/sd/median calls; `n` counts every row in the arm.
desc_stats <- summarise(
  group_by(clinical_data, treatment_group),
  n = n(),
  sbp_mean = mean(sbp_reduction, na.rm = TRUE),
  sbp_sd = sd(sbp_reduction, na.rm = TRUE),
  sbp_median = median(sbp_reduction, na.rm = TRUE),
  dbp_mean = mean(dbp_reduction, na.rm = TRUE),
  dbp_sd = sd(dbp_reduction, na.rm = TRUE),
  .groups = "drop"
)

print("收缩压降幅描述性统计:")
print(desc_stats)
# Shapiro-Wilk normality check of the systolic reduction, one test per arm
shapiro_control <- shapiro.test(
  with(clinical_data, sbp_reduction[treatment_group == "control"])
)
shapiro_treatment <- shapiro.test(
  with(clinical_data, sbp_reduction[treatment_group == "new_drug"])
)
cat("对照组正态性检验 p值:", shapiro_control[["p.value"]], "\n")
cat("治疗组正态性检验 p值:", shapiro_treatment[["p.value"]], "\n")

# Levene's test for equal variances between the two arms; the first row
# of the returned table carries the p-value for the grouping factor
levene_test <- leveneTest(
  sbp_reduction ~ treatment_group,
  data = clinical_data
)
cat("方差齐性检验 p值:", levene_test[["Pr(>F)"]][1], "\n")
# Primary analysis: two-sample t-test on the systolic reduction.
# The variances are pooled (Student's t) only when Levene's test, stored
# in `levene_test`, does not reject homogeneity at the 5% level; otherwise
# t.test() falls back to the Welch correction. isTRUE() makes a missing
# p-value select the safer Welch test.
equal_variances <- isTRUE(levene_test$`Pr(>F)`[1] > 0.05)
t_test_result <- t.test(
  sbp_reduction ~ treatment_group,
  data = clinical_data,
  var.equal = equal_variances,
  conf.level = 0.95
)
print("收缩压降幅t检验结果:")
print(t_test_result)

# Effect size (Cohen's d). The sign follows the factor level order, i.e.
# control minus new_drug. na.rm = TRUE drops incomplete observations so
# the estimate is based on the same rows as the t-test and the
# descriptive statistics.
cohen_d <- cohen.d(
  clinical_data$sbp_reduction,
  clinical_data$treatment_group,
  na.rm = TRUE
)
cat("Cohen's d 效应量:", cohen_d$estimate, "\n")

# Non-parametric sensitivity analysis (Mann-Whitney U / Wilcoxon rank-sum);
# conf.int = TRUE also requests the location-shift estimate and its CI
wilcox_test <- wilcox.test(
  sbp_reduction ~ treatment_group,
  data = clinical_data,
  conf.int = TRUE
)
print("Mann-Whitney U检验结果:")
print(wilcox_test)
# Number of non-missing systolic reductions in each arm. Rows with a
# missing outcome contribute to neither the power nor the standard error.
arm_n <- vapply(
  c(control = "control", new_drug = "new_drug"),
  function(arm) {
    sum(
      clinical_data$treatment_group %in% arm &
        !is.na(clinical_data$sbp_reduction)
    )
  },
  integer(1)
)

# Post-hoc power for the observed effect size. pwr.t2n.test() takes the
# two arm sizes separately, so unequal arms are handled correctly instead
# of assuming nrow / 2 per arm. The magnitude of d is passed because the
# test is two-sided.
power_analysis <- pwr.t2n.test(
  n1 = arm_n[["control"]],
  n2 = arm_n[["new_drug"]],
  d = abs(cohen_d$estimate),
  sig.level = 0.05
)
cat("检验功效:", power_analysis$power, "\n")

# Confidence interval for the difference in means (new_drug - control).
# Rows of `desc_stats` are looked up by arm label rather than by position,
# so an extra row (e.g. an NA treatment group) cannot shift the result.
arm_rows <- match(c("control", "new_drug"), desc_stats$treatment_group)
mean_diff <- diff(desc_stats$sbp_mean[arm_rows])
se_diff <- sqrt(sum(desc_stats$sbp_sd[arm_rows]^2 / arm_n))
# 1.96 is the normal-approximation critical value for a 95% interval
ci_lower <- mean_diff - 1.96 * se_diff
ci_upper <- mean_diff + 1.96 * se_diff
cat("均值差异:", round(mean_diff, 2), "\n")
cat("95% 置信区间: [", round(ci_lower, 2), ", ", round(ci_upper, 2), "]\n")
# Subgroup analysis: does the treatment effect interact with gender and age group?
# Three-way factorial ANOVA: `*` expands to the three main effects plus
# every two-way and the three-way interaction of treatment_group, gender
# and age_group
anova_model <- aov(sbp_reduction ~ treatment_group * gender * age_group,
data = clinical_data)
# ANOVA table for the fitted model
anova_summary <- summary(anova_model)
print(anova_summary)
# Partial eta squared for every term of a fitted `aov` model.
#
# For each effect this is SS_effect / (SS_effect + SS_residual), i.e. the
# share of variance attributable to the effect once the other effects are
# set aside. (Dividing by the total sum of squares instead would give
# classical eta squared, which is a different statistic.)
#
# Args:
#   aov_model: a single-stratum fit from aov(). The last row of its
#     summary table is taken to be the residual row.
# Returns:
#   A named numeric vector with one element per model term; names are the
#   term labels with the padding added by summary() removed.
eta_squared <- function(aov_model) {
  anova_table <- summary(aov_model)[[1]]
  sum_sq <- anova_table$`Sum Sq`
  resid_idx <- length(sum_sq)
  ss_effect <- sum_sq[-resid_idx]
  partial_eta_sq <- ss_effect / (ss_effect + sum_sq[resid_idx])
  names(partial_eta_sq) <- trimws(rownames(anova_table)[-resid_idx])
  partial_eta_sq
}
eta_sq_results <- eta_squared(anova_model)
print("偏eta平方效应量:")
print(eta_sq_results)

# Post-hoc comparisons: estimated marginal means of the two arms inside
# every gender x age_group cell
emmeans_result <- emmeans(anova_model, ~ treatment_group | gender * age_group)
# Each cell holds a single control-vs-new_drug contrast, so an adjustment
# applied cell by cell would leave every p-value unchanged. `by = NULL`
# pools the contrasts of all cells into one family, so the Bonferroni
# correction actually accounts for the number of subgroups tested.
pairwise_comp <- summary(
  pairs(emmeans_result),
  by = NULL,
  adjust = "bonferroni"
)
print("事后多重比较结果:")
print(pairwise_comp)
# Treatment effect within each gender x age_group subgroup.
# group_modify() runs the function once per subgroup and binds the
# one-row results together, so no list-columns or purrr helpers are
# needed. Output columns:
#   mean_diff          difference in mean systolic reduction, first factor
#                      level minus second (control - new_drug)
#   p_value            Welch t-test p-value
#   cohens_d           Cohen's d (same direction as mean_diff)
#   ci_lower/ci_upper  95% CI of mean_diff from the t-test
subgroup_effects <- clinical_data %>%
  group_by(gender, age_group) %>%
  group_modify(function(stratum, key) {
    t_res <- t.test(sbp_reduction ~ treatment_group, data = stratum)
    # na.rm = TRUE keeps the effect size on the same complete rows that
    # the formula interface of t.test() uses
    d_res <- cohen.d(
      stratum$sbp_reduction,
      stratum$treatment_group,
      na.rm = TRUE
    )
    data.frame(
      mean_diff = unname(t_res$estimate[1] - t_res$estimate[2]),
      p_value = t_res$p.value,
      cohens_d = unname(d_res$estimate),
      ci_lower = t_res$conf.int[1],
      ci_upper = t_res$conf.int[2]
    )
  }) %>%
  ungroup()
print("亚组分析结果:")
print(subgroup_effects)
# Adverse-event analysis: reshape the four indicator columns into long
# format (one row per participant x event type) and drop missing
# indicators. Stacking the columns with bind_rows() needs only dplyr,
# which is already loaded; row order is column by column.
ae_columns <- c("headache", "dizziness", "fatigue", "nausea")
adverse_events <- bind_rows(lapply(ae_columns, function(ae_name) {
  data.frame(
    treatment_group = clinical_data$treatment_group,
    # rep() keeps the frame valid even when clinical_data has zero rows
    adverse_event = rep(ae_name, nrow(clinical_data)),
    occurred = clinical_data[[ae_name]]
  )
})) %>%
  filter(!is.na(occurred))

# Incidence of each event type per arm: rows counted, events summed,
# and their ratio
ae_summary <- adverse_events %>%
  group_by(treatment_group, adverse_event) %>%
  summarise(
    total = n(),
    events = sum(occurred),
    rate = events / total,
    .groups = "drop"
  )
print("不良反应发生率:")
print(ae_summary)
# Compare the two arms for each adverse-event type with a chi-square test
# and Fisher's exact test. group_modify() runs the function once per event
# type and binds the one-row results, so no list-columns or purrr helpers
# are needed.
chi_square_results <- adverse_events %>%
  group_by(adverse_event) %>%
  group_modify(function(stratum, key) {
    # Fix the indicator levels so the table stays 2 x 2 even when an event
    # never (or always) occurs; fisher.test() only returns an odds ratio
    # for 2 x 2 tables.
    # NOTE(review): assumes `occurred` is a 0/1 (or logical) indicator, as
    # the sum(occurred) and `== 1` usages in this script imply - confirm.
    occurred_flag <- factor(as.integer(stratum$occurred), levels = c(0L, 1L))
    ae_table <- table(stratum$treatment_group, occurred_flag)
    chi_res <- chisq.test(ae_table)
    fisher_res <- fisher.test(ae_table)
    data.frame(
      chi_p_value = chi_res$p.value,
      fisher_p_value = fisher_res$p.value,
      odds_ratio = unname(fisher_res$estimate)
    )
  }) %>%
  ungroup()
print("不良反应统计检验结果:")
print(chi_square_results)
# Overall comparison: flag participants with at least one of the four
# adverse events, then cross-tabulate the flag against the treatment arm
overall_ae <- mutate(
  clinical_data,
  any_ae = ifelse(
    headache == 1 | dizziness == 1 | fatigue == 1 | nausea == 1,
    1,
    0
  )
)
ae_table <- table(overall_ae[["treatment_group"]], overall_ae[["any_ae"]])

# Both tests run on the same table; Fisher's test also supplies the odds
# ratio and its confidence interval
overall_fisher_test <- fisher.test(ae_table)
overall_chi_test <- chisq.test(ae_table)

cat("总体不良反应发生率比较:\n")
cat("卡方检验 p值:", overall_chi_test[["p.value"]], "\n")
cat("Fisher精确检验 p值:", overall_fisher_test[["p.value"]], "\n")
cat("比值比:", overall_fisher_test[["estimate"]], "\n")
cat(
  "95% 置信区间: [", overall_fisher_test[["conf.int"]][1], ", ",
  overall_fisher_test[["conf.int"]][2], "]\n"
)
| 指标 | 新药组 (n=156) | 对照组 (n=152) | 均值差异 | 95% CI | p值 | 效应量 |
|---|---|---|---|---|---|---|
| 收缩压降幅 (mmHg) | 18.5 ± 8.2 | 12.3 ± 7.6 | 6.2 | [4.1, 8.3] | <0.001 | d = 0.79 |
| 舒张压降幅 (mmHg) | 11.2 ± 5.4 | 8.1 ± 5.1 | 3.1 | [1.8, 4.4] | <0.001 | d = 0.59 |
| 达标率 (%) | 78.2 | 58.6 | 19.6% | [9.2%, 30.0%] | <0.001 | OR = 2.51 |
| 亚组 | 样本量 | 均值差异 | 95% CI | p值 | 效应量 |
|---|---|---|---|---|---|
| 男性,<65岁 | 89 | 7.8 | [4.2, 11.4] | <0.001 | d = 0.95 |
| 男性,≥65岁 | 76 | 5.1 | [1.8, 8.4] | 0.003 | d = 0.68 |
| 女性,<65岁 | 82 | 6.9 | [3.5, 10.3] | <0.001 | d = 0.81 |
| 女性,≥65岁 | 61 | 4.2 | [0.9, 7.5] | 0.013 | d = 0.52 |
新药在收缩压和舒张压降幅方面均显著优于对照药物,效应量达到中等到大的水平,临床意义明确。
点估计显示,<65岁患者的降压获益大于≥65岁患者,男性患者的治疗效果略优于女性;但各亚组的置信区间相互重叠,这些差异仍需通过交互作用检验加以确认,方可为个性化治疗提供依据。
新药组不良反应发生率与对照组无显著差异,安全性良好,支持临床应用。
新药显著提高血压达标率,有望改善患者长期预后,具有重要的临床和社会价值。
主要终点达到预设的统计学显著性标准,结果可靠。
新药组与对照组的收缩压降幅差异(6.2 mmHg)超过5 mmHg的临床意义阈值,具有实际临床价值。
收缩压降幅的Cohen's d为0.79,接近大效应量标准(0.8),治疗效果显著且稳定。
让我们的统计专家团队帮助您准确识别和量化各种差异
立即咨询