R_homework_03

Questions

Codes

# # Rstudio Server URL: http://202.195.187.9:8787/
# 账号密码与Linux一致
# 切换R路径
.libPaths("/home/biotools/anaconda3/envs/R4.3.1/lib/R/")
################################################################################
# 加载相关R包
library(survival)
library(survminer)
library(lmtest)

# 加载数据集
data(lung)

# 创建年龄组
lung$age_group = cut(lung$age, breaks = c(-Inf, 55, 65, Inf),
                     labels = c("<=55", "56-65", ">65"))

# 创建生存对象
surv_obj = Surv(time = lung$time, event = lung$status)

# 绘制生存曲线图
fit = survfit(surv_obj ~ age_group, data = lung)
ggsurvplot(fit, data = lung, pval = TRUE, conf.int = TRUE,
           surv.median.line = "hv",risk.table = TRUE,
           title = "Different Age Groups (Lung)",
           xlab = "Time", ylab = "Survival")

################################################################################
## 建立生存时间与age/sex/ph.ecog/wt.loss变量的参数回归模型
# 指数回归模型
exp_model = survreg(surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung, dist = "exponential")
summary(exp_model)

# Weibull回归模型
weibull_model = survreg(surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung, dist = "weibull")
summary(weibull_model)

# 进行似然比检验
lrtest(exp_model, weibull_model)

## 检验是否满足比例风险要求
cox_model = coxph(surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung)
summary(cox_model)
cox.zph(cox_model) #ph

## 建立Cox回归模型
hr = exp(coef(cox_model))
ci = exp(confint(cox_model))
result = cbind(hr, ci)

colnames(result) = c("HR", "Lower 95% CI", "Upper 95% CI")
result

Output

> summary(exp_model)

Call:
survreg(formula = surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung, 
    dist = "exponential")
               Value Std. Error     z       p
(Intercept)  6.45081    0.64549  9.99 < 2e-16
age         -0.01215    0.00954 -1.27 0.20245
sex          0.54170    0.17488  3.10 0.00195
ph.ecog     -0.43799    0.12313 -3.56 0.00038
wt.loss      0.00725    0.00653  1.11 0.26649

Scale fixed at 1 

Exponential distribution
Loglik(model)= -1059.1   Loglik(intercept only)= -1071.7
	Chisq= 25.21 on 4 degrees of freedom, p= 4.6e-05 
Number of Newton-Raphson Iterations: 4 
n=213 (15 observations deleted due to missingness)

########################################################################
> summary(weibull_model)

Call:
survreg(formula = surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung, 
    dist = "weibull")
               Value Std. Error     z       p
(Intercept)  6.34120    0.46094 13.76 < 2e-16
age         -0.00889    0.00688 -1.29   0.196
sex          0.41616    0.12698  3.28   0.001
ph.ecog     -0.36643    0.09040 -4.05 5.0e-05
wt.loss      0.00613    0.00473  1.30   0.194
Log(scale)  -0.33381    0.06375 -5.24 1.6e-07

Scale= 0.716 

Weibull distribution
Loglik(model)= -1047.5   Loglik(intercept only)= -1062.6
	Chisq= 30.14 on 4 degrees of freedom, p= 4.6e-06 
Number of Newton-Raphson Iterations: 5 
n=213 (15 observations deleted due to missingness)

############################################################################
# 进行似然比检验
> lrtest(exp_model, weibull_model)
Likelihood ratio test

Model 1: surv_obj ~ age + sex + ph.ecog + wt.loss
Model 2: surv_obj ~ age + sex + ph.ecog + wt.loss
  #Df  LogLik Df  Chisq Pr(>Chisq)    
1   5 -1059.1                         
2   6 -1047.5  1 23.153  1.496e-06 ***
---
Signif. codes:  0***0.001**0.01*0.05 ‘.’ 0.1 ‘ ’ 1

############################################################################
> cox_model = coxph(surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung)
> summary(cox_model)
Call:
coxph(formula = surv_obj ~ age + sex + ph.ecog + wt.loss, data = lung)

  n= 213, number of events= 151 
   (15 observations deleted due to missingness)

             coef exp(coef)  se(coef)      z Pr(>|z|)    
age      0.013369  1.013459  0.009628  1.389 0.164951    
sex     -0.590775  0.553898  0.175339 -3.369 0.000754 ***
ph.ecog  0.515111  1.673824  0.125988  4.089 4.34e-05 ***
wt.loss -0.009006  0.991034  0.006658 -1.353 0.176135    
---
Signif. codes:  0***0.001**0.01*0.05 ‘.’ 0.1 ‘ ’ 1

        exp(coef) exp(-coef) lower .95 upper .95
age        1.0135     0.9867    0.9945    1.0328
sex        0.5539     1.8054    0.3928    0.7811
ph.ecog    1.6738     0.5974    1.3076    2.1427
wt.loss    0.9910     1.0090    0.9782    1.0041

Concordance= 0.647  (se = 0.026 )
Likelihood ratio test= 31.02  on 4 df,   p=3e-06
Wald test            = 29.94  on 4 df,   p=5e-06
Score (logrank) test = 30.65  on 4 df,   p=4e-06

############################################################################
> cox.zph(cox_model) #ph
         chisq df    p
age     0.4353  1 0.51
sex     2.6731  1 0.10
ph.ecog 1.6355  1 0.20
wt.loss 0.0457  1 0.83
GLOBAL  4.7516  4 0.31

############################################################################
> result
               HR Lower 95% CI Upper 95% CI
age     1.0134589    0.9945143    1.0327643
sex     0.5538981    0.3928084    0.7810504
ph.ecog 1.6738244    1.3075807    2.1426504
wt.loss 0.9910344    0.9781867    1.0040508

Results

  • 指数回归的对数似然值为 -1059.1。Weibull回归的对数似然值为 -1047.5。对数似然值越大,模型拟合数据的效果越好。因此,Weibull回归拟合效果更好。

  • Cox回归

Variable HR Lower 95% CI Upper 95% CI
age 1.0134589 0.9945143 1.0327643
sex 0.5538981 0.3928084 0.7810504
ph.ecog 1.6738244 1.3075807 2.1426504
wt.loss 0.9910344 0.9781867 1.0040508
  • 结论:性别(sex)和ECOG评分(ph.ecog)对生存时间有显著影响,而年龄(age)和体重减轻(wt.loss)的影响不显著。

Contents