References
Load data
## Load survival package
library(survival)
## List datasets in survival package
data(package = "survival")
## Load lung data
data(lung)
## Show first 6 rows
head(lung)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
1 3 306 2 74 1 1 90 100 1175 NA
2 3 455 2 68 1 0 90 90 1225 15
3 3 1010 1 56 1 0 90 90 NA 15
4 5 210 2 57 1 1 90 60 1150 11
5 1 883 2 60 1 0 100 90 NA 0
6 12 1022 1 74 1 1 50 80 513 0
Coding (from help file)
NCCTG Lung Cancer Data
Description:
Survival in patients with advanced lung cancer from the North
Central Cancer Treatment Group. Performance scores rate how well
the patient can perform usual daily activities.
inst: Institution code
time: Survival time in days
status: censoring status 1=censored, 2=dead
age: Age in years
sex: Male=1 Female=2
ph.ecog: ECOG performance score (0=good 5=dead)
ph.karno: Karnofsky performance score (bad=0-good=100) rated by physician
pat.karno: Karnofsky performance score as rated by patient
meal.cal: Calories consumed at meals
wt.loss: Weight loss in last six months
Create a survival object
## Add survival object. status == 2 is death
lung$SurvObj <- with(lung, Surv(time, status == 2))
## Check data
head(lung)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss SurvObj
1 3 306 2 74 1 1 90 100 1175 NA 306
2 3 455 2 68 1 0 90 90 1225 15 455
3 3 1010 1 56 1 0 90 90 NA 15 1010+
4 5 210 2 57 1 1 90 60 1150 11 210
5 1 883 2 60 1 0 100 90 NA 0 883
6 12 1022 1 74 1 1 50 80 513 0 1022+
Kaplan-Meier estimator without grouping
## Kaplan-Meier estimator. The "log-log" confidence interval is preferred.
km.as.one <- survfit(SurvObj ~ 1, data = lung, conf.type = "log-log")
km.by.sex <- survfit(SurvObj ~ sex, data = lung, conf.type = "log-log")
## Show object
km.as.one
Call: survfit(formula = SurvObj ~ 1, data = lung, conf.type = "log-log")
records n.max n.start events median 0.95LCL 0.95UCL
228 228 228 165 310 284 361
km.by.sex
Call: survfit(formula = SurvObj ~ sex, data = lung, conf.type = "log-log")
records n.max n.start events median 0.95LCL 0.95UCL
sex=1 138 138 138 112 270 210 306
sex=2 90 90 90 53 426 345 524
## See survival estimates at given time (lots of outputs)
## summary(km.as.one)
## summary(km.by.sex)
## Plotting without any specification
plot(km.as.one)
plot(km.by.sex)
## Without
plot(km.as.one, conf = F, mark.time = F)
plot(km.by.sex, conf = F, mark.time = F)
Cox regression using coxph
## Fit Cox regression: age, sex, Karnofsky performance score, wt loss
res.cox1 <- coxph(SurvObj ~ age + sex + ph.karno + wt.loss, data = lung)
res.cox1
Call:
coxph(formula = SurvObj ~ age + sex + ph.karno + wt.loss, data = lung)
coef exp(coef) se(coef) z p
age 0.01514 1.015 0.00984 1.539 0.1200
sex -0.51396 0.598 0.17441 -2.947 0.0032
ph.karno -0.01287 0.987 0.00618 -2.081 0.0370
wt.loss -0.00225 0.998 0.00636 -0.353 0.7200
Likelihood ratio test=18.8 on 4 df, p=0.000844 n= 214, number of events= 152
(14 observations deleted due to missingness)
## Check for violation of proportional hazard (constant HR over time)
(res.zph1 <- cox.zph(res.cox1))
rho chisq p
age -0.00837 0.0117 0.91381
sex 0.13137 2.5579 0.10975
ph.karno 0.23963 8.2624 0.00405
wt.loss 0.05930 0.5563 0.45575
GLOBAL NA 12.0669 0.01686
## Displays a graph of the scaled Schoenfeld residuals, along with a smooth curve.
plot(res.zph1)
## residuals(res.cox1, type = "scaledsch")
## c("martingale", "deviance", "score", "schoenfeld", "dfbeta", "dfbetas", "scaledsch","partial")
## Check Karnofsky performance score (only 6 discrete values)
table(lung$ph.karno)
50 60 70 80 90 100
6 19 32 67 74 29
## Solution 1: Stratify
res.cox1.strata <- coxph(SurvObj ~ age + sex + strata(ph.karno) + wt.loss, data = lung)
cox.zph(res.cox1.strata)
rho chisq p
age 0.0320 0.168 0.682
sex 0.1264 2.261 0.133
wt.loss 0.0469 0.346 0.557
GLOBAL NA 2.619 0.454
summary(res.cox1.strata)
Call:
coxph(formula = SurvObj ~ age + sex + strata(ph.karno) + wt.loss,
data = lung)
n= 214, number of events= 152
(14 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
age 0.01825 1.01842 0.01002 1.82 0.06854 .
sex -0.60333 0.54699 0.18037 -3.34 0.00082 ***
wt.loss -0.00536 0.99465 0.00669 -0.80 0.42288
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
age 1.018 0.982 0.999 1.039
sex 0.547 1.828 0.384 0.779
wt.loss 0.995 1.005 0.982 1.008
Concordance= 0.615 (se = 0.057 )
Rsquare= 0.069 (max possible= 0.983 )
Likelihood ratio test= 15.4 on 3 df, p=0.00152
Wald test = 14.6 on 3 df, p=0.00221
Score (logrank) test = 14.9 on 3 df, p=0.00187
## Solution 2: Time-varying effect
## Event status variable necessary
lung$event <- (lung$status == 2)
## Erase the survival object (Leaving it in data frames breaks the conversion)
lung2$SurvObj <- NULL
Error: object 'lung2' not found
## Counting process format creation
lung.split <- survSplit(data = lung,
cut = c(200,400), # vector of timepoints to cut at
end = "time", # character string with name of event time variable
event = "event", # character string with name of censoring indicator
start = "start", # character string with name of start time variable (created)
id = "id", # character string with name of new id variable to create
zero = 0, # If start doesn't already exist, used as start
episode = NULL # character string with name of new episode variable (optional)
)
## Make id numeric
lung.split$id <- as.numeric(lung.split$id)
## Reorder by id, then start time
library(doBy)
lung.split <- orderBy( ~ id + start, lung.split)
## Recreate SurbObj
lung.split$SurvObj <- with(lung.split, Surv(time = (start), time2 = time, event = event))
## Check
head(lung.split, 10)
inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss SurvObj event start id
1 3 200 2 74 1 1 90 100 1175 NA ( 0,200+] 0 0 1
685 3 200 2 74 1 1 90 100 1175 NA ( 0,200+] 0 0 1
229 3 306 2 74 1 1 90 100 1175 NA (200,306] 1 200 1
913 3 306 2 74 1 1 90 100 1175 NA (200,306] 1 200 1
2 3 200 2 68 1 0 90 90 1225 15 ( 0,200+] 0 0 2
686 3 200 2 68 1 0 90 90 1225 15 ( 0,200+] 0 0 2
230 3 400 2 68 1 0 90 90 1225 15 (200,400+] 0 200 2
914 3 400 2 68 1 0 90 90 1225 15 (200,400+] 0 200 2
458 3 455 2 68 1 0 90 90 1225 15 (400,455] 1 400 2
1142 3 455 2 68 1 0 90 90 1225 15 (400,455] 1 400 2
## Time-varying effect of baseline variable by including interaction with interval
res.cox1.strata <- coxph(SurvObj ~ age + sex + ph.karno + ph.karno:factor(start) + wt.loss + cluster(id),
data = lung.split)
summary(res.cox1.strata)
Call:
coxph(formula = SurvObj ~ age + sex + ph.karno + ph.karno:factor(start) +
wt.loss + cluster(id), data = lung.split)
n= 822, number of events= 304
(36 observations deleted due to missingness)
coef exp(coef) se(coef) robust se z Pr(>|z|)
age 0.01582 1.01595 0.00700 0.00994 1.59 0.11137
sex -0.53285 0.58693 0.12325 0.16664 -3.20 0.00139 **
ph.karno -0.03335 0.96720 0.00688 0.01003 -3.32 0.00089 ***
wt.loss -0.00239 0.99761 0.00451 0.00670 -0.36 0.72163
ph.karno:factor(start)200 0.02226 1.02251 0.01008 0.01203 1.85 0.06412 .
ph.karno:factor(start)400 0.04180 1.04268 0.01032 0.01317 3.17 0.00150 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
age 1.016 0.984 0.996 1.036
sex 0.587 1.704 0.423 0.814
ph.karno 0.967 1.034 0.948 0.986
wt.loss 0.998 1.002 0.985 1.011
ph.karno:factor(start)200 1.023 0.978 0.999 1.047
ph.karno:factor(start)400 1.043 0.959 1.016 1.070
Concordance= 0.645 (se = 0.019 )
Rsquare= 0.065 (max possible= 0.978 )
Likelihood ratio test= 54.8 on 6 df, p=5.07e-10
Wald test = 26.6 on 6 df, p=0.000176
Score (logrank) test = 56.5 on 6 df, p=2.33e-10, Robust = 22.9 p=0.000827
(Note: the likelihood ratio and score tests assume independence of
observations within a cluster, the Wald and robust score tests do not).
rms::survplot() function
## Load rms package
library(rms)
## without specification
survplot(km.by.sex)
## Minimum decent
survplot(km.by.sex,
conf = "none")
## Full-fledged grammer
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = TRUE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Without dots
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Different time incremente
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 300, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Plot cumulative probability F(t) = 1 - S(t)
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Cumulative Incidence",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
## y.n.risk = 0, cex.n.risk = 0.6
)
## Change position of number at risk
survplot(fit = km.by.sex,
conf = c("none","bands","bars")[1],
xlab = "", ylab = "Survival",
## xlim(0,100),
label.curves = TRUE, # label curves directly
## label.curves = list(keys = "lines"), # legend instead of direct label
levels.only = FALSE, # show only levels, no label
abbrev.label = FALSE, # if label used, abbreviate
## fun = function(x) {1 - x}, # Cumulative probability plot
loglog = FALSE, # log(-log Survival) plot
logt = FALSE, # log time
time.inc = 100, # time increment
dots = FALSE, # dot grid
n.risk = TRUE, # show number at risk
## srt.n.risk = 0, sep.n.risk = 0.056, adj.n.risk = 1,
y.n.risk = -0.2, cex.n.risk = 0.6
)
Posted by Chathurika Chandrapala
No comments:
Post a Comment