# ANALYSE DE SURVIE
# Chargement des données -----
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3.9000 v purrr 0.3.4
## v tibble 3.1.1 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: package 'tibble' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(labelled)
library(questionr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(survival)
library(survminer)
## Loading required package: ggpubr
## Warning: replacing previous import 'ggplot2:::=' by 'data.table:::=' when
## loading 'survMisc'
data(fecondite)
# Recodages -------
## durée d'observation
enfants <- enfants %>%
left_join(
femmes %>% select(id_femme, date_entretien),
by = "id_femme"
)
enfants <- enfants %>%
mutate(
duree_observation = interval(date_naissance, date_entretien) %>%
time_length(unit = "month")
)
enfants <- enfants %>%
mutate(
date_entretien = date_entretien %>%
recode_if(
date_entretien < date_naissance,
date_entretien %m+% months(1)
),
duree_observation = interval(date_naissance, date_entretien) %>%
time_length(unit = "month"),
duree_observation_mois_revolus = trunc(duree_observation)
)
## age au décès imputation
enfants <- enfants %>%
mutate(
age_deces_impute = age_deces + runif(n())
)
## variables de survie
enfants <- enfants %>%
mutate(
deces = if_else(survie == 0, 1, 0),
time = if_else(
survie == 0,
age_deces_impute,
duree_observation
),
time_revolus = if_else(
survie == 0,
age_deces,
duree_observation_mois_revolus
)
) %>%
set_variable_labels(deces = "Est décédé ?") %>%
set_value_labels(deces = c("en vie" = 0, "décédé" = 1))
## variables explicatives
enfants <- enfants %>%
left_join(
femmes %>%
select(id_femme, id_menage, milieu, educ, date_naissance_mere = date_naissance, nb_enf_ideal),
by = "id_femme"
) %>%
left_join(
menages %>%
select(id_menage, structure, richesse),
by = "id_menage"
)
enfants <- enfants %>%
mutate(
sexe = to_factor(sexe),
richesse = to_factor(richesse),
milieu = to_factor(milieu),
structure = to_factor(structure) %>%
fct_drop() %>%
fct_relevel("deux adultes de sexe opposé"),
educ2 = to_factor(educ) %>%
fct_recode(
"secondaire et sup" = "secondaire",
"secondaire et sup" = "supérieur"
),
age_mere_naissance = interval(date_naissance_mere, date_naissance) %>%
time_length(unit = "years")
)
enfants$gp_age_mere_naissance <- cut(enfants$age_mere_naissance,
include.lowest = TRUE,
right = FALSE,
breaks = c(-Inf, 20, 30, Inf)
)
enfants$gp_age_mere_naissance <- enfants$gp_age_mere_naissance %>%
fct_recode(
"19 ans ou moins" = "[-Inf,20)",
"20-29 ans" = "[20,30)",
"30 ans ou plus" = "[30, Inf]"
)
enfants <- enfants %>%
arrange(id_femme, date_naissance) %>%
group_by(id_femme) %>%
mutate(
rang = rank(date_naissance, ties.method = "max"),
nb_enf_ideal = nb_enf_ideal %>% user_na_to_na() %>% unclass(), # transformer les valeurs indiquées manquantes en NA
rang_apres_ideal = if_else(rang > nb_enf_ideal, "oui", "non") %>%
factor(levels = c("non", "oui"))
) %>%
set_variable_labels(rang_apres_ideal = "Rang de naissance plus élevé qu'idéal")
# Courbe de Kaplan-Meier -----
km_globale <- survfit(Surv(time, deces) ~ 1, data = enfants)
ggsurvplot(km_globale)
ggsurvplot(km_globale, fun = "event", risk.table = TRUE, surv.scale = "percent", break.time.by = 12)
km_globale_revolus <- survfit(Surv(time_revolus, deces) ~ 1, data = enfants)
ggsurvplot(km_globale_revolus, fun = "event", risk.table = TRUE, surv.scale = "percent", break.time.by = 12)
km_sexe <- survfit(Surv(time, deces) ~ sexe, data = enfants)
survdiff(Surv(time, deces) ~ sexe, data = enfants)
## Call:
## survdiff(formula = Surv(time, deces) ~ sexe, data = enfants)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sexe=masculin 762 94 66.3 11.6 21.8
## sexe=féminin 822 48 75.7 10.2 21.8
##
## Chisq= 21.8 on 1 degrees of freedom, p= 3e-06
ggsurvplot(km_sexe, fun = "event", risk.table = TRUE, surv.scale = "percent", break.time.by = 12, pval = TRUE)
km_sexe_age <- survfit(Surv(time, deces) ~ sexe + gp_age_mere_naissance, data = enfants)
ggsurvplot(km_sexe_age, fun = "event", risk.table = TRUE, surv.scale = "percent", break.time.by = 12, pval = TRUE)
## Modèle de Cox
mod1 <- coxph(
Surv(time, deces) ~ sexe + milieu + richesse +
structure + educ2 + gp_age_mere_naissance + rang_apres_ideal,
data = enfants
)
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.0.5
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
tbl <- tbl_regression(mod1, exponentiate = TRUE)
tbl %>%
add_global_p(keep = TRUE) %>%
add_significance_stars(hide_p = FALSE)
## add_global_p: Global p-values for variable(s) `add_global_p(include =
## c("sexe", "milieu", "richesse", "structure", "educ2", "gp_age_mere_naissance",
## "rang_apres_ideal"))` were calculated with
## `car::Anova(x$model_obj, type = "III")`
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
Characteristic | HR1,2 | SE2 | p-value |
---|---|---|---|
Sexe de l'enfant | <0.001 | ||
masculin | — | — | |
féminin | 0.44*** | 0.178 | <0.001 |
Milieu de résidence | 0.013 | ||
urbain | — | — | |
rural | 1.92* | 0.270 | 0.015 |
Niveau de vie (quintiles) | 0.5 | ||
très pauvre | — | — | |
pauvre | 0.93 | 0.251 | 0.8 |
moyen | 1.38 | 0.248 | 0.2 |
riche | 1.42 | 0.298 | 0.2 |
très riche | 1.59 | 0.429 | 0.3 |
Structure démographique du ménage | 0.6 | ||
deux adultes de sexe opposé | — | — | |
un adulte | 0.87 | 0.600 | 0.8 |
deux adultes de même sexe | 1.83 | 0.377 | 0.11 |
trois adultes ou plus avec lien de parenté | 1.05 | 0.197 | 0.8 |
adultes sans lien de parenté | 0.88 | 0.305 | 0.7 |
Niveau d'éducation | 0.8 | ||
aucun | — | — | |
primaire | 0.97 | 0.206 | 0.9 |
secondaire et sup | 0.82 | 0.367 | 0.6 |
gp_age_mere_naissance | 0.5 | ||
19 ans ou moins | — | — | |
20-29 ans | 1.36 | 0.268 | 0.2 |
30 ans ou plus | 1.36 | 0.291 | 0.3 |
Rang de naissance plus élevé qu'idéal | 0.054 | ||
non | — | — | |
oui | 4.04* | 0.603 | 0.021 |
1
*p<0.05; **p<0.01; ***p<0.001
2
HR = Hazard Ratio, SE = Standard Error
|
plot(tbl)
ggcoef_model(mod1, exponentiate = TRUE)
mod2 <- step(mod1)
## Start: AIC=2026.96
## Surv(time, deces) ~ sexe + milieu + richesse + structure + educ2 +
## gp_age_mere_naissance + rang_apres_ideal
##
## Df AIC
## - structure 4 2021.9
## - richesse 4 2022.6
## - educ2 2 2023.3
## - gp_age_mere_naissance 2 2024.5
## <none> 2027.0
## - rang_apres_ideal 1 2028.7
## - milieu 1 2031.2
## - sexe 1 2047.1
##
## Step: AIC=2021.87
## Surv(time, deces) ~ sexe + milieu + richesse + educ2 + gp_age_mere_naissance +
## rang_apres_ideal
##
## Df AIC
## - richesse 4 2016.9
## - educ2 2 2018.1
## - gp_age_mere_naissance 2 2019.3
## <none> 2021.9
## - rang_apres_ideal 1 2023.4
## - milieu 1 2025.4
## - sexe 1 2042.1
##
## Step: AIC=2016.86
## Surv(time, deces) ~ sexe + milieu + educ2 + gp_age_mere_naissance +
## rang_apres_ideal
##
## Df AIC
## - educ2 2 2013.1
## - gp_age_mere_naissance 2 2014.7
## <none> 2016.9
## - rang_apres_ideal 1 2018.0
## - milieu 1 2018.6
## - sexe 1 2037.4
##
## Step: AIC=2013.13
## Surv(time, deces) ~ sexe + milieu + gp_age_mere_naissance + rang_apres_ideal
##
## Df AIC
## - gp_age_mere_naissance 2 2011.0
## <none> 2013.1
## - rang_apres_ideal 1 2014.3
## - milieu 1 2015.6
## - sexe 1 2033.6
##
## Step: AIC=2011.03
## Surv(time, deces) ~ sexe + milieu + rang_apres_ideal
##
## Df AIC
## <none> 2011.0
## - rang_apres_ideal 1 2012.3
## - milieu 1 2013.8
## - sexe 1 2031.4
ggcoef_model(mod2, exponentiate = TRUE)
ggforest(mod2)
## Warning in .get_data(model, data = data): The `data` argument is not provided.
## Data will be extracted from model fit.
## Validité du modèle
test <- cox.zph(mod2)
test
## chisq df p
## sexe 0.2510 1 0.62
## milieu 0.0129 1 0.91
## rang_apres_ideal 0.7758 1 0.38
## GLOBAL 1.0467 3 0.79
ggcoxzph(test)
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values