library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3.9000 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(labelled)
library(questionr)
library(gtsummary)
## #Uighur
library(viridis)
## Le chargement a nécessité le package : viridisLite
library(scales)
##
## Attachement du package : 'scales'
## L'objet suivant est masqué depuis 'package:viridis':
##
## viridis_pal
## L'objet suivant est masqué depuis 'package:purrr':
##
## discard
## L'objet suivant est masqué depuis 'package:readr':
##
## col_factor
load("care_trajectories.RData")
care_trajectories <- as_tibble(care_trajectories)
var_label(care_trajectories$sex) <- "Sexe"
val_labels(care_trajectories$sex) <- c(homme = 0, femme = 1)
var_label(care_trajectories$age) <- "Âge"
var_label(care_trajectories$education) <- "Education"
val_labels(care_trajectories$education) <- c(
primaire = 1,
secondaire = 2,
supérieur = 3
)
val_labels(care_trajectories$care_status) <- c(
"diagnostiqué, mais pas suivi" = "D",
"suivi, mais pas sous traitement" = "C",
"sous traitement, mais infection non contrôlée" = "T",
"sous traitement et infection contrôlée" = "S"
)
care_trajectories <- care_trajectories %>%
set_variable_labels(
id = "Identifiant Patient",
month = "Mois depuis la diagnostic",
care_status = "Statut dans les soins",
wealth = "Niveau de richesse",
distance_clinic = "Distance à la clinique la plus proche"
) %>%
set_value_labels(
wealth = c(bas = 1, moyen = 2, haut = 3),
distance_clinic = c("moins de 10 km" = 1, "10 km ou plus" = 2)
)
care_trajectories %>%
unlabelled() %>%
tbl_summary()
Characteristic | N = 49,3651 |
---|---|
Identifiant Patient | 4,961 (2,481, 7,532) |
Mois depuis la diagnostic | 9 (4, 16) |
Statut dans les soins | |
diagnostiqué, mais pas suivi | 25,374 (51%) |
suivi, mais pas sous traitement | 5,886 (12%) |
sous traitement, mais infection non contrôlée | 4,596 (9.3%) |
sous traitement et infection contrôlée | 13,509 (27%) |
Sexe | |
homme | 17,781 (36%) |
femme | 31,584 (64%) |
Âge | |
16-29 | 16,911 (34%) |
30-59 | 29,365 (59%) |
60+ | 3,089 (6.3%) |
Education | |
primaire | 10,417 (21%) |
secondaire | 19,024 (39%) |
supérieur | 19,924 (40%) |
Niveau de richesse | |
bas | 15,432 (31%) |
moyen | 20,769 (42%) |
haut | 13,164 (27%) |
Distance à la clinique la plus proche | |
moins de 10 km | 26,804 (54%) |
10 km ou plus | 22,561 (46%) |
1
Median (IQR); n (%)
|
Le fichier contient en tout 49365 lignes (une par individu et par mois de suivi). Il correspond à 2929 individus différents.
n <- care_trajectories %>%
filter(month %in% (0:8*6)) %>%
group_by(month) %>%
count() %>%
pluck("n")
etiquettes <- paste0("M", 0:8*6, "\n(n=", n, ")")
ggplot(care_trajectories) +
aes(x = month, fill = to_factor(care_status)) +
geom_bar(width = 1, color = "gray50") +
scale_fill_viridis(discrete = TRUE, direction = -1) +
xlab("") + ylab("") +
scale_x_continuous(breaks = 0:8*6, labels = etiquettes) +
labs(fill = "Statut dans les soins") +
ggtitle("Distribution du statut dans les soins chaque mois") +
guides(fill = guide_legend(nrow = 2)) +
theme_light() +
theme(legend.position = "bottom")
ggplot(care_trajectories %>% filter(month <= 36)) +
aes(x = month, fill = to_factor(care_status)) +
geom_bar(width = 1, color = "gray50", position = "fill") +
scale_fill_viridis(discrete = TRUE, direction = -1) +
xlab("") + ylab("") +
scale_x_continuous(breaks = 0:8*6, labels = etiquettes) +
scale_y_continuous(labels = percent, breaks = 0:5/5) +
labs(fill = "Statut dans les soins") +
ggtitle("Cascade des soins observée, selon le temps depuis le diagnostic") +
guides(fill = guide_legend(nrow = 2)) +
theme_light() +
theme(legend.position = "bottom")
ind <- care_trajectories %>% filter(month == 0)
ind$diagnostic <- 0
ind <- ind %>%
left_join(
care_trajectories %>%
filter(care_status %in% c("C", "T", "S")) %>%
group_by(id) %>%
summarise(entree_soins = min(month)),
by = "id"
) %>%
left_join(
care_trajectories %>%
filter(care_status %in% c("T", "S")) %>%
group_by(id) %>%
summarise(initiation_tt = min(month)),
by = "id"
) %>%
left_join(
care_trajectories %>%
filter(care_status == "S") %>%
group_by(id) %>%
summarise(controle = min(month)),
by = "id"
) %>%
left_join(
care_trajectories %>%
group_by(id) %>%
summarise(suivi = max(month)),
by = "id"
)
library(survival)
library(broom)
tmp <- ind %>% filter(!is.na(entree_soins))
tmp$event <- FALSE
tmp$time <- tmp$suivi - tmp$entree_soins
tmp$event <- !is.na(tmp$initiation_tt)
tmp$time[tmp$event] <- tmp$initiation_tt[tmp$event] - tmp$entree_soins[tmp$event]
kaplan <- survfit(Surv(time, event) ~ 1 , data = tmp)
res <- tidy(kaplan, conf.int = TRUE)
ggplot(res) +
aes(x = time, y = estimate) +
geom_line()
km <- function(date_origine, date_evenement, nom = ""){
library(survival)
library(broom)
tmp <- ind
tmp <- tmp %>% filter(!is.na(tmp[[date_origine]]))
tmp$event <- FALSE
tmp$time <- tmp$suivi - tmp[[date_origine]]
tmp$event <- !is.na(tmp[[date_evenement]])
tmp$time[tmp$event] <- tmp[[date_evenement]][tmp$event] - tmp[[date_origine]][tmp$event]
kaplan <- survfit(Surv(time, event) ~ 1 , data = tmp)
res <- tidy(kaplan, conf.int = TRUE)
res$nom <- nom
res
}
depuis_diag <- bind_rows(
km("diagnostic", "entree_soins", "Entrée en soins"),
km("diagnostic", "initiation_tt", "Initiation du traitement"),
km("diagnostic", "controle", "Contrôle de l'infection")
)
depuis_diag$nom <- fct_inorder(depuis_diag$nom)
ggplot(depuis_diag) +
aes(
x = time, y = 1 - estimate, colour = nom,
ymin = 1 - conf.high, ymax = 1 - conf.low, fill = nom
) +
geom_ribbon(alpha = .25, mapping = aes(colour = NULL)) +
geom_line() +
scale_x_continuous(limits = c(0, 36), breaks = 0:6*6) +
scale_y_continuous(labels = scales::percent) +
xlab("mois depuis le diagnostic") +
ylab("") + labs(color = "", fill = "") +
theme_classic() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_line(colour = "grey")
)
## Warning: Removed 31 row(s) containing missing values (geom_path).
depuis_prec <- bind_rows(
km("diagnostic", "entree_soins", "Entrée en soins"),
km("entree_soins", "initiation_tt", "Initiation du traitement"),
km("initiation_tt", "controle", "Contrôle de l'infection")
)
depuis_prec$nom <- fct_inorder(depuis_prec$nom)
ggplot(depuis_prec) +
aes(
x = time, y = 1 - estimate, colour = nom,
ymin = 1 - conf.high, ymax = 1 - conf.low, fill = nom
) +
geom_ribbon(alpha = .25, mapping = aes(colour = NULL)) +
geom_line() +
scale_x_continuous(limits = c(0, 36), breaks = 0:6*6) +
scale_y_continuous(labels = scales::percent) +
xlab("mois depuis l'étape précédente") +
ylab("") + labs(color = "", fill = "") +
theme_classic() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_line(colour = "grey")
) +
facet_grid(cols = vars(nom))
## Warning: Removed 13 row(s) containing missing values (geom_path).
km_sexe <- function(date_origine, date_evenement, nom = ""){
library(survival)
library(broom)
tmp <- ind
tmp <- tmp %>% filter(!is.na(tmp[[date_origine]]))
tmp$event <- FALSE
tmp$time <- tmp$suivi - tmp[[date_origine]]
tmp$event <- !is.na(tmp[[date_evenement]])
tmp$time[tmp$event] <- tmp[[date_evenement]][tmp$event] - tmp[[date_origine]][tmp$event]
tmp$sexe <- to_factor(tmp$sex)
kaplan <- survfit(Surv(time, event) ~ sexe , data = tmp)
res <- tidy(kaplan, conf.int = TRUE)
res$nom <- nom
res
}
depuis_prec_sexe <- bind_rows(
km_sexe("diagnostic", "entree_soins", "Entrée en soins"),
km_sexe("entree_soins", "initiation_tt", "Initiation du traitement"),
km_sexe("initiation_tt", "controle", "Contrôle de l'infection")
)
depuis_prec_sexe$nom <- fct_inorder(depuis_prec_sexe$nom)
depuis_prec_sexe$sexe <- depuis_prec_sexe$strata %>%
str_sub(start = 6)
ggplot(depuis_prec_sexe) +
aes(
x = time, y = 1 - estimate, colour = sexe,
ymin = 1 - conf.high, ymax = 1 - conf.low, fill = sexe
) +
geom_ribbon(alpha = .25, mapping = aes(colour = NULL)) +
geom_line() +
scale_x_continuous(limits = c(0, 36), breaks = 0:6*6) +
scale_y_continuous(labels = scales::percent) +
xlab("mois depuis l'étape précédente") +
ylab("") + labs(color = "", fill = "") +
theme_classic() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_line(colour = "grey")
) +
facet_grid(cols = vars(nom))
large <- care_trajectories %>%
select(id, month, care_status) %>%
pivot_wider(names_from = month, values_from = care_status, names_prefix = "m")
library(TraMineR, quietly = TRUE)
##
## TraMineR stable version 2.2-1 (Built: 2020-11-02)
## Website: http://traminer.unige.ch
## Please type 'citation("TraMineR")' for citation information.
seq_all <- seqdef(
large %>% select(-id),
id = large$id,
alphabet = c("D", "C", "T", "S"),
states = c("diagnostiqué", "en soins", "sous traitement", "controlé"),
cpal = viridis(4, direction = -1)
)
## [>] found missing values ('NA') in sequence data
## [>] preparing 2929 sequences
## [>] coding void elements with '%' and missing values with '*'
## [>] state coding:
## [alphabet] [label] [long label]
## 1 D diagnostiqué diagnostiqué
## 2 C en soins en soins
## 3 T sous traitement sous traitement
## 4 S controlé controlé
## [>] 2929 sequences in the data set
## [>] min/max sequence length: 1/51
seqdplot(seq_all, legend.prop = .25)
couts <- seqcost(seq_all, method = "CONSTANT")
## [>] creating 4x4 substitution-cost matrix using 2 as constant value
couts$sm[1,] <- c(0, 1, 2, 3)
couts$sm[2,] <- c(1, 0, 1, 2)
couts$sm[3,] <- c(2, 1, 0, 1)
couts$sm[4,] <- c(3, 2, 1, 0)
couts$indel <- max(couts$sm)/2
couts
## $indel
## [1] 1.5
##
## $sm
## diagnostiqué-> en soins-> sous traitement-> controlé->
## diagnostiqué-> 0 1 2 3
## en soins-> 1 0 1 2
## sous traitement-> 2 1 0 1
## controlé-> 3 2 1 0
dist_all <- seqdist(seq_all, method = "OM", sm = couts$sm, indel = couts$indel)
## [>] 2929 sequences with 4 distinct states
## [>] checking 'sm' (size and triangle inequality)
## [>] 1370 distinct sequences
## [>] min/max sequence lengths: 1/51
## [>] computing distances using the OM metric
## [>] elapsed time: 2.19 secs
arbre_all <- hclust(as.dist(dist_all), method = "ward.D2")
plot(arbre_all)
library(seqhandbook)
seq_heatmap(seq_all, arbre_all)
large$seq_length <- seqlength(seq_all)
large_m18 <- large %>%
filter(seq_length >= 19) %>%
select(id, m0:m18)
seq_m18 <- seqdef(
large_m18 %>% select(-id),
id = large_m18$id,
alphabet = c("D", "C", "T", "S"),
states = c("diagnostiqué", "en soins", "sous traitement", "controlé"),
cpal = viridis(4, direction = -1)
)
## [>] state coding:
## [alphabet] [label] [long label]
## 1 D diagnostiqué diagnostiqué
## 2 C en soins en soins
## 3 T sous traitement sous traitement
## 4 S controlé controlé
## [>] 1317 sequences in the data set
## [>] min/max sequence length: 19/19
seqdplot(seq_m18, legend.prop = .25)
dist_m18 <- seqdist(seq_m18, method = "OM", sm = couts$sm, indel = couts$indel)
## [>] 1317 sequences with 4 distinct states
## [>] checking 'sm' (size and triangle inequality)
## [>] 578 distinct sequences
## [>] min/max sequence lengths: 19/19
## [>] computing distances using the OM metric
## [>] elapsed time: 0.54 secs
arbre_m18 <- hclust(as.dist(dist_m18), method = "ward.D2")
plot(arbre_m18)
seq_heatmap(seq_m18, arbre_m18)
library(WeightedCluster, quietly = TRUE)
## This is WeightedCluster stable version 1.4-1 (Built: 2020-07-07)
##
## To get the manuals, please run:
## vignette("WeightedCluster") ## Complete manual in English
## vignette("WeightedClusterFR") ## Complete manual in French
## vignette("WeightedClusterPreview") ## Short preview in English
##
## To cite WeightedCluster in publications please use:
## Studer, Matthias (2013). WeightedCluster Library Manual: A practical
## guide to creating typologies of trajectories in the social sciences
## with R. LIVES Working Papers, 24. doi:
## 10.12682/lives.2296-1658.2013.24
seqtree_m18 <- as.seqtree(arbre_m18, seqdata = seq_m18, diss = dist_m18, ncluster = 7)
seqtreedisplay(seqtree_m18, type = "I", filename = "seqtree_m18.png", show.depth = TRUE)
## [>] GraphViz version 2.38 found.
mon seqtree
large_m18$typo_cah <- cutree(arbre_m18, k = 4)
pam_m18 <- wcKMedoids(dist_m18, k = 4, initialclust = arbre_m18)
large_m18$typo_pam <- pam_m18$clustering
library(gtsummary, quietly = TRUE)
large_m18 %>%
tbl_cross(row = typo_cah, col = typo_pam)
Characteristic | typo_pam | Total | |||
---|---|---|---|---|---|
6 | 23 | 85 | 410 | ||
typo_cah | |||||
1 | 522 | 4 | 0 | 39 | 565 |
2 | 0 | 3 | 330 | 3 | 336 |
3 | 1 | 203 | 59 | 13 | 276 |
4 | 23 | 0 | 5 | 112 | 140 |
Total | 546 | 210 | 394 | 167 | 1,317 |
large_m18$ordre_cmd <- cmdscale(as.dist(dist_m18), k = 1)
seqIplot(seq_m18, group = large_m18$typo_cah)
seqIplot(seq_m18, group = large_m18$typo_cah, sortv = large_m18$ordre_cmd)
seqIplot(seq_m18, group = large_m18$typo_pam, sortv = large_m18$ordre_cmd)
tab <- tibble(
indicateur = names(wcClusterQuality(dist_m18, large_m18$typo_cah)$stats),
cah = wcClusterQuality(dist_m18, large_m18$typo_cah)$stats,
pam = wcClusterQuality(dist_m18, large_m18$typo_pam)$stats
)
tab %>% gt::gt() %>% gt::fmt_number(2:3, decimals = 2, sep_mark = " ", dec_mark = ",")
indicateur | cah | pam |
---|---|---|
PBC | 0,70 | 0,71 |
HG | 0,93 | 0,95 |
HGSD | 0,93 | 0,95 |
ASW | 0,53 | 0,57 |
ASWw | 0,53 | 0,57 |
CH | 938,97 | 1 023,12 |
R2 | 0,68 | 0,70 |
CHsq | 3 351,21 | 3 980,60 |
R2sq | 0,88 | 0,90 |
HC | 0,03 | 0,02 |
large_m18$groupe <- as.character(large_m18$typo_pam)
large_m18$groupe <- fct_recode(large_m18$groupe,
"Hors Soins" = "6",
"Lents" = "23",
"Rapides" = "85",
"Inaboutis" = "410"
)
large_m18$groupe <- fct_relevel(
large_m18$groupe,
"Rapides", "Lents", "Inaboutis", "Hors Soins"
)
large_m18 <- large_m18 %>%
mutate(rang_cmd = rank(ordre_cmd, ties.method = "first"))
long_m18 <- large_m18 %>%
pivot_longer(m0:m18, names_to = "month", values_to = "care_status", names_prefix = "m")
long_m18$month <- as.integer(long_m18$month)
long_m18$care_statusF <- to_factor(long_m18$care_status)
ggplot(long_m18) +
aes(x = month, y = factor(rang_cmd), fill = care_statusF) +
geom_raster() +
scale_fill_viridis(discrete = TRUE, direction = -1) +
facet_grid(rows = vars(groupe), scales = "free", space = "free") +
labs(x = NULL, y = NULL, fill = NULL) +
scale_y_discrete(labels = NULL) +
scale_x_continuous(
expand = expansion(0, 0),
breaks = 0:3*6,
labels = paste0("M", 0:3*6)
) +
guides(
fill = guide_legend(ncol = 2)
) +
theme_minimal() +
theme(
legend.position = "bottom"
)