# Classification ascendante hiérarchique (CAH)
# Charger les données
library(questionr)
data(hdv2003)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3.9000 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(gtsummary)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Recodage de hdv2003$age en hdv2003$groupe_age
hdv2003$groupe_age <- cut(hdv2003$age,
include.lowest = TRUE,
right = FALSE,
dig.lab = 4,
breaks = c(18, 25, 45, 65, 97)
)
## Recodage de hdv2003$nivetud en hdv2003$etud
hdv2003$etud <- fct_recode(hdv2003$nivetud,
"Primaire" = "N'a jamais fait d'etudes",
"Primaire" = "A arrete ses etudes, avant la derniere annee d'etudes primaires",
"Primaire" = "Derniere annee d'etudes primaires",
"Secondaire" = "1er cycle",
"Secondaire" = "2eme cycle",
"Technique/Pro" = "Enseignement technique ou professionnel court",
"Technique/Pro" = "Enseignement technique ou professionnel long",
"Supérieur" = "Enseignement superieur y compris technique superieur"
)
hdv2003$etud <- fct_explicit_na(hdv2003$etud, "Non documenté")
d2 <- hdv2003 %>%
select(groupe_age, sexe, etud, peche.chasse, cinema, cuisine, bricol, sport, lecture.bd)
library(ade4)
acm <- dudi.acm(d2, scannf = FALSE, nf = Inf)
# calcul de la matrice de distance
md <- dist.dudi(acm)
# calcul de la matrice des distances de Gower
library(cluster)
md_gower <- daisy(d2, metric = "gower")
# calcul du dendrogramme
arbre <- hclust(md, method = "ward.D2")
arbre_gower <- hclust(md_gower, method = "ward.D2")
# Représenter le dendrogramme
plot(arbre, labels = FALSE)
rect.hclust(arbre, 2, border = "red")
rect.hclust(arbre, 5, border = "blue")
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.14.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
color_branches(arbre, k = 5) %>% ggplot(labels = FALSE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_dend(arbre, k = 5, show_labels = FALSE, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
# saut d'inertie
inertie <- sort(arbre$height, decreasing = TRUE)
plot(inertie[1:20], type = "s")
inertie_gower <- sort(arbre_gower$height, decreasing = TRUE)
plot(inertie_gower[1:10], type = "s")
source(url("https://raw.githubusercontent.com/larmarange/JLutils/master/R/clustering.R"))
best.cutree(arbre_gower, graph = TRUE)
## [1] 3
best.cutree(arbre, graph = TRUE)
## [1] 3
library(WeightedCluster)
## Loading required package: TraMineR
##
## TraMineR stable version 2.2-1 (Built: 2020-11-02)
## Website: http://traminer.unige.ch
## Please type 'citation("TraMineR")' for citation information.
## This is WeightedCluster stable version 1.4-1 (Built: 2020-07-07)
##
## To get the manuals, please run:
## vignette("WeightedCluster") ## Complete manual in English
## vignette("WeightedClusterFR") ## Complete manual in French
## vignette("WeightedClusterPreview") ## Short preview in English
##
## To cite WeightedCluster in publications please use:
## Studer, Matthias (2013). WeightedCluster Library Manual: A practical
## guide to creating typologies of trajectories in the social sciences
## with R. LIVES Working Papers, 24. doi:
## 10.12682/lives.2296-1658.2013.24
as.clustrange(arbre, md) %>% plot()
as.clustrange(arbre_gower, md_gower) %>% plot()
hdv2003$typo <- cutree(arbre, 3)
hdv2003$typo_gower <- cutree(arbre_gower, 3)
hdv2003 %>%
tbl_summary(include = c(names(d2), "typo"), by = "typo")
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
Characteristic | 1, N = 1,2561 | 2, N = 1861 | 3, N = 5581 |
---|---|---|---|
groupe_age | |||
[18,25) | 5 (0.4%) | 164 (88%) | 0 (0%) |
[25,45) | 662 (53%) | 19 (10%) | 25 (4.5%) |
[45,65) | 557 (44%) | 3 (1.6%) | 185 (33%) |
[65,97] | 32 (2.5%) | 0 (0%) | 348 (62%) |
sexe | |||
Homme | 597 (48%) | 84 (45%) | 218 (39%) |
Femme | 659 (52%) | 102 (55%) | 340 (61%) |
etud | |||
Primaire | 56 (4.5%) | 1 (0.5%) | 409 (73%) |
Secondaire | 307 (24%) | 15 (8.1%) | 65 (12%) |
Technique/Pro | 498 (40%) | 48 (26%) | 48 (8.6%) |
Supérieur | 391 (31%) | 14 (7.5%) | 36 (6.5%) |
Non documenté | 4 (0.3%) | 108 (58%) | 0 (0%) |
peche.chasse | |||
Non | 1,048 (83%) | 170 (91%) | 558 (100%) |
Oui | 208 (17%) | 16 (8.6%) | 0 (0%) |
cinema | |||
Non | 648 (52%) | 41 (22%) | 485 (87%) |
Oui | 608 (48%) | 145 (78%) | 73 (13%) |
cuisine | |||
Non | 657 (52%) | 100 (54%) | 362 (65%) |
Oui | 599 (48%) | 86 (46%) | 196 (35%) |
bricol | |||
Non | 627 (50%) | 120 (65%) | 400 (72%) |
Oui | 629 (50%) | 66 (35%) | 158 (28%) |
sport | |||
Non | 731 (58%) | 64 (34%) | 482 (86%) |
Oui | 525 (42%) | 122 (66%) | 76 (14%) |
lecture.bd | |||
Non | 1,209 (96%) | 186 (100%) | 558 (100%) |
Oui | 47 (3.7%) | 0 (0%) | 0 (0%) |
1
n (%)
|
hdv2003$typo <- factor(hdv2003$typo)
hdv2003$typo_gower <- factor(hdv2003$typo_gower)
ggtable(
hdv2003,
columnsX = c("typo", "typo_gower"),
columnsY = names(d2),
cells = "col.prop",
fill = "std.resid",
legend = 1
)
## Warning in chisq.test(xtabs(weight ~ y + x, data = data)): Chi-squared
## approximation may be incorrect
## Warning in chisq.test(xtabs(weight ~ y + x, data = data)): Chi-squared
## approximation may be incorrect
fviz_mca_ind(acm, geom = "point", alpha.ind = .25, habillage = hdv2003$typo, addEllipses = TRUE)