webin-R #26 : Graphiques statistiques avec guideR

Author

Joseph Larmarange

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load guideR, then import the `hdv2003` example dataset that ships with the
# questionr package; every example below works on this data frame.
library(guideR)
data("hdv2003", package = "questionr")

Variables catégorielles

# Distribution of education level (nivetud) within each sex.
# In the printed result, `n` is the cell count, `N` the group total and
# `prop` is expressed as a percentage (e.g. 16 / 845 -> 1.89).
# NOTE(review): `.na.rm = TRUE` presumably excludes missing values from the
# denominator -- confirm against the guideR documentation.
hdv2003 |> 
  proportion(nivetud, .na.rm = TRUE, .by = sexe)
# A tibble: 16 × 5
# Groups:   sexe [2]
   sexe  nivetud                                                   n     N  prop
   <fct> <fct>                                                 <int> <int> <dbl>
 1 Homme N'a jamais fait d'etudes                                 16   845  1.89
 2 Homme A arrete ses etudes, avant la derniere annee d'etude…    37   845  4.38
 3 Homme Derniere annee d'etudes primaires                       131   845 15.5 
 4 Homme 1er cycle                                                82   845  9.70
 5 Homme 2eme cycle                                               67   845  7.93
 6 Homme Enseignement technique ou professionnel court           247   845 29.2 
 7 Homme Enseignement technique ou professionnel long             67   845  7.93
 8 Homme Enseignement superieur y compris technique superieur    198   845 23.4 
 9 Femme N'a jamais fait d'etudes                                 23  1043  2.21
10 Femme A arrete ses etudes, avant la derniere annee d'etude…    49  1043  4.70
11 Femme Derniere annee d'etudes primaires                       210  1043 20.1 
12 Femme 1er cycle                                               122  1043 11.7 
13 Femme 2eme cycle                                              116  1043 11.1 
14 Femme Enseignement technique ou professionnel court           216  1043 20.7 
15 Femme Enseignement technique ou professionnel long             64  1043  6.14
16 Femme Enseignement superieur y compris technique superieur    243  1043 23.3 
# Attach human-readable variable labels to the three columns used as `by`
# variables in the plots that follow, overwriting `hdv2003` in place.
library(labelled)
hdv2003 <- set_variable_labels(
  hdv2003,
  sexe = "Sexe",
  nivetud = "Niveau d'étude",
  age = "Âge"
)
# Plot the distribution of the categorical variable `sport`, broken down by
# sex, education level and age.
# NOTE(review): `flip`, `drop_na_by` and `minimal` are guideR display options
# (presumably: swap axes, drop rows with missing `by` values, lighter theme)
# -- confirm against the guideR documentation.
hdv2003 |> 
  plot_categorical(
    sport,
    by = c(sexe, nivetud, age),
    flip = TRUE,
    drop_na_by = TRUE,
    minimal = TRUE
  )

# Plot the proportion of respondents for whom `sport == "Oui"`, broken down by
# sex, education level and age, with light-blue fill.
# NOTE(review): `show_overall_line = TRUE` presumably adds a reference line at
# the overall proportion -- confirm against the guideR documentation.
hdv2003 |> 
  plot_proportions(
    sport == "Oui",
    by = c(sexe, nivetud, age),
    flip = TRUE,
    drop_na_by = TRUE,
    minimal = TRUE,
    fill = "lightblue",
    show_overall_line = TRUE
  )

# Same proportion plot as before, but the condition is wrapped in
# `stratified_by(sexe)` and `sexe` is removed from `by`.
# NOTE(review): presumably this produces one set of results per sex instead of
# treating sex as just another `by` variable -- confirm in the guideR docs.
hdv2003 |> 
  plot_proportions(
    (sport == "Oui") |> stratified_by(sexe),
    by = c(nivetud, age),
    flip = TRUE,
    drop_na_by = TRUE,
    minimal = TRUE,
    fill = "lightblue",
    show_overall_line = TRUE
  )

Variables continues

# Baseline with plain dplyr: mean of `heures.tv` per sex, ignoring missing
# values. The guideR helper in the next chunk returns the same means plus
# additional statistics.
hdv2003 |> 
  group_by(sexe) |> 
  summarise(m = mean(heures.tv, na.rm = TRUE))
# A tibble: 2 × 2
  sexe      m
  <fct> <dbl>
1 Homme  2.22
2 Femme  2.27
# Mean and standard deviation of `heures.tv` per sex. With `.conf.int = TRUE`
# the printed result also carries `mean_low` / `mean_high` bounds, alongside
# the number of observations (`n`) and of missing values (`missing`).
hdv2003 |> 
  mean_sd(heures.tv, .by = sexe, .conf.int = TRUE)
# A tibble: 2 × 8
  x         sexe   mean mean_low mean_high    sd     n missing
  <chr>     <fct> <dbl>    <dbl>     <dbl> <dbl> <int>   <int>
1 heures.tv Homme  2.22     2.11      2.33  1.71   895       4
2 heures.tv Femme  2.27     2.16      2.38  1.83  1100       1
# Plot the mean of `heures.tv` broken down by sex, education level and age,
# drawn in red.
# NOTE(review): `show_overall_line = TRUE` presumably adds a reference line at
# the overall mean -- confirm against the guideR documentation.
hdv2003 |> 
  plot_means(
    heures.tv,
    by = c(sexe, nivetud, age),
    flip = TRUE,
    drop_na_by = TRUE,
    minimal = TRUE,
    colour = "red",
    show_overall_line = TRUE
  )

# Median-based summary of `heures.tv` per sex: the printed result reports the
# median, min, first and third quartiles, max, interquartile range, number of
# observations and number of missing values.
hdv2003 |> 
  median_iqr(heures.tv, .by = sexe)
# A tibble: 2 × 10
  x         sexe  median   min    q1    q3   max   iqr     n missing
  <chr>     <fct>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>   <int>
1 heures.tv Homme      2     0     1     3    11     2   895       4
2 heures.tv Femme      2     0     1     3    12     2  1100       1
# Plot the distribution of the continuous variable `heures.tv`, broken down by
# sex, education level and age.
# NOTE(review): presumably the graphical counterpart of `median_iqr()` above
# -- confirm the exact geometry in the guideR documentation.
hdv2003 |> 
  plot_continuous(
    heures.tv,
    by = c(sexe, nivetud, age),
    flip = TRUE,
    drop_na_by = TRUE,
    minimal = TRUE
  )

Questions à choix multiples

# Treat the columns from `cuisine` to `sport` as the items of one
# multiple-answer question and plot them, counting "Oui" as the positive
# answer for each item.
hdv2003 |> 
  plot_multiple_answers(
    answers = cuisine:sport,
    value = "Oui",
    fill = "lightblue"
  )

# Same items as above with `combine_answers = TRUE`.
# NOTE(review): presumably plots combinations of answers rather than each item
# separately -- confirm against the guideR documentation.
# Rendering this chunk emits a ggplot2 deprecation warning (`size` vs
# `linewidth`) attributed to the ggupset package; see the output below.
hdv2003 |> 
  plot_multiple_answers(
    answers = cuisine:sport,
    value = "Oui",
    fill = "lightblue",
    combine_answers = TRUE
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the ggupset package.
  Please report the issue at <https://github.com/const-ae/ggupset/issues>.

# Combined-answers plot again, this time with `flip = TRUE`.
# NOTE(review): presumably swaps the plot orientation -- confirm against the
# guideR documentation.
hdv2003 |> 
  plot_multiple_answers(
    answers = cuisine:sport,
    value = "Oui",
    fill = "lightblue",
    combine_answers = TRUE,
    flip = TRUE
  )

# Per-item plot of the multiple-answer question, split by sex via `by = sexe`.
hdv2003 |> 
  plot_multiple_answers(
    answers = cuisine:sport,
    value = "Oui",
    fill = "lightblue",
    by = sexe
  )

# Dodge variant of the multiple-answer plot, split by sex.
# NOTE(review): presumably draws the sexes side by side within each item
# instead of in separate panels -- confirm against the guideR documentation.
hdv2003 |> 
  plot_multiple_answers_dodge(
    answers = cuisine:sport,
    value = "Oui",
    by = sexe
  )

# Dodge variant split by sex, with `combine_answers = TRUE`.
# NOTE(review): presumably compares combinations of answers between sexes
# -- confirm against the guideR documentation.
hdv2003 |> 
  plot_multiple_answers_dodge(
    answers = cuisine:sport,
    value = "Oui",
    by = sexe,
    combine_answers = TRUE
  )