Compute median, quartiles and interquartile range by sub-groups
Source: R/median_iqr.R
median_iqr.Rd
median_iqr() lets you quickly compute median, quartiles and interquartile
range by sub-groups. Use .outliers = TRUE to also return whiskers and
outliers (see ggplot2::stat_boxplot()).
Usage
median_iqr(data, ...)
# S3 method for class 'data.frame'
median_iqr(
data,
...,
.by = NULL,
.drop = FALSE,
.drop_na_by = FALSE,
.outliers = FALSE
)
# S3 method for class 'survey.design'
median_iqr(
data,
...,
.by = NULL,
.drop = FALSE,
.drop_na_by = FALSE,
.outliers = FALSE
)
# Default S3 method
median_iqr(data, ..., .drop = FALSE, .outliers = FALSE)
Arguments
- data
A vector, a data frame, data frame extension (e.g. a tibble), or a survey design object.
- ...
<data-masking> Variable(s) for which to compute median, quartiles and interquartile range.
- .by
<tidy-select> Optional additional variables to group by (in addition to any previously declared using dplyr::group_by()).
- .drop
If TRUE, will remove empty groups from the output.
- .drop_na_by
If TRUE, will remove any NA values observed in the .by variables (or variables defined with dplyr::group_by()).
- .outliers
If TRUE, will estimate whiskers and outliers.
Value
A tibble. Column "n" reports the number of valid observations
and "missing" the number of missing (NA) observations, unweighted for
survey objects.
A tibble with one row per group.
Examples
# using a vector
iris$Petal.Length |> median_iqr()
#> # A tibble: 1 × 9
#> x median min q1 q3 max iqr n missing
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 vector 4.35 1 1.6 5.1 6.9 3.5 150 0
# one variable
iris |> median_iqr(Petal.Length)
#> # A tibble: 1 × 9
#> x median min q1 q3 max iqr n missing
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length 4.35 1 1.6 5.1 6.9 3.5 150 0
iris |> median_iqr(Petal.Length, .outliers = TRUE)
#> # A tibble: 1 × 12
#> x median min q1 q3 max iqr whisker_low whisker_high outliers
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <list>
#> 1 Petal.… 4.35 1 1.6 5.1 6.9 3.5 1 6.9 <dbl>
#> # ℹ 2 more variables: n <int>, missing <int>
iris |> median_iqr(Petal.Length, .by = Species)
#> # A tibble: 3 × 10
#> x Species median min q1 q3 max iqr n missing
#> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length setosa 1.5 1 1.4 1.58 1.9 0.175 50 0
#> 2 Petal.Length versicolor 4.35 3 4 4.6 5.1 0.600 50 0
#> 3 Petal.Length virginica 5.55 4.5 5.1 5.88 6.9 0.775 50 0
mtcars |> median_iqr(mpg, .by = c(cyl, gear))
#> # A tibble: 8 × 11
#> # Groups: cyl [3]
#> x cyl gear median min q1 q3 max iqr n missing
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 mpg 4 3 21.5 21.5 21.5 21.5 21.5 0 1 0
#> 2 mpg 4 4 25.8 21.4 22.8 30.9 33.9 8.1 8 0
#> 3 mpg 4 5 28.2 26 27.1 29.3 30.4 2.20 2 0
#> 4 mpg 6 3 19.8 18.1 18.9 20.6 21.4 1.65 2 0
#> 5 mpg 6 4 20.1 17.8 18.8 21 21 2.15 4 0
#> 6 mpg 6 5 19.7 19.7 19.7 19.7 19.7 0 1 0
#> 7 mpg 8 3 15.2 10.4 14.0 16.6 19.2 2.57 12 0
#> 8 mpg 8 5 15.4 15 15.2 15.6 15.8 0.400 2 0
# two variables
iris |> median_iqr(Petal.Length, Petal.Width)
#> # A tibble: 2 × 9
#> x median min q1 q3 max iqr n missing
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length 4.35 1 1.6 5.1 6.9 3.5 150 0
#> 2 Petal.Width 1.3 0.1 0.3 1.8 2.5 1.5 150 0
iris |> median_iqr(dplyr::pick(dplyr::starts_with("Petal")), .by = Species)
#> # A tibble: 6 × 10
#> x Species median min q1 q3 max iqr n missing
#> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length setosa 1.5 1 1.4 1.58 1.9 0.175 50 0
#> 2 Petal.Length versicolor 4.35 3 4 4.6 5.1 0.600 50 0
#> 3 Petal.Length virginica 5.55 4.5 5.1 5.88 6.9 0.775 50 0
#> 4 Petal.Width setosa 0.2 0.1 0.2 0.3 0.6 0.1 50 0
#> 5 Petal.Width versicolor 1.3 1 1.2 1.5 1.8 0.3 50 0
#> 6 Petal.Width virginica 2 1.4 1.8 2.3 2.5 0.5 50 0
# missing values
d <- iris
d$Petal.Length[1:10] <- NA
d |> median_iqr(Petal.Length)
#> # A tibble: 1 × 9
#> x median min q1 q3 max iqr n missing
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length 4.5 1 1.67 5.1 6.9 3.42 140 10
d |> median_iqr(Petal.Length, .by = Species)
#> # A tibble: 3 × 10
#> x Species median min q1 q3 max iqr n missing
#> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
#> 1 Petal.Length setosa 1.5 1 1.37 1.6 1.9 0.225 40 10
#> 2 Petal.Length versicolor 4.35 3 4 4.6 5.1 0.600 50 0
#> 3 Petal.Length virginica 5.55 4.5 5.1 5.88 6.9 0.775 50 0
# \donttest{
## SURVEY DATA ------------------------------------------------------
ds <- srvyr::as_survey(iris)
ds |> median_iqr(Petal.Length, .by = Species, .outliers = TRUE)
#> # A tibble: 3 × 13
#> # Rowwise:
#> x Species median min q1 q3 max iqr whisker_low whisker_high
#> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Petal.L… setosa 1.5 1 1.4 1.6 1.9 0.200 1.1 1.9
#> 2 Petal.L… versic… 4.3 3 4 4.6 5.1 0.600 3.3 5.1
#> 3 Petal.L… virgin… 5.5 4.5 5.1 5.9 6.9 0.800 4.5 6.9
#> # ℹ 3 more variables: outliers <list>, n <int>, missing <int>
# }