Skip to contents

proportion() lets you quickly count observations (like dplyr::count()) and compute relative proportions. Proportions are computed separately by group (see examples).

Usage

proportion(data, ..., .by = NULL)

# S3 method for class 'data.frame'
proportion(
  data,
  ...,
  .by = NULL,
  .na.rm = FALSE,
  .weight = NULL,
  .scale = 100,
  .sort = FALSE,
  .drop = FALSE,
  .conf.int = FALSE,
  .conf.level = 0.95,
  .options = list(correct = TRUE)
)

# S3 method for class 'survey.design'
proportion(
  data,
  ...,
  .by = NULL,
  .na.rm = FALSE,
  .scale = 100,
  .sort = FALSE,
  .conf.int = FALSE,
  .conf.level = 0.95,
  .options = NULL
)

Arguments

data

A data frame, data frame extension (e.g. a tibble), or a survey design object.

...

<data-masking> Variable(s) for which to compute proportions.

.by

<tidy-select> Optional additional variables to group by (in addition to any already declared using dplyr::group_by()).

.na.rm

Should NA values be removed (from variables declared in ...)?

.weight

<data-masking> Frequency weights. Can be NULL or a variable.

.scale

A scaling factor applied to the proportions. The default (100) returns percentages; use 1 to keep proportions unchanged.

.sort

If TRUE, will show the highest proportions at the top.

.drop

If TRUE, will remove empty groups from the output.

.conf.int

If TRUE, will estimate confidence intervals.

.conf.level

Confidence level for the returned confidence intervals.

.options

Additional arguments passed to stats::prop.test() (data frame method) or srvyr::survey_prop() (survey design method).

Value

A tibble.

A tibble with one row per group.

Examples


# univariable table
titanic |> proportion(Class)
#> # A tibble: 4 × 4
#>   Class     n     N  prop
#>   <chr> <int> <int> <dbl>
#> 1 1st     325  2201  14.8
#> 2 2nd     285  2201  12.9
#> 3 3rd     706  2201  32.1
#> 4 Crew    885  2201  40.2
titanic |> proportion(Class, .sort = TRUE)
#> # A tibble: 4 × 4
#>   Class     n     N  prop
#>   <chr> <int> <int> <dbl>
#> 1 Crew    885  2201  40.2
#> 2 3rd     706  2201  32.1
#> 3 1st     325  2201  14.8
#> 4 2nd     285  2201  12.9
titanic |> proportion(Class, .conf.int = TRUE)
#> # A tibble: 4 × 6
#>   Class     n     N  prop prop_low prop_high
#>   <chr> <int> <int> <dbl>    <dbl>     <dbl>
#> 1 1st     325  2201  14.8     13.3      16.3
#> 2 2nd     285  2201  12.9     11.6      14.4
#> 3 3rd     706  2201  32.1     30.1      34.1
#> 4 Crew    885  2201  40.2     38.2      42.3
titanic |> proportion(Class, .conf.int = TRUE, .scale = 1)
#> # A tibble: 4 × 6
#>   Class     n     N  prop prop_low prop_high
#>   <chr> <int> <int> <dbl>    <dbl>     <dbl>
#> 1 1st     325  2201 0.148    0.133     0.163
#> 2 2nd     285  2201 0.129    0.116     0.144
#> 3 3rd     706  2201 0.321    0.301     0.341
#> 4 Crew    885  2201 0.402    0.382     0.423

# bivariable table
titanic |> proportion(Class, Survived) # proportions of the total
#> # A tibble: 8 × 5
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <int> <int> <dbl>
#> 1 1st   No         122  2201  5.54
#> 2 1st   Yes        203  2201  9.22
#> 3 2nd   No         167  2201  7.59
#> 4 2nd   Yes        118  2201  5.36
#> 5 3rd   No         528  2201 24.0 
#> 6 3rd   Yes        178  2201  8.09
#> 7 Crew  No         673  2201 30.6 
#> 8 Crew  Yes        212  2201  9.63
titanic |> proportion(Survived, .by = Class) # row proportions
#> # A tibble: 8 × 5
#> # Groups:   Class [4]
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <int> <int> <dbl>
#> 1 1st   No         122   325  37.5
#> 2 1st   Yes        203   325  62.5
#> 3 2nd   No         167   285  58.6
#> 4 2nd   Yes        118   285  41.4
#> 5 3rd   No         528   706  74.8
#> 6 3rd   Yes        178   706  25.2
#> 7 Crew  No         673   885  76.0
#> 8 Crew  Yes        212   885  24.0
titanic |> # equivalent syntax
  dplyr::group_by(Class) |>
  proportion(Survived)
#> # A tibble: 8 × 5
#> # Groups:   Class [4]
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <int> <int> <dbl>
#> 1 1st   No         122   325  37.5
#> 2 1st   Yes        203   325  62.5
#> 3 2nd   No         167   285  58.6
#> 4 2nd   Yes        118   285  41.4
#> 5 3rd   No         528   706  74.8
#> 6 3rd   Yes        178   706  25.2
#> 7 Crew  No         673   885  76.0
#> 8 Crew  Yes        212   885  24.0

# combining 3 variables or more
titanic |> proportion(Class, Sex, Survived)
#> # A tibble: 16 × 6
#>    Class Sex    Survived     n     N   prop
#>    <chr> <chr>  <chr>    <int> <int>  <dbl>
#>  1 1st   Female No           4  2201  0.182
#>  2 1st   Female Yes        141  2201  6.41 
#>  3 1st   Male   No         118  2201  5.36 
#>  4 1st   Male   Yes         62  2201  2.82 
#>  5 2nd   Female No          13  2201  0.591
#>  6 2nd   Female Yes         93  2201  4.23 
#>  7 2nd   Male   No         154  2201  7.00 
#>  8 2nd   Male   Yes         25  2201  1.14 
#>  9 3rd   Female No         106  2201  4.82 
#> 10 3rd   Female Yes         90  2201  4.09 
#> 11 3rd   Male   No         422  2201 19.2  
#> 12 3rd   Male   Yes         88  2201  4.00 
#> 13 Crew  Female No           3  2201  0.136
#> 14 Crew  Female Yes         20  2201  0.909
#> 15 Crew  Male   No         670  2201 30.4  
#> 16 Crew  Male   Yes        192  2201  8.72 
titanic |> proportion(Sex, Survived, .by = Class)
#> # A tibble: 16 × 6
#> # Groups:   Class [4]
#>    Class Sex    Survived     n     N   prop
#>    <chr> <chr>  <chr>    <int> <int>  <dbl>
#>  1 1st   Female No           4   325  1.23 
#>  2 1st   Female Yes        141   325 43.4  
#>  3 1st   Male   No         118   325 36.3  
#>  4 1st   Male   Yes         62   325 19.1  
#>  5 2nd   Female No          13   285  4.56 
#>  6 2nd   Female Yes         93   285 32.6  
#>  7 2nd   Male   No         154   285 54.0  
#>  8 2nd   Male   Yes         25   285  8.77 
#>  9 3rd   Female No         106   706 15.0  
#> 10 3rd   Female Yes         90   706 12.7  
#> 11 3rd   Male   No         422   706 59.8  
#> 12 3rd   Male   Yes         88   706 12.5  
#> 13 Crew  Female No           3   885  0.339
#> 14 Crew  Female Yes         20   885  2.26 
#> 15 Crew  Male   No         670   885 75.7  
#> 16 Crew  Male   Yes        192   885 21.7  
titanic |> proportion(Survived, .by = c(Class, Sex))
#> # A tibble: 16 × 6
#> # Groups:   Class, Sex [8]
#>    Class Sex    Survived     n     N  prop
#>    <chr> <chr>  <chr>    <int> <int> <dbl>
#>  1 1st   Female No           4   145  2.76
#>  2 1st   Female Yes        141   145 97.2 
#>  3 1st   Male   No         118   180 65.6 
#>  4 1st   Male   Yes         62   180 34.4 
#>  5 2nd   Female No          13   106 12.3 
#>  6 2nd   Female Yes         93   106 87.7 
#>  7 2nd   Male   No         154   179 86.0 
#>  8 2nd   Male   Yes         25   179 14.0 
#>  9 3rd   Female No         106   196 54.1 
#> 10 3rd   Female Yes         90   196 45.9 
#> 11 3rd   Male   No         422   510 82.7 
#> 12 3rd   Male   Yes         88   510 17.3 
#> 13 Crew  Female No           3    23 13.0 
#> 14 Crew  Female Yes         20    23 87.0 
#> 15 Crew  Male   No         670   862 77.7 
#> 16 Crew  Male   Yes        192   862 22.3 

# missing values
dna <- titanic
dna$Survived[c(1:20, 500:530)] <- NA
dna |> proportion(Survived)
#> # A tibble: 3 × 4
#>   Survived     n     N  prop
#>   <chr>    <int> <int> <dbl>
#> 1 No        1439  2201 65.4 
#> 2 Yes        711  2201 32.3 
#> 3 NA          51  2201  2.32
dna |> proportion(Survived, .na.rm = TRUE)
#> # A tibble: 2 × 4
#>   Survived     n     N  prop
#>   <chr>    <int> <int> <dbl>
#> 1 No        1439  2150  66.9
#> 2 Yes        711  2150  33.1

# \donttest{
## SURVEY DATA ------------------------------------------------------

ds <- srvyr::as_survey(titanic)

# univariable table
ds |> proportion(Class)
#> # A tibble: 4 × 4
#>   Class     n     N  prop
#>   <chr> <dbl> <dbl> <dbl>
#> 1 1st     325  2201  14.8
#> 2 2nd     285  2201  12.9
#> 3 3rd     706  2201  32.1
#> 4 Crew    885  2201  40.2
ds |> proportion(Class, .sort = TRUE)
#> # A tibble: 4 × 4
#>   Class     n     N  prop
#>   <chr> <dbl> <dbl> <dbl>
#> 1 Crew    885  2201  40.2
#> 2 3rd     706  2201  32.1
#> 3 1st     325  2201  14.8
#> 4 2nd     285  2201  12.9
ds |> proportion(Class, .conf.int = TRUE)
#> # A tibble: 4 × 6
#>   Class     n     N  prop prop_low prop_high
#>   <chr> <dbl> <dbl> <dbl>    <dbl>     <dbl>
#> 1 1st     325  2201  14.8     13.3      16.3
#> 2 2nd     285  2201  12.9     11.6      14.4
#> 3 3rd     706  2201  32.1     30.2      34.1
#> 4 Crew    885  2201  40.2     38.2      42.3
ds |> proportion(Class, .conf.int = TRUE, .scale = 1)
#> # A tibble: 4 × 6
#>   Class     n     N  prop prop_low prop_high
#>   <chr> <dbl> <dbl> <dbl>    <dbl>     <dbl>
#> 1 1st     325  2201 0.148    0.133     0.163
#> 2 2nd     285  2201 0.129    0.116     0.144
#> 3 3rd     706  2201 0.321    0.302     0.341
#> 4 Crew    885  2201 0.402    0.382     0.423

# bivariable table
ds |> proportion(Class, Survived) # proportions of the total
#> # A tibble: 8 × 5
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <dbl> <dbl> <dbl>
#> 1 1st   No         122  2201  5.54
#> 2 1st   Yes        203  2201  9.22
#> 3 2nd   No         167  2201  7.59
#> 4 2nd   Yes        118  2201  5.36
#> 5 3rd   No         528  2201 24.0 
#> 6 3rd   Yes        178  2201  8.09
#> 7 Crew  No         673  2201 30.6 
#> 8 Crew  Yes        212  2201  9.63
ds |> proportion(Survived, .by = Class) # row proportions
#> # A tibble: 8 × 5
#> # Groups:   Class [4]
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <dbl> <dbl> <dbl>
#> 1 1st   No         122   325  37.5
#> 2 1st   Yes        203   325  62.5
#> 3 2nd   No         167   285  58.6
#> 4 2nd   Yes        118   285  41.4
#> 5 3rd   No         528   706  74.8
#> 6 3rd   Yes        178   706  25.2
#> 7 Crew  No         673   885  76.0
#> 8 Crew  Yes        212   885  24.0
ds |> dplyr::group_by(Class) |> proportion(Survived)
#> # A tibble: 8 × 5
#> # Groups:   Class [4]
#>   Class Survived     n     N  prop
#>   <chr> <chr>    <dbl> <dbl> <dbl>
#> 1 1st   No         122   325  37.5
#> 2 1st   Yes        203   325  62.5
#> 3 2nd   No         167   285  58.6
#> 4 2nd   Yes        118   285  41.4
#> 5 3rd   No         528   706  74.8
#> 6 3rd   Yes        178   706  25.2
#> 7 Crew  No         673   885  76.0
#> 8 Crew  Yes        212   885  24.0

# combining 3 variables or more
ds |> proportion(Class, Sex, Survived)
#> # A tibble: 16 × 6
#>    Class Sex    Survived     n     N   prop
#>    <chr> <chr>  <chr>    <dbl> <dbl>  <dbl>
#>  1 1st   Female No           4  2201  0.182
#>  2 1st   Female Yes        141  2201  6.41 
#>  3 1st   Male   No         118  2201  5.36 
#>  4 1st   Male   Yes         62  2201  2.82 
#>  5 2nd   Female No          13  2201  0.591
#>  6 2nd   Female Yes         93  2201  4.23 
#>  7 2nd   Male   No         154  2201  7.00 
#>  8 2nd   Male   Yes         25  2201  1.14 
#>  9 3rd   Female No         106  2201  4.82 
#> 10 3rd   Female Yes         90  2201  4.09 
#> 11 3rd   Male   No         422  2201 19.2  
#> 12 3rd   Male   Yes         88  2201  4.00 
#> 13 Crew  Female No           3  2201  0.136
#> 14 Crew  Female Yes         20  2201  0.909
#> 15 Crew  Male   No         670  2201 30.4  
#> 16 Crew  Male   Yes        192  2201  8.72 
ds |> proportion(Sex, Survived, .by = Class)
#> # A tibble: 16 × 6
#> # Groups:   Class [4]
#>    Class Sex    Survived     n     N   prop
#>    <chr> <chr>  <chr>    <dbl> <dbl>  <dbl>
#>  1 1st   Female No           4   325  1.23 
#>  2 1st   Female Yes        141   325 43.4  
#>  3 1st   Male   No         118   325 36.3  
#>  4 1st   Male   Yes         62   325 19.1  
#>  5 2nd   Female No          13   285  4.56 
#>  6 2nd   Female Yes         93   285 32.6  
#>  7 2nd   Male   No         154   285 54.0  
#>  8 2nd   Male   Yes         25   285  8.77 
#>  9 3rd   Female No         106   706 15.0  
#> 10 3rd   Female Yes         90   706 12.7  
#> 11 3rd   Male   No         422   706 59.8  
#> 12 3rd   Male   Yes         88   706 12.5  
#> 13 Crew  Female No           3   885  0.339
#> 14 Crew  Female Yes         20   885  2.26 
#> 15 Crew  Male   No         670   885 75.7  
#> 16 Crew  Male   Yes        192   885 21.7  
ds |> proportion(Survived, .by = c(Class, Sex))
#> # A tibble: 16 × 6
#> # Groups:   Class, Sex [8]
#>    Class Sex    Survived     n     N  prop
#>    <chr> <chr>  <chr>    <dbl> <dbl> <dbl>
#>  1 1st   Female No           4   145  2.76
#>  2 1st   Female Yes        141   145 97.2 
#>  3 1st   Male   No         118   180 65.6 
#>  4 1st   Male   Yes         62   180 34.4 
#>  5 2nd   Female No          13   106 12.3 
#>  6 2nd   Female Yes         93   106 87.7 
#>  7 2nd   Male   No         154   179 86.0 
#>  8 2nd   Male   Yes         25   179 14.0 
#>  9 3rd   Female No         106   196 54.1 
#> 10 3rd   Female Yes         90   196 45.9 
#> 11 3rd   Male   No         422   510 82.7 
#> 12 3rd   Male   Yes         88   510 17.3 
#> 13 Crew  Female No           3    23 13.0 
#> 14 Crew  Female Yes         20    23 87.0 
#> 15 Crew  Male   No         670   862 77.7 
#> 16 Crew  Male   Yes        192   862 22.3 

# missing values
dsna <- srvyr::as_survey(dna)
dsna |> proportion(Survived)
#> # A tibble: 3 × 4
#>   Survived     n     N  prop
#>   <chr>    <dbl> <dbl> <dbl>
#> 1 No        1439  2201 65.4 
#> 2 Yes        711  2201 32.3 
#> 3 NA          51  2201  2.32
dsna |> proportion(Survived, .na.rm = TRUE)
#> # A tibble: 2 × 4
#>   Survived     n     N  prop
#>   <chr>    <dbl> <dbl> <dbl>
#> 1 No        1439  2150  66.9
#> 2 Yes        711  2150  33.1
# }