library("tidyverse")
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Have a look at variables and scale levels:
data_toy_properties <- tribble(
~variable_name, ~values, ~scale_level, ~quantity1, ~quantity2, ~r_type,
"austrian", "TRUE and FALSE", "nominal", "discrete", 2, "logical",
"marital_status", "married, single, divorced, widowed", "nominal", "discrete", 4, "factor",
"name", "string", "nominal", "discrete", Inf, "character",
"?", "?", "nominal", "continuous", Inf, "?",
"age_cut", "child, adolescent, adult", "ordinal", "discrete", 3, "ordered factor",
"?", "?", "ordinal", "continuous", Inf, "?",
"year_birth", "2023, 2024, etc.", "metric (interval-scaled)", "discrete", Inf, "integer",
"temperature", "[20, 50] °C", "metric (interval-scaled)", "continuous", Inf, "double",
"test_score", "{0, 1, ..., 100} points", "metric (ratio-scaled)", "discrete", 101, "integer",
"height", "[0, 300] cm", "metric (ratio-scaled)", "continuous", Inf, "double"
)
data_toy_properties
## # A tibble: 10 × 6
## variable_name values scale_level quantity1 quantity2 r_type
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 austrian TRUE and FALSE nominal discrete 2 logic…
## 2 marital_status married, single, divor… nominal discrete 4 factor
## 3 name string nominal discrete Inf chara…
## 4 ? ? nominal continuo… Inf ?
## 5 age_cut child, adolescent, adu… ordinal discrete 3 order…
## 6 ? ? ordinal continuo… Inf ?
## 7 year_birth 2023, 2024, etc. metric (in… discrete Inf integ…
## 8 temperature [20, 50] °C metric (in… continuo… Inf double
## 9 test_score {0, 1, ..., 100} points metric (ra… discrete 101 integ…
## 10 height [0, 300] cm metric (ra… continuo… Inf double
Or more compact:
data_toy_properties_wide_1 <- data_toy_properties |>
select(variable_name, scale_level, quantity1) |>
pivot_wider(
names_from = quantity1, # take values from quantity1 (discrete and continuous) and make columns
values_from = variable_name, # these values are inserted in the new columns discrete and continuous
# there is more than one value per cell to insert, therefore a function is used in each cell
values_fn = ~ paste(.x, collapse = ", ") # short form of anonymous function
# values_fn = function(x) paste(x, collapse = ", ") # long form of anonymous function
) |>
rename(
"scale level \\ quantity" = scale_level # rename column scale_level
)
data_toy_properties_wide_1
## # A tibble: 4 × 3
## `scale level \\ quantity` discrete continuous
## <chr> <chr> <chr>
## 1 nominal austrian, marital_status, name ?
## 2 ordinal age_cut ?
## 3 metric (interval-scaled) year_birth temperature
## 4 metric (ratio-scaled) test_score height
The following is the “inverse” map with pivot_longer. pivot_longer is used often to prepare data for plots.
data_toy_properties_wide_1 |>
pivot_longer(
cols = c("discrete", "continuous"), # chose columns
names_to = "quantity_1", # the names (discrete and continuous) will be stored in new variable quantity_1
values_to = "variable_name" # the values (austrian, marital_status, name)
) |>
separate_rows(variable_name, sep = "\\s*,\\s*") |> # to get for e.g. austrian, marital_status, name 3 rows instead of one
rename("scale_level" = "scale level \\ quantity") |> # rename column name
select(variable_name, scale_level, quantity_1) # new ordering
## # A tibble: 10 × 3
## variable_name scale_level quantity_1
## <chr> <chr> <chr>
## 1 austrian nominal discrete
## 2 marital_status nominal discrete
## 3 name nominal discrete
## 4 ? nominal continuous
## 5 age_cut ordinal discrete
## 6 ? ordinal continuous
## 7 year_birth metric (interval-scaled) discrete
## 8 temperature metric (interval-scaled) continuous
## 9 test_score metric (ratio-scaled) discrete
## 10 height metric (ratio-scaled) continuous
And how it is used in R:
data_toy_properties |>
select(r_type, scale_level, quantity1) |>
pivot_wider(
names_from = quantity1, # take values from quantitiy1 (discrecte and continuous) and make columns
values_from = r_type, # these values are inserted in the new columns discrete and continuous
# there is more than one value per cell to insert, therefore a function is used in each cell
values_fn = ~ paste(.x, collapse = ", ") # short form of anonymous function
# values_fn = function(x) paste(x, collapse = ", ") # long form of anonymous function
) |>
rename(
"scale level \\ quantity" = scale_level
)
## # A tibble: 4 × 3
## `scale level \\ quantity` discrete continuous
## <chr> <chr> <chr>
## 1 nominal logical, factor, character ?
## 2 ordinal ordered factor ?
## 3 metric (interval-scaled) integer double
## 4 metric (ratio-scaled) integer double
Read data with warning, because we do not specify the column types:
data_toy_temp <- read_csv("toy.csv")
## Rows: 20 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, marital_status, age_cat
## dbl (4): year_birth, temperature, test_score, height
## lgl (1): austrian
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data_toy_temp |> head(3)
## # A tibble: 3 × 8
## name austrian marital_status year_birth age_cat temperature test_score height
## <chr> <lgl> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Franz FALSE divorced 1990 adult 37 NA 180.
## 2 Sepp FALSE single 2009 adoles… 38.5 80 160.
## 3 Maria TRUE single 2005 adult 36 5 159.
Store values for marital_status:
marital_status_values <- data_toy_temp |>
pull(marital_status) |>
unique()
marital_status_values
## [1] "divorced" "single" "married" "widowed"
Store values for age.cat:
age_cat_values <- data_toy_temp |>
pull(age_cat) |>
unique()
age_cat_values
## [1] "adult" "adolescent" "child" NA
Change ordering (maybe it was already ok) and erase NA
age_cat_values <- age_cat_values[c(3, 2, 1)]
age_cat_values
## [1] "child" "adolescent" "adult"
Read data again:
data_coltypes <- cols(
name = col_character(),
austrian = col_logical(),
marital_status = col_factor(marital_status_values),
year_birth = col_integer(),
age_cat = col_factor(levels = age_cat_values, ordered = TRUE),
temperature = col_double(),
test_score = col_integer()
)
data_toy <- read_csv("toy.csv", col_types = data_coltypes)
data_toy |>
head(3)
## # A tibble: 3 × 8
## name austrian marital_status year_birth age_cat temperature test_score height
## <chr> <lgl> <fct> <int> <ord> <dbl> <int> <dbl>
## 1 Franz FALSE divorced 1990 adult 37 NA 180.
## 2 Sepp FALSE single 2009 adoles… 38.5 80 160.
## 3 Maria TRUE single 2005 adult 36 5 159.
data_toy |>
glimpse()
## Rows: 20
## Columns: 8
## $ name <chr> "Franz", "Sepp", "Maria", "Georg", "Karl", "Ulrike", "U…
## $ austrian <lgl> FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALS…
## $ marital_status <fct> divorced, single, single, single, married, married, div…
## $ year_birth <int> 1990, 2009, 2005, 2019, 1930, 1980, 1995, 1999, 2005, 2…
## $ age_cat <ord> adult, adolescent, adult, child, adult, adult, adult, a…
## $ temperature <dbl> 37.0, 38.5, 36.0, 35.4, 39.0, 38.0, 36.7, 37.9, 36.2, 3…
## $ test_score <int> NA, 80, 5, 30, 1, 9, 50, 30, 80, 90, 70, 77, 55, 99, 80…
## $ height <dbl> 180.4, 160.5, 158.9, 130.2, 174.3, 172.1, 159.5, 182.5,…
Levels of all factor variables:
data_toy |>
select(where(is.factor)) |>
summary()
## marital_status age_cat
## divorced:5 child : 1
## single :8 adolescent: 1
## married :5 adult :17
## widowed :2 NA's : 1
Levels of all ordered variables:
data_toy |>
select(where(is.ordered)) |>
summary()
## age_cat
## child : 1
## adolescent: 1
## adult :17
## NA's : 1
Full summary:
data_toy |>
summary()
## name austrian marital_status year_birth
## Length:20 Mode :logical divorced:5 Min. :1930
## Class :character FALSE:8 single :8 1st Qu.:1983
## Mode :character TRUE :9 married :5 Median :1999
## NA's :3 widowed :2 Mean :1990
## 3rd Qu.:2002
## Max. :2019
## NA's :1
## age_cat temperature test_score height
## child : 1 Min. :35.40 Min. : 1.00 Min. :130.2
## adolescent: 1 1st Qu.:36.42 1st Qu.:30.00 1st Qu.:166.5
## adult :17 Median :37.45 Median :50.00 Median :171.2
## NA's : 1 Mean :37.44 Mean :52.41 Mean :170.5
## 3rd Qu.:38.42 3rd Qu.:80.00 3rd Qu.:178.5
## Max. :39.50 Max. :99.00 Max. :192.5
## NA's :3 NA's :3
year_birth has na-values hence warnings:
data_toy |> # data
ggplot(aes(y = year_birth)) + # aesthetics (define what is on y axis) is used for all geom objects
geom_boxplot() # define geom object (plot type here boxplot)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
We can define aesthetics in geom_boxplot instead of ggplot. If we have more geom objects, then this can be necessary. Here it makes no difference.
data_toy |> # data
ggplot() + # empty command
geom_boxplot(aes(y = year_birth)) # aesthetics is defined only for this geom object
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
To avoid warning, we could use filter to exclude rows where year_birth is NA. Or we use na.rm = TRUE in ggplot to do the same.
data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(na.rm = TRUE)
Change dimensions in code chunk with e.g. {r, fig.width=1, fig.height=1.2} (the real text size stays the same at 11pt). Be careful: Here in r chunk we can only use inch not cm. So if we want to get the same text/plot ratio in e.g. pdf, we should also use inch later in ggsave.
data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(na.rm = TRUE)
Or with {r, fig.width=2, fig.height=2.4}
data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(na.rm = TRUE)
Save last plot. If we use width and height we change again the text/plot ratio because the textsize stays constant at 11pt. If we want to have the same text/plot ratio as in above r chunk, we should use inch in ggsave. Formats png and jpg are no vector graphics, I prefer pdf and svg in most cases.
ggsave("my_first_plot_a.pdf", width = 2, height = 2.4, units = "in") # compare text/plot ratio, should be as in last plot
ggsave("my_first_plot_b.pdf", width = 1, height = 1.2, units = "in") # compare text/plot ratio, should be as in plot above last plot
ggsave("my_first_plot_c.pdf", width = 2, height = 2.4, units = "in", dpi = 100) # compare file size, does not change for pdf (vector graphics) so dpi is unnecessary
ggsave("my_first_plot_d.pdf", width = 2, height = 2.4, units = "in", dpi = 600) # compare file size, does not change for pdf (vector graphics) so dpi is unnecessary
ggsave("my_first_plot_e.png", width = 2, height = 2.4, units = "in", dpi = 100) # here dpi makes a difference
ggsave("my_first_plot_f.png", width = 2, height = 2.4, units = "in", dpi = 600) # here dpi makes a difference compare file size with plot e
Or change text size for (ideally all) plots in document:
data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(na.rm = TRUE) +
theme_grey(base_size = 16)
ggsave("my_first_plot_g.pdf", width = 2, height = 2.5, units = "in")
It is also possible to store the plot as variable:
plot1 <- data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(na.rm = TRUE)
Next we can show the plot:
plot1
Modify the plot e.g. theme:
plot1 +
# theme_classic()
# theme_dark()
# theme_minimal()
# theme_grey()
theme_bw()
And of course save the plot:
ggsave("my_first_plot_h.pdf", plot1, width = 2, height = 2.4, units = "in")
fh1 <- rgb(r = 97 / 255, g = 164 / 255, b = 215 / 255) # fh lightblue
fh1a <- rgb(r = 97 / 255, g = 164 / 255, b = 215 / 255, alpha = 0.15) # transparancy: fh1 15% and background (maybe white) 85%
factor <- 0.15
fh1b <- rgb(t(col2rgb(fh1) / 255 * factor + 1 * (1 - factor))) # fh1 15% and white 85%
fh2 <- rgb(r = 22 / 255, g = 48 / 255, b = 114 / 255) # fh darkblue
ggplot() +
geom_rect(aes(xmin = 0, xmax = 1, ymin = 0, ymax = 1), fill = fh1) + # fh1
geom_rect(aes(xmin = 1, xmax = 2, ymin = 0, ymax = 1), fill = fh1a) + # fh1a, grids are darker, because background is gray and not white
geom_rect(aes(xmin = 2, xmax = 3, ymin = 0, ymax = 1), fill = fh1b) + # fh1b, no transparency, so we do not see the grids
geom_rect(aes(xmin = 3, xmax = 4, ymin = 0, ymax = 1), fill = fh2) + # fh2
theme_bw() # it is important to use white background, otherwise transparancy of fh1a is different, see grid lines
data_toy |>
ggplot(aes(y = year_birth)) +
geom_boxplot(
color = fh2, # lines are plotted in fh2
fill = fh1, # areas are plotted in fh1
linewidth = 1, # width of lines
median.linewidth = 1, # with of median line
outlier.shape = 23, # shape of outlier
outlier.size = 3, # size of outlier
outlier.stroke = 1, # width of outlier lines
na.rm = TRUE
) +
labs(
title = "Year of Birth",
y = ""
) +
scale_x_continuous(breaks = NULL) + # This ensures no x-axis labels or ticks
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
theme(
panel.background = element_rect(fill = fh1b), # background color
panel.grid.major.y = element_line(color = "black", linetype = "solid", linewidth = 0.1), # major y-grid
panel.grid.minor.y = element_line(color = "black", linetype = "dashed", linewidth = 0.05), # minor y-grid
# panel.grid.minor.y = element_blank(), # or if we do not like to see minor grid
text = element_text(face = "bold", size = 11), # font properties
axis.ticks = element_blank() # no ticks e.g. right of values 1940, 1960, ... on y-axis
)
We could store the above plot as variable e.g. plot2 and reuse it here. but it can make a difference which geom object (violin, boxplot) and gridlines etc. are used first.
data_toy |>
ggplot(aes(x = 0, y = year_birth)) + # geom_violin and geom_point need x value
geom_violin( # violin plot
linewidth = 1,
color = fh2,
fill = fh1,
alpha = 0.3,
width = 4.5,
na.rm = TRUE
) +
geom_boxplot( # boxplot
color = fh2,
fill = fh1,
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
outlier.stroke = 1,
na.rm = TRUE
) +
stat_boxplot( # error bars
geom = "errorbar",
color = fh2,
linewidth = 1,
na.rm = TRUE
) +
geom_point( # data points
position = position_jitter(width = 0.2), # jittered is used (randomly x value)
color = fh2,
na.rm = TRUE
) +
labs(
title = "Year of Birth",
x = "",
y = ""
) +
scale_x_continuous(
breaks = NULL # This ensures no x-axis labels or ticks
) +
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
theme(
panel.background = element_rect(fill = fh1b), # background color
panel.grid.major.y = element_line(color = "black", linetype = "solid", linewidth = 0.1), # major y-grid
panel.grid.minor.y = element_line(color = "black", linetype = "dashed", linewidth = 0.05), # minor y-grid
# panel.grid.minor.y = element_blank(), # or if we do not like to see minor grid
text = element_text(face = "bold", size = 11), # font properties
axis.ticks = element_blank() # no ticks e.g. right of values 1940, 1960, ... on y-axis
)
Plot by marital_status
data_toy |>
ggplot(aes(x = marital_status, y = year_birth)) + # we get 4 boxplot because marital_status has 4 values
geom_boxplot(
color = fh2,
fill = fh1,
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
outlier.stroke = 1,
na.rm = TRUE
) +
stat_boxplot(
geom = "errorbar",
color = fh2,
linewidth = 1,
na.rm = TRUE
) +
labs(
title = "Year of Birth\nBy marital status",
x = "",
y = ""
) +
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
theme(
panel.background = element_rect(fill = fh1b),
panel.grid.major.y = element_line(color = "black", linetype = "solid", linewidth = 0.1),
panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_line(color = "black", linetype = "dashed", linewidth = 0.05),
text = element_text(face = "bold", size = 11),
axis.ticks = element_blank()
)
We again plot by marital_status, but furthermore we use this variable for color and fill. There is no real advantage in using different colors here for the 3 boxes.
factor2 <- 0.3
darkred_alpha <- rgb(t(col2rgb("darkred") / 255 * factor2 + 1 * (1 - factor2)))
darkgreen_alpha <- rgb(t(col2rgb("darkgreen") / 255 * factor2 + 1 * (1 - factor2)))
darkblue_alpha <- rgb(t(col2rgb("darkblue") / 255 * factor2 + 1 * (1 - factor2)))
gray42_alpha <- rgb(t(col2rgb("gray42") / 255 * factor2 + 1 * (1 - factor2)))
data_toy |>
ggplot(aes(x = marital_status, y = year_birth)) +
geom_boxplot(aes(color = marital_status, fill = marital_status), # each boxplot gets its own color, we do not use default colors, therefore we use scale_color_manual and scale_fill_manual later
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
outlier.stroke = 1,
na.rm = TRUE
) +
stat_boxplot(aes(color = marital_status),
geom = "errorbar",
linewidth = 1,
na.rm = TRUE
) +
labs(
title = "Year of Birth\nBy marital status",
x = "",
y = ""
) +
scale_color_manual(values = c("single" = "darkred", "married" = "darkblue", "divorced" = "darkgreen", "widowed" = "gray42")) + # use our own color selection for color
scale_fill_manual(values = c("single" = darkred_alpha, "married" = darkblue_alpha, "divorced" = darkgreen_alpha, "widowed" = gray42_alpha)) + # use our own color selection for fill
theme(
legend.position = "none", # legend is not necessary here
axis.ticks = element_blank()
)
Here we use marital_status again:
data_toy |>
ggplot(aes(y = year_birth)) + # we do not use x="marital_status" because we use facets
geom_boxplot(aes(color = marital_status, fill = marital_status),
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
na.rm = TRUE
) +
stat_boxplot(aes(color = marital_status),
geom = "errorbar",
linewidth = 1,
na.rm = TRUE
) +
labs(
title = "Geburtsjahr\nnach Familienstand",
x = "",
y = ""
) +
facet_grid(. ~ marital_status, # facet creates again 4 boxplots
labeller = as_labeller(c("single" = "ledig", "married" = "verheiratet", "divorced" = "geschieden", "widowed" = "verwitwet")) # be careful here!
) +
scale_x_continuous(
breaks = NULL # This ensures no x-axis labels or ticks
) +
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
scale_color_manual(values = c("single" = "darkred", "married" = "darkblue", "divorced" = "darkgreen", "widowed" = "gray42")) + # use our own color selection for color
scale_fill_manual(values = c("single" = darkred_alpha, "married" = darkblue_alpha, "divorced" = darkgreen_alpha, "widowed" = gray42_alpha)) + # use our own color selection for fill
theme(
legend.position = "none", # we do not need legend here
axis.ticks = element_blank() # no ticks on x-axis
)
But we can use another factor to get a “matrix”. Here we use austrian for rows and marital_status for columns.
data_toy |>
filter(!is.na(austrian)) |> # we do not want to get a row for austrian == NA
ggplot(aes(y = year_birth)) + # we do not use x="marital_status" because we use facet
geom_boxplot(aes(color = marital_status, fill = marital_status),
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
na.rm = TRUE
) +
stat_boxplot(aes(color = marital_status),
geom = "errorbar",
linewidth = 1,
na.rm = TRUE
) +
labs(
title = "Geburtsjahr\nnach Staatsbürgerschaft und Familienstand",
x = "",
y = ""
) +
facet_grid(austrian ~ marital_status,
labeller = as_labeller(c(
"single" = "ledig", "married" = "verheiratet", "divorced" = "geschieden", "widowed" = "verwitwet", # be careful here
"TRUE" = "Österreicher/in", "FALSE" = "kein/e Österreicher/in" # be careful here
))
) +
scale_x_continuous(
breaks = NULL # This ensures no x-axis labels or ticks
) +
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
scale_color_manual(values = c("single" = "darkred", "married" = "darkblue", "divorced" = "darkgreen", "widowed" = "gray42")) + # use our own color selection for color
scale_fill_manual(values = c("single" = darkred_alpha, "married" = darkblue_alpha, "divorced" = darkgreen_alpha, "widowed" = gray42_alpha)) + # use our own color selection for fill
theme(
legend.position = "none", # we do not need legend here
axis.ticks = element_blank() # no ticks on x-axis
)
Maybe we want to use different colors for Austrian as well. For that we define a new variable in our data_toy.
# for demonstration only
data_toy |>
mutate(
color = interaction(marital_status, austrian, sep = "_")
) |>
distinct(color)
## # A tibble: 8 × 1
## color
## <fct>
## 1 divorced_FALSE
## 2 single_FALSE
## 3 single_TRUE
## 4 married_FALSE
## 5 married_TRUE
## 6 divorced_TRUE
## 7 <NA>
## 8 widowed_TRUE
factor2 <- 0.3 # blend with white
darkred_alpha <- rgb(t(col2rgb("darkred") / 255 * factor2 + 1 * (1 - factor2)))
darkgreen_alpha <- rgb(t(col2rgb("darkgreen") / 255 * factor2 + 1 * (1 - factor2)))
darkblue_alpha <- rgb(t(col2rgb("darkblue") / 255 * factor2 + 1 * (1 - factor2)))
gray42_alpha <- rgb(t(col2rgb("gray42") / 255 * factor2 + 1 * (1 - factor2)))
factor3 <- 0.5 # blend with black
darkred_alpha2 <- rgb(t(col2rgb("darkred") / 255 * factor3 + 0 * (1 - factor3)))
darkgreen_alpha2 <- rgb(t(col2rgb("darkgreen") / 255 * factor3 + 0 * (1 - factor3)))
darkblue_alpha2 <- rgb(t(col2rgb("darkblue") / 255 * factor3 + 0 * (1 - factor3)))
gray42_alpha2 <- rgb(t(col2rgb("gray42") / 255 * factor3 + 0 * (1 - factor3)))
# blend with white
darkred_alpha3 <- rgb(t(col2rgb(darkred_alpha2) / 255 * factor2 + 1 * (1 - factor2)))
darkgreen_alpha3 <- rgb(t(col2rgb(darkgreen_alpha) / 255 * factor2 + 1 * (1 - factor2)))
darkblue_alpha3 <- rgb(t(col2rgb(darkblue_alpha2) / 255 * factor2 + 1 * (1 - factor2)))
gray42_alpha3 <- rgb(t(col2rgb(gray42_alpha2) / 255 * factor2 + 1 * (1 - factor2)))
data_toy |>
filter(!is.na(austrian)) |>
mutate(color = interaction(marital_status, austrian, sep = "_")) |> # add the color e.g. single_FALSE
ggplot(aes(y = year_birth)) + # we do not use x = "marital_status" because we use facet
geom_boxplot(aes(color = color, fill = color),
linewidth = 1,
median.linewidth = 1,
outlier.shape = 23,
outlier.size = 3,
na.rm = TRUE
) +
stat_boxplot(aes(color = color),
geom = "errorbar",
linewidth = 1,
na.rm = TRUE
) +
labs(
title = "Geburtsjahr\nnach Staatsbürgerschaft und Familienstand",
x = "",
y = ""
) +
facet_grid(austrian ~ marital_status,
labeller = as_labeller(c(
"single" = "ledig", "married" = "verheiratet", "divorced" = "geschieden", "widowed" = "verwitwet", # be careful here
"TRUE" = "Österreicher/in", "FALSE" = "kein/e Österreicher/in" # be careful here
))
) +
scale_x_continuous(
breaks = NULL # This ensures no x-axis labels or ticks
) +
scale_y_continuous( # control the position of breaks, normally I keep standard values
breaks = 1920 + (0:4) * 20,
minor_breaks = 1930 + (0:4) * 20
) +
scale_color_manual(values = c(
"single_FALSE" = "darkred", "single_TRUE" = darkred_alpha2,
"married_FALSE" = "darkblue", "married_TRUE" = darkblue_alpha2,
"divorced_FALSE" = "darkgreen", "divorced_TRUE" = darkgreen_alpha2,
"widowed_FALSE" = "gray42", "widowed_TRUE" = gray42_alpha2
)) + # use our own color selection for color
scale_fill_manual(values = c(
"single_FALSE" = darkred_alpha, "single_TRUE" = darkred_alpha3,
"married_FALSE" = darkblue_alpha, "married_TRUE" = darkblue_alpha3,
"divorced_FALSE" = darkgreen_alpha, "divorced_TRUE" = darkgreen_alpha3,
"widowed_FALSE" = gray42_alpha, "widowed_TRUE" = gray42_alpha3
)) + # use our own color selection for fill
theme(
legend.position = "none", # we do not need legend here
axis.ticks = element_blank() # no ticks on x-axis
)