Select the Python interpreter that reticulate should use (adjust the path for your computer):
# Load reticulate, the R interface to Python.
library(reticulate)
# Pin the interpreter to use. With required = TRUE reticulate raises an error
# if this Python cannot be used, instead of silently picking another one.
use_python("/home/familie_kimeswenger/venv/python_3_12/bin/python3", required = TRUE)
# Print the Python configuration reticulate resolved, to verify the choice.
py_config()
## python: /home/familie_kimeswenger/venv/python_3_12/bin/python3
## libpython: /usr/lib/python3.12/config-3.12-x86_64-linux-gnu/libpython3.12.so
## pythonhome: /home/familie_kimeswenger/venv/python_3_12:/home/familie_kimeswenger/venv/python_3_12
## version: 3.12.3 (main, Mar 3 2026, 12:15:18) [GCC 13.3.0]
## numpy: /home/familie_kimeswenger/venv/python_3_12/lib/python3.12/site-packages/numpy
## numpy_version: 1.26.4
##
## NOTE: Python version was forced by use_python() function
Load packages in R:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Load packages in Python:
import pandas as pd
Import the data, e.g. in Python:
# Read the toy data set and give three columns an explicit type.
py_data_toy = pd.read_csv("toy.csv").assign(
    # plain boolean column
    austrian=lambda df: df["austrian"].astype(bool),
    # unordered categorical with a fixed set of categories
    marital_status=lambda df: pd.Categorical(
        df["marital_status"],
        categories=["single", "divorced", "married"],
    ),
    # ordered categorical: child < adolescent < adult
    age_cat=lambda df: pd.Categorical(
        df["age_cat"],
        categories=["child", "adolescent", "adult"],
        ordered=True,
    ),
)
# Show the first rows to check the import.
py_data_toy.head()
## name austrian marital_status ... temperature test_score height
## 0 Franz False divorced ... 37.0 NaN 180.4
## 1 Sepp False single ... 38.5 80.0 160.5
## 2 Maria True single ... 36.0 5.0 158.9
## 3 Georg True single ... 35.4 30.0 130.2
## 4 Karl False married ... 39.0 1.0 174.3
##
## [5 rows x 8 columns]
Do something in Python …
We can use py_data_toy in R chunks too, by accessing it through the py object (it is converted to a data.frame):
# `py` exposes the objects created in the Python chunks to R.
r_data_toy <- py$py_data_toy
# Show the first six rows of the converted data.
head(r_data_toy)
## name austrian marital_status year_birth age_cat temperature test_score
## 1 Franz FALSE divorced 1990 adult 37.0 NaN
## 2 Sepp FALSE single 2009 adolescent 38.5 80
## 3 Maria TRUE single 2005 adult 36.0 5
## 4 Georg TRUE single 2019 child 35.4 30
## 5 Karl FALSE married 1930 adult 39.0 1
## 6 Ulrike TRUE married 1980 adult 38.0 9
## height
## 1 180.4
## 2 160.5
## 3 158.9
## 4 130.2
## 5 174.3
## 6 172.1
And next we can use ggplot:
# Boxplot of the birth years.
ggplot(
  data = r_data_toy, # data
  mapping = aes(y = year_birth) # aesthetics set here apply to every geom layer
) +
  geom_boxplot() # geom layer (plot type: boxplot)
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
Do something in R:
# Mean height and mean temperature per age category.
# Missing values are not removed (no na.rm = TRUE), so a group that contains
# a missing value gets a missing mean.
r_summary <- r_data_toy |>
  group_by(age_cat) |>
  summarise(
    # one mean per listed column, named mean_height and mean_temperature
    across(c(height, temperature), mean, .names = "mean_{.col}"),
    .groups = "drop" # not necessary here (only one grouping variable)
  )
r_summary
## # A tibble: 4 × 3
## age_cat mean_height mean_temperature
## <ord> <dbl> <dbl>
## 1 child 130. 35.4
## 2 adolescent 160. 38.5
## 3 adult NaN 37.4
## 4 <NA> NaN 39.5
Use it in Python:
# `r` gives Python access to the objects created in the R chunks.
py_summary = r.r_summary
# Display the converted summary table.
py_summary
## age_cat mean_height mean_temperature
## 0 child 130.2 35.400000
## 1 adolescent 160.5 38.500000
## 2 adult NaN 37.370588
## 3 NaN NaN 39.500000