import polars as pl
from great_tables import GT, html
import gt_extras as gte
# Column names of the pre-tax and post-tax Gini coefficients.
pre_tax_col = "gini_market__age_total"
post_tax_col = "gini_disposable__age_total"

# Explicit dtype for each CSV column, so no schema inference is needed.
csv_schema = {
    "Entity": pl.String,
    "Code": pl.String,
    "Year": pl.Int64,
    post_tax_col: pl.Float64,
    pre_tax_col: pl.Float64,
    "population_historical": pl.Int64,
    "owid_region": pl.String,
}

# Read the data; "NA" and empty fields are loaded as nulls
df = pl.read_csv(
    "income_inequality_raw.csv",
    schema=csv_schema,
    null_values=["NA", ""],
)
# Propagate the region field to all rows of that country.
#
# Rows are ordered by country and then by year: sorting on "Entity" alone
# leaves the order inside a country unspecified, and both the fill direction
# here and any later per-country rolling computation depend on that order.
#
# A window expression fills the nulls in place, which avoids aggregating every
# column into lists and exploding them again. The backward fill reaches rows
# before a labelled row; the forward fill then covers any rows after the last
# labelled one, so every row of a country with at least one label gets one.
df = df.sort("Entity", "Year").with_columns(
    pl.col("owid_region")
    .fill_null(strategy="backward")
    .fill_null(strategy="forward")
    .over("Entity")
)
# Keep only rows that have both a post-tax and a pre-tax coefficient
gini_cols = [post_tax_col, pre_tax_col]
df = df.drop_nulls(subset=gini_cols)
# Percent reduction in the Gini coefficient from pre-tax to post-tax,
# expressed relative to the pre-tax value and rounded to two decimals.
gini_before = pl.col(pre_tax_col)
gini_after = pl.col(post_tax_col)
pct_reduction = (gini_before - gini_after) / gini_before * 100
df = df.with_columns(pct_reduction.round(2).alias("gini_pct_change"))
# 5-year benchmark: rolling mean of the percent change, computed separately
# for each country over a window of five rows.
# NOTE: the window counts rows in their current order, so this relies on each
# country's rows already being in year order at this point.
benchmark_5yr = pl.col("gini_pct_change").rolling_mean(window_size=5).over("Entity")
df = df.with_columns(benchmark_5yr.alias("gini_pct_benchmark_5yr"))
# Keep the rows for the year 2020 whose population exceeds 40 million,
# ordered by ascending post-tax coefficient
is_populous = pl.col("population_historical") > 40_000_000
is_2020 = pl.col("Year") == 2020
df = df.filter(is_populous, is_2020).sort(post_tax_col)
# Scale population (log10 of the raw head count)
df = df.with_columns(pl.col("population_historical").log10().alias("pop_log"))
pop_min = df["pop_log"].min()
pop_max = df["pop_log"].max()

# Set up gt-extras icons, scaling population to 1-10 range.
# Min-max normalisation yields a value in [0, 1]; multiplying by 9 and adding 1
# maps it onto 1..10 (a factor of 10 would produce 1..11).
if pop_min is None or pop_max == pop_min:
    # Empty frame, or every remaining row has the same population: the min-max
    # span is zero, so use a single icon instead of dividing by zero.
    pop_icons_expr = pl.lit(1, dtype=pl.Int64)
else:
    pop_icons_expr = (
        ((pl.col("pop_log") - pop_min) / (pop_max - pop_min) * 9 + 1)
        .round(0)
        .cast(pl.Int64)
    )
df = df.with_columns(pop_icons_expr.alias("pop_icons"))
def _with_thousands_separator(value):
    """Return *value* as an integer string with comma thousands separators."""
    return f"{int(value):,}" if value is not None else None


# Format original population value with commas; the column is replaced by its
# string form, so numeric work on it has to happen before this step.
df = df.with_columns(
    pl.col("population_historical").map_elements(
        _with_thousands_separator, return_dtype=pl.String
    )
)
# Replace each pop_icons count with the string returned by gte.fa_icon_repeat
# for that many "person" icons
person_icons = pl.col("pop_icons").map_elements(
    lambda count: gte.fa_icon_repeat(name="person", repeats=int(count)),
    return_dtype=pl.String,
)
df_with_icons = df.with_columns(person_icons)
# Generate the table, before gt-extras add-ons
# Source note: data attribution plus a legend for the dumbbell and bullet plots.
source_note = html(
    """
<div>
<strong>Source:</strong> Data from <a href="https://github.com/rfordatascience/tidytuesday">#TidyTuesday</a> (2025-08-05).<br>
<div>
<strong>Dumbbell plot:</strong>
<span style="color:#106ea0;">Blue:</span> post-tax Gini coefficient
<span style="color:#e0b165;">Gold:</span> pre-tax Gini coefficient
<br>
</div>
<strong>Bullet plot:</strong> Percent reduction in Gini after taxes for each country, compared to its 5-year average benchmark.
</div>
"""
)

# Column header labels; the flag column ("Code") gets an empty label.
column_labels = {
    "Code": "",
    "gini_pct_change": "Improvement Post Taxes",
    "pop_icons": "Population",
}

# Countries label the rows and regions form the row groups.
gt = GT(df_with_icons, rowname_col="Entity", groupname_col="owid_region")
gt = gt.tab_header(
    "Income Inequality Before and After Taxes in 2020",
    "As measured by the Gini coefficient, where 0 is best and 1 is worst",
)
gt = gt.cols_move("pop_icons", after=pre_tax_col).cols_align("left")
# Helper columns stay in the data (later steps read them) but are not shown.
gt = gt.cols_hide(["Year", "pop_log", "population_historical"])
gt = gt.fmt_flag("Code").cols_label(column_labels)
gt = gt.tab_source_note(source_note)
# Apply the gt-extras functions via pipe
(
gt.pipe(
gte.gt_plt_dumbbell,
col1=pre_tax_col,
col2=post_tax_col,
col1_color="#e0b165",
col2_color="#106ea0",
dot_border_color="transparent",
num_decimals=2,
width=240,
label="Pre-tax to Post-tax Coefficient",
)
.pipe(
gte.gt_plt_bullet,
"gini_pct_change",
"gini_pct_benchmark_5yr",
fill="#963d4c",
target_color="#3D3D3D",
bar_height=15,
width=200,
)
.pipe(
gte.gt_merge_stack,
col1="pop_icons",
col2="population_historical",
)
.pipe(gte.gt_theme_guardian)
)| Income Inequality Before and After Taxes in 2020 | ||||
| As measured by the Gini coefficient, where 0 is best and 1 is worst | ||||
|  |  | Pre-tax to Post-tax Coefficient | Population | Improvement Post Taxes |
|---|---|---|---|---|
| Europe | ||||
| France |  |  | 65,905,226 |  |
| Germany |  |  | 83,628,661 |  |
| Spain |  |  | 47,679,437 |  |
| Italy |  |  | 59,912,714 |  |
| United Kingdom |  |  | 67,351,806 |  |
| Asia | ||||
| South Korea |  |  | 51,858,440 |  |
| Turkey |  |  | 86,091,644 |  |
| North America | ||||
| United States |  |  | 339,436,106 |  |
| Mexico |  |  | 126,798,998 |  |
| South America | ||||
| Brazil |  |  | 208,660,785 |  |