great_tables
  • Get Started
  • Examples
  • Reference
  • Blog
import polars as pl
from great_tables import GT, html
import gt_extras as gte

# Column names of the Gini coefficient before ("market") and after
# ("disposable") taxes; reused throughout the script.
pre_tax_col = "gini_market__age_total"
post_tax_col = "gini_disposable__age_total"

# Load the raw CSV with an explicit dtype for every column, treating "NA" and
# empty strings as nulls.
df = pl.read_csv(
    source="income_inequality_raw.csv",
    null_values=["NA", ""],
    schema={
        "Entity": pl.String,
        "Code": pl.String,
        "Year": pl.Int64,
        post_tax_col: pl.Float64,
        pre_tax_col: pl.Float64,
        "population_historical": pl.Int64,
        "owid_region": pl.String,
    },
)

# Propagate the region field to all rows of that country.
# Sorting by Year as well as Entity makes the row order within each country
# deterministic and chronological (a sort on "Entity" alone does not promise
# any order among rows of the same country). The region is then filled in both
# directions inside each country, so rows before *and* after the populated
# one receive it.
df = df.sort(["Entity", "Year"]).with_columns(
    pl.col("owid_region")
    .fill_null(strategy="backward")
    .fill_null(strategy="forward")
    .over("Entity")
)

# Drop rows where there is a null in either pre-tax or post-tax cols.
# `subset` takes column names, so the names are passed directly rather than
# wrapped in `pl.col` expressions.
df = df.drop_nulls(subset=[post_tax_col, pre_tax_col])

# Percent reduction of the Gini coefficient from pre-tax to post-tax:
# (pre - post) / pre * 100, rounded to two decimals.
df = df.with_columns(
    gini_pct_change=pl.col(pre_tax_col)
    .sub(pl.col(post_tax_col))
    .truediv(pl.col(pre_tax_col))
    .mul(100)
    .round(2)
)

# Calculate 5-year benchmark (mean) of percent change for each country.
# The rolling window is evaluated per country in chronological order
# (`order_by="Year"`), so the result does not depend on the incoming row order.
# NOTE: the window spans 5 *rows*; because rows with missing Gini values were
# dropped earlier in the script, those are not necessarily 5 consecutive
# calendar years.
df = df.with_columns(
    pl.col("gini_pct_change")
    .rolling_mean(window_size=5)
    .over("Entity", order_by="Year")
    .alias("gini_pct_benchmark_5yr")
)

# Keep only the year-2020 rows of countries with more than 40 million people,
# ordered by ascending post-tax coefficient.
df = df.filter(
    (pl.col("population_historical") > 40000000) & (pl.col("Year") == 2020)
).sort(post_tax_col)


# Scale population: work on a log10 scale so very large countries do not dwarf
# the rest.
df = df.with_columns(pl.col("population_historical").log10().alias("pop_log"))
pop_min = df["pop_log"].min()
pop_max = df["pop_log"].max()
pop_span = pop_max - pop_min

# Set up gt-extras icons, scaling population to 1-10 range.
# The normalized value lies in [0, 1]; multiplying by 9 and adding 1 maps it to
# [1, 10] (the previous `* 10 + 1` produced 1-11). When every row has the same
# population the span is zero, so fall back to a single icon instead of
# dividing by zero.
if pop_span:
    icon_scale = (pl.col("pop_log") - pop_min) / pop_span * 9 + 1
else:
    icon_scale = pl.lit(1.0)

df = df.with_columns(icon_scale.round(0).cast(pl.Int64).alias("pop_icons"))

# Replace the numeric population with a comma-grouped string; nulls stay null.
df = df.with_columns(
    pl.col("population_historical").map_elements(
        lambda value: None if value is None else format(int(value), ","),
        return_dtype=pl.String,
    )
)

# Turn each pop_icons count into the string returned by gte.fa_icon_repeat
# for that many "person" icons.
df_with_icons = df.with_columns(
    pop_icons=pl.col("pop_icons").map_elements(
        lambda count: gte.fa_icon_repeat(name="person", repeats=int(count)),
        return_dtype=pl.String,
    )
)
# Build the base table (no gt-extras add-ons yet): countries as row names,
# grouped by region, with header, column layout, flag formatting, labels and
# an HTML source note.
gt = (
    GT(data=df_with_icons, rowname_col="Entity", groupname_col="owid_region")
    .tab_header(
        title="Income Inequality Before and After Taxes in 2020",
        subtitle="As measured by the Gini coefficient, where 0 is best and 1 is worst",
    )
    .cols_move(columns="pop_icons", after=pre_tax_col)
    .cols_align(align="left")
    .cols_hide(columns=["Year", "pop_log", "population_historical"])
    .fmt_flag(columns="Code")
    .cols_label(
        Code="",
        gini_pct_change="Improvement Post Taxes",
        pop_icons="Population",
    )
    .tab_source_note(
        source_note=html(
            """
            <div>
            <strong>Source:</strong> Data from <a href="https://github.com/rfordatascience/tidytuesday">#TidyTuesday</a> (2025-08-05).<br>
                <div>
                <strong>Dumbbell plot:</strong>
                <span style="color:#106ea0;">Blue:</span> post-tax Gini coefficient
                <span style="color:#e0b165;">Gold:</span> pre-tax Gini coefficient
                <br>
                </div>
            <strong>Bullet plot:</strong> Percent reduction in Gini after taxes for each country, compared to its 5-year average benchmark.
            </div>
            """
        )
    )
)

# Layer the gt-extras add-ons onto the table by calling each function directly
# with the table as its first argument. Read from the innermost call outward:
# dumbbell plot -> bullet plot -> merged population column -> Guardian theme.
(
    gte.gt_theme_guardian(
        gte.gt_merge_stack(
            gte.gt_plt_bullet(
                gte.gt_plt_dumbbell(
                    gt,
                    col1=pre_tax_col,
                    col2=post_tax_col,
                    col1_color="#e0b165",
                    col2_color="#106ea0",
                    dot_border_color="transparent",
                    num_decimals=2,
                    width=240,
                    label="Pre-tax to Post-tax Coefficient",
                ),
                "gini_pct_change",
                "gini_pct_benchmark_5yr",
                fill="#963d4c",
                target_color="#3D3D3D",
                bar_height=15,
                width=200,
            ),
            col1="pop_icons",
            col2="population_historical",
        )
    )
)
Income Inequality Before and After Taxes in 2020
As measured by the Gini coefficient, where 0 is best and 1 is worst
Pre-tax to Post-tax Coefficient Population Improvement Post Taxes
Europe
France France
0.52 0.28
65,905,226
Germany Germany
0.5 0.3
83,628,661
Spain Spain
0.52 0.33
47,679,437
Italy Italy
0.53 0.33
59,912,714
United Kingdom United Kingdom
0.51 0.35
67,351,806
Asia
South Korea Korea, Rep.
0.41 0.33
51,858,440
Turkey Turkiye
0.5 0.4
86,091,644
North America
United States United States
0.52 0.38
339,436,106
Mexico Mexico
0.43 0.42
126,798,998
South America
Brazil Brazil
0.55 0.45
208,660,785
Source: Data from #TidyTuesday (2025-08-05).
Dumbbell plot: Blue: post-tax Gini coefficient Gold: pre-tax Gini coefficient
Bullet plot: Percent reduction in Gini after taxes for each country, compared to its 5-year average benchmark.