Advanced Validation
A validation with a comprehensive set of rules.
import pointblank as pb
import polars as pl
# Build a multi-step validation plan for the "game_revenue" example dataset
# (loaded as a Polars table) and execute it with `.interrogate()`.
validation = (
    pb.Validate(
        data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"),
        tbl_name="game_revenue",
        label="Comprehensive validation example",
        # Warning / error / critical threshold levels shared by every step.
        thresholds=pb.Thresholds(warning=0.10, error=0.25, critical=0.35),
    )
    # STEP 1: player IDs are 12 uppercase letters followed by 3 digits.
    .col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$")
    # STEP 2: session durations are greater than 5.
    .col_vals_gt(columns="session_duration", value=5)
    # STEP 3: item revenue is at least 0.02.
    .col_vals_ge(columns="item_revenue", value=0.02)
    # STEP 4: item type is one of the two allowed values.
    .col_vals_in_set(columns="item_type", set=["iap", "ad"])
    # STEP 5: acquisition channel is one of the allowed values.
    .col_vals_in_set(
        columns="acquisition",
        set=["google", "facebook", "organic", "crosspromo", "other_campaign"],
    )
    # STEP 6: these countries must not appear.
    .col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"])
    # STEP 7: the `pre` callable reduces the table to the median of
    # `session_duration`; that value is what gets checked against [10, 50].
    .col_vals_between(
        columns="session_duration",
        left=10,
        right=50,
        pre=lambda df: df.select(pl.median("session_duration")),
    )
    # STEP 8: rows are distinct across this subset of columns.
    .rows_distinct(columns_subset=["player_id", "session_id", "time"])
    # STEP 9: the table has the expected row count.
    .row_count_match(count=2000)
    # STEP 10: the table has the expected column count.
    .col_count_match(count=11)
    # STEPS 11-13: no nulls in any column whose name starts with "item"
    # (one step per matching column).
    .col_vals_not_null(columns=pb.starts_with("item"))
    # STEP 14: the `start_day` column is present.
    .col_exists(columns="start_day")
    .interrogate()
)
validation

Preview of Input Table
Polars | Rows: 2,000 | Columns: 11 |
|||||||||||