Skip to content

Pandera Integration

to_pandera_schema() converts a BaseSchema into a Pandera DataFrameSchema for runtime data validation.

Use typedframes for static analysis (catching column errors at lint time) and Pandera for runtime validation (checking data values, null constraints, and types against actual data).

from typing import Annotated
import pandas as pd
import pandera as pa
from typedframes import BaseSchema, Column
from typedframes.pandera import to_pandera_schema

class OrderSchema(BaseSchema):
    """Example schema with an integer order id, a float amount, and a string status."""

    order_id = Column(type=int)
    amount = Column(type=float)
    status = Column(type=str)

# Static analysis: checker validates column access at lint time
def process(df: Annotated[pd.DataFrame, OrderSchema]) -> None:
    """Print two columns of ``df`` to illustrate lint-time column checking.

    The ``Annotated`` metadata attaches ``OrderSchema`` to the parameter.
    ``order_id`` is declared on ``OrderSchema`` and ``revenue`` is not, so the
    second access is the one the checker is expected to report.
    """
    print(df["order_id"])   # ✓ validated by typedframes checker
    print(df["revenue"])    # ✗ unknown-column at lint time

# Runtime validation: convert the schema once, then let Pandera check the
# values actually loaded from disk.
pandera_schema = to_pandera_schema(OrderSchema)
raw_orders = pd.read_csv("orders.csv")
validated_df = pandera_schema.validate(raw_orders)

!!! tip
    Install the Pandera extra to use this integration:

    ```shell
    pip install typedframes[pandera]
    ```


typedframes.pandera.to_pandera_schema(schema)

Convert a typedframes BaseSchema to a pandera DataFrameSchema.

Maps Column and ColumnSet definitions to pandera Column objects, enabling runtime validation using the same schema definitions used for static analysis.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `schema` | `type[BaseSchema]` | A BaseSchema subclass to convert. | required |

Returns:

| Type | Description |
| --- | --- |
| `DataFrameSchema` | A pandera DataFrameSchema with columns matching the input schema. |

Raises:

| Type | Description |
| --- | --- |
| `MissingDependencyError` | If pandera is not installed. |

Example

class UserData(BaseSchema):
    user_id = Column(type=int)
    email = Column(type=str)
    age = Column(type=int, nullable=True)

pandera_schema = to_pandera_schema(UserData)
validated_df = pandera_schema.validate(df)

Source code in src/typedframes/pandera.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def to_pandera_schema(schema: type[BaseSchema]) -> pa.DataFrameSchema:
    """Build a pandera DataFrameSchema from a typedframes BaseSchema.

    Each Column on the schema becomes one pandera Column with the mapped
    dtype and the column's own nullability. Each ColumnSet member becomes a
    non-nullable pandera Column; members of a regex ColumnSet are registered
    as regex patterns. The resulting schema is strict exactly when the input
    schema does not allow extra columns.

    Args:
        schema: A BaseSchema subclass to convert.

    Returns:
        A pandera DataFrameSchema with columns matching the input schema.

    Raises:
        MissingDependencyError: If pandera is not installed.

    Example:
        class UserData(BaseSchema):
            user_id = Column(type=int)
            email = Column(type=str)
            age = Column(type=int, nullable=True)

        pandera_schema = to_pandera_schema(UserData)
        validated_df = pandera_schema.validate(df)

    """
    # pandera is an optional extra, so import it lazily and surface a
    # package-specific error instead of a bare ImportError.
    try:
        import pandera as pa
    except ImportError:
        from .missing_dependency_error import MissingDependencyError

        raise MissingDependencyError("pandera", "to_pandera_schema") from None

    # Individually declared columns, keyed by their DataFrame column name.
    pandera_columns: dict[str, pa.Column] = {
        column.column_name: pa.Column(dtype=_map_dtype(column.type), nullable=column.nullable)
        for column in schema.columns().values()
    }

    # Column sets are added afterwards, so a member that shares a key with a
    # plain column replaces that entry.
    for column_set in schema.column_sets().values():
        member_dtype = _map_dtype(column_set.type)
        # Only regex sets pass ``regex``; literal members rely on pandera's default.
        regex_kwargs = {"regex": True} if column_set.regex else {}
        for member_name in column_set.members:
            pandera_columns[member_name] = pa.Column(dtype=member_dtype, nullable=False, **regex_kwargs)

    return pa.DataFrameSchema(columns=pandera_columns, strict=not schema.allow_extra_columns)