Skip to content

pandas

For most use cases, annotate with Annotated[pd.DataFrame, MySchema] and use native pandas subscript access. The checker validates all column references at lint time without any runtime overhead:

from typing import Annotated
import pandas as pd
from typedframes import BaseSchema, Column

# Schema declaring the three typed columns expected in orders.csv.
class OrderSchema(BaseSchema):
    order_id   = Column(type=int)
    amount     = Column(type=float)
    status     = Column(type=str)

# The Annotated hint attaches OrderSchema for the lint-time checker;
# the value itself is whatever pd.read_csv returns (native pandas).
df: Annotated[pd.DataFrame, OrderSchema] = pd.read_csv("orders.csv")
print(df["order_id"])          # ✓ native pandas string key, validated by the checker
print(df[OrderSchema.amount.s])  # ✓ refactor-safe: column key obtained via the .s descriptor

PandasFrame — runtime enhancement

PandasFrame is a pd.DataFrame subclass that adds runtime column validation and descriptor dispatch (df[Schema.column]). Use it when you need:

  • Regex ColumnSet resolution against actual DataFrame columns at runtime
  • Descriptor-based subscript access (df[Schema.column]) without .s
from typedframes.pandas import PandasFrame
from typedframes import BaseSchema, Column, ColumnSet

# Schema with one fixed column plus a regex-based ColumnSet.
class SalesSchema(BaseSchema):
    product_id  = Column(type=int)
    # regex=True: `members` is a pattern (region_\w+), not a literal list of names.
    region_cols = ColumnSet(members=r"region_\w+", type=float, regex=True)

# Runtime: resolves regex ColumnSet against actual columns
# (from_schema computes the ColumnSet consumption map from the frame's columns).
df = PandasFrame.from_schema(pd.read_csv("sales.csv"), SalesSchema)

Note: Prefer Annotated[pd.DataFrame, Schema] for new code. PandasFrame is most useful when regex ColumnSet patterns need to be resolved against actual data at runtime.


typedframes.pandas.PandasFrame(data=None, schema=None, column_consumed_map=None, **kwargs)

Bases: DataFrame, Generic[SchemaT]

Pandas DataFrame subclass with schema-aware column access.

Preserves all pandas functionality while adding schema-based column access via __getitem__ overloads.

Attributes:

Name Type Description
_schema_class type[SchemaT] | None

The schema class for this DataFrame.

_column_consumed_map dict[str, list[str]]

Mapping of ColumnSet names to matched columns.

Example

class UserData(BaseSchema):
    user_id = Column(type=int)
    email = Column(type=str)

df = PandasFrame.from_schema(pd.read_csv("data.csv"), UserData)
df[UserData.user_id]  # pd.Series via Column descriptor
df["email"]  # pd.Series via string key (standard pandas)

Initialize a PandasFrame.

Parameters:

Name Type Description Default
data DataFrame | dict | None

DataFrame data (passed to pd.DataFrame).

None
schema type[SchemaT] | None

The schema class to associate with this DataFrame.

None
column_consumed_map dict[str, list[str]] | None

Pre-computed ColumnSet consumption map.

None
**kwargs Any

Additional arguments passed to pd.DataFrame.

{}
Source code in src/typedframes/pandas.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def __init__(
    self,
    data: pd.DataFrame | dict | None = None,
    schema: type[SchemaT] | None = None,
    column_consumed_map: dict[str, list[str]] | None = None,
    **kwargs: Any,
) -> None:
    """
    Build a PandasFrame and attach its schema metadata.

    The underlying frame is constructed by ``pd.DataFrame`` first; the
    schema class and the ColumnSet consumption map are stored afterwards.

    Args:
        data: Frame contents, forwarded unchanged to ``pd.DataFrame``.
        schema: Schema class to record on this frame, if any.
        column_consumed_map: Already-resolved ColumnSet consumption map.
            A missing or empty value is stored as a new empty dict.
        **kwargs: Extra keyword arguments forwarded to ``pd.DataFrame``.

    """
    super().__init__(data, **kwargs)
    consumed = column_consumed_map if column_consumed_map else {}
    self._schema_class = schema
    self._column_consumed_map = consumed

Attributes

schema property

Return the schema class associated with this DataFrame.

Functions

__getitem__(key)

__getitem__(key: Column) -> pd.Series
__getitem__(key: ColumnSet) -> pd.DataFrame
__getitem__(key: ColumnGroup) -> pd.DataFrame
__getitem__(key: str) -> pd.Series
__getitem__(key: list[str]) -> pd.DataFrame
__getitem__(key: pd.Series) -> PandasFrame[SchemaT]

Access columns by schema descriptor, string key, or boolean mask.

Supports Column, ColumnSet, ColumnGroup descriptors from the schema, as well as standard pandas string and list-of-string access.

Source code in src/typedframes/pandas.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
def __getitem__(  # ty: ignore[invalid-method-override]
    self,
    key: Column | ColumnSet | ColumnGroup | str | list[str] | pd.Series,
) -> pd.Series | pd.DataFrame:
    """
    Look up data by schema descriptor or by any ordinary pandas key.

    Schema descriptors are first translated into plain pandas keys:

    * ``Column`` -> its ``column_name``.
    * ``ColumnSet`` -> the column list recorded for ``key.name`` in the
      consumption map (an empty list when there is no entry).
    * ``ColumnGroup`` -> the names returned by ``key.get_column_names``.

    Every other key (string, list of strings, boolean mask, ...) is handed
    to ``pd.DataFrame.__getitem__`` untouched.
    """
    if isinstance(key, Column):
        lookup = key.column_name
    elif isinstance(key, ColumnSet):
        lookup = self._column_consumed_map.get(key.name, [])
    elif isinstance(key, ColumnGroup):
        lookup = key.get_column_names(self._column_consumed_map)
    else:
        lookup = key
    # Single delegation point: pandas does the actual selection.
    return super().__getitem__(lookup)

from_schema(df, schema, column_consumed_map=None) classmethod

Create a PandasFrame from an existing DataFrame and schema.

Parameters:

Name Type Description Default
df DataFrame

Source pandas DataFrame.

required
schema type[SchemaT]

Schema class to associate.

required
column_consumed_map dict[str, list[str]] | None

Pre-computed ColumnSet consumption map. If not provided, will be computed from schema.

None

Returns:

Type Description
PandasFrame[SchemaT]

PandasFrame with schema metadata.

Source code in src/typedframes/pandas.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
@classmethod
def from_schema(
    cls,
    df: pd.DataFrame,
    schema: type[SchemaT],
    column_consumed_map: dict[str, list[str]] | None = None,
) -> PandasFrame[SchemaT]:
    """
    Wrap an existing DataFrame in a PandasFrame bound to ``schema``.

    Args:
        df: The pandas DataFrame to wrap.
        schema: Schema class to attach to the result.
        column_consumed_map: ColumnSet consumption map to reuse. When it
            is ``None`` the map is obtained from
            ``schema.compute_column_map`` using the frame's column names.

    Returns:
        A new instance of ``cls`` carrying the schema and consumption map.

    """
    consumed = column_consumed_map
    if consumed is None:
        # compute_column_map yields a pair; only its second item
        # (the consumption map) is needed here.
        column_names = list(df.columns)
        _ignored, consumed = schema.compute_column_map(column_names)

    return cls(df, schema=schema, column_consumed_map=consumed)  # ty: ignore[no-matching-overload]

groupby(by=None, **kwargs)

Group by schema descriptors, strings, or mixed lists.

Accepts Column, ColumnSet, ColumnGroup descriptors in addition to standard pandas groupby arguments.

Parameters:

Name Type Description Default
by Any

Column(s) to group by. Accepts schema descriptors.

None
**kwargs Any

Additional arguments passed to pd.DataFrame.groupby.

{}

Returns:

Type Description
Any

DataFrameGroupBy object.

Source code in src/typedframes/pandas.py
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def groupby(self, by: Any = None, **kwargs: Any) -> Any:  # ty: ignore[invalid-method-override]
    """Group rows, accepting schema descriptors as grouping keys.

    ``by`` is first passed through ``self._resolve_by`` and the result is
    then given to ``pd.DataFrame.groupby``, so ``Column``, ``ColumnSet``
    and ``ColumnGroup`` descriptors can be used alongside the usual
    pandas grouping arguments.

    Args:
        by: Grouping key(s); schema descriptors are accepted.
        **kwargs: Extra keyword arguments forwarded to
            ``pd.DataFrame.groupby``.

    Returns:
        DataFrameGroupBy object.

    """
    resolved_by = self._resolve_by(by)
    return super().groupby(resolved_by, **kwargs)

read_csv(filepath_or_buffer, schema, **kwargs) classmethod

Read a CSV file and create a schema-aware PandasFrame.

Parameters:

Name Type Description Default
filepath_or_buffer Any

File path or buffer to read from.

required
schema type[SchemaT]

Schema class to associate with the DataFrame.

required
**kwargs Any

Additional arguments passed to pd.read_csv.

{}

Returns:

Type Description
PandasFrame[SchemaT]

PandasFrame with schema metadata.

Source code in src/typedframes/pandas.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@classmethod
def read_csv(cls, filepath_or_buffer: Any, schema: type[SchemaT], **kwargs: Any) -> PandasFrame[SchemaT]:
    """
    Load CSV data with pandas and bind the result to ``schema``.

    Args:
        filepath_or_buffer: Path or buffer handed to ``pd.read_csv``.
        schema: Schema class to attach to the loaded frame.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The result of ``cls.from_schema`` for the loaded frame.

    """
    loaded = pd.read_csv(filepath_or_buffer, **kwargs)
    return cls.from_schema(loaded, schema)

read_excel(io, schema, **kwargs) classmethod

Read an Excel file and create a schema-aware PandasFrame.

Parameters:

Name Type Description Default
io Any

File path or ExcelFile object to read from.

required
schema type[SchemaT]

Schema class to associate with the DataFrame.

required
**kwargs Any

Additional arguments passed to pd.read_excel.

{}

Returns:

Type Description
PandasFrame[SchemaT]

PandasFrame with schema metadata.

Source code in src/typedframes/pandas.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
@classmethod
def read_excel(cls, io: Any, schema: type[SchemaT], **kwargs: Any) -> PandasFrame[SchemaT]:
    """
    Load an Excel sheet with pandas and bind the result to ``schema``.

    Args:
        io: Path or ExcelFile object handed to ``pd.read_excel``.
        schema: Schema class to attach to the loaded frame.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_excel``.

    Returns:
        The result of ``cls.from_schema`` for the loaded frame.

    """
    loaded = pd.read_excel(io, **kwargs)
    return cls.from_schema(loaded, schema)

read_json(path_or_buf, schema, **kwargs) classmethod

Read a JSON file and create a schema-aware PandasFrame.

Parameters:

Name Type Description Default
path_or_buf Any

File path or buffer to read from.

required
schema type[SchemaT]

Schema class to associate with the DataFrame.

required
**kwargs Any

Additional arguments passed to pd.read_json.

{}

Returns:

Type Description
PandasFrame[SchemaT]

PandasFrame with schema metadata.

Source code in src/typedframes/pandas.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
@classmethod
def read_json(cls, path_or_buf: Any, schema: type[SchemaT], **kwargs: Any) -> PandasFrame[SchemaT]:
    """
    Load JSON data with pandas and bind the result to ``schema``.

    Args:
        path_or_buf: Path or buffer handed to ``pd.read_json``.
        schema: Schema class to attach to the loaded frame.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_json``.

    Returns:
        The result of ``cls.from_schema`` for the loaded frame.

    """
    loaded = pd.read_json(path_or_buf, **kwargs)
    return cls.from_schema(loaded, schema)

read_parquet(path, schema, **kwargs) classmethod

Read a Parquet file and create a schema-aware PandasFrame.

Parameters:

Name Type Description Default
path Any

File path to read from.

required
schema type[SchemaT]

Schema class to associate with the DataFrame.

required
**kwargs Any

Additional arguments passed to pd.read_parquet.

{}

Returns:

Type Description
PandasFrame[SchemaT]

PandasFrame with schema metadata.

Source code in src/typedframes/pandas.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@classmethod
def read_parquet(cls, path: Any, schema: type[SchemaT], **kwargs: Any) -> PandasFrame[SchemaT]:
    """
    Load a Parquet file with pandas and bind the result to ``schema``.

    Args:
        path: Location handed to ``pd.read_parquet``.
        schema: Schema class to attach to the loaded frame.
        **kwargs: Extra keyword arguments forwarded to ``pd.read_parquet``.

    Returns:
        The result of ``cls.from_schema`` for the loaded frame.

    """
    loaded = pd.read_parquet(path, **kwargs)
    return cls.from_schema(loaded, schema)

to_pandas()

Convert to plain pandas DataFrame (drops schema metadata).

Source code in src/typedframes/pandas.py
256
257
258
def to_pandas(self) -> pd.DataFrame:
    """Return this frame's contents as a plain ``pd.DataFrame``.

    The result is built by the ``pd.DataFrame`` constructor, so it is not
    a PandasFrame and carries no schema metadata.
    """
    plain_frame = pd.DataFrame(self)
    return plain_frame