Classification

The following example, which is closely inspired by the Claude documentation, shows how .extract_data() can be used to perform text classification.

from typing import Literal

from chatlas import ChatOpenAI
from pydantic import BaseModel, Field
import pandas as pd

# Sample input sentence that is passed to .extract_data() further down.
text = "The new quantum computing breakthrough could revolutionize the tech industry."

# Schema for a single classification result: a category label and its score.
class Classification(BaseModel):
    # Category label, restricted to the six values listed in the Literal.
    name: Literal[
        "Politics", "Sports", "Technology", "Entertainment", "Business", "Other"
    ] = Field(description="The category name")

    # Score for this category. The 0.0-1.0 range is stated only in the
    # description text; no numeric bounds are declared on the field itself.
    score: float = Field(
        description="The classification score for the category, ranging from 0.0 to 1.0."
    )

# Top-level data model handed to .extract_data(); wraps the list of results.
class Classifications(BaseModel):
    """Array of classification results. The scores should sum to 1."""
    # List of Classification entries (category name + score). The "sum to 1"
    # requirement appears only in the docstring; no validator here enforces it.
    classifications: list[Classification]


# Create the chat client; no arguments are passed, so its defaults apply.
chat = ChatOpenAI()
# Extract structured data from `text`, using Classifications as the data model.
data = chat.extract_data(text, data_model=Classifications)
# `data` is indexed by key here; the "classifications" entries are tabulated
# with one row per entry. As a bare trailing expression this presumably renders
# only in a notebook/REPL -- in a plain script, assign or print the result.
pd.DataFrame(data["classifications"])

	name	score
0	Technology	0.95
1	Business	0.05
2	Politics	0.00
3	Sports	0.00
4	Entertainment	0.00
5	Other	0.00