Documentation Index
Fetch the complete documentation index at: https://mintlify.com/browser-use/browser-use/llms.txt
Use this file to discover all available pages before exploring further.
Overview
Structured output lets you define the exact data format you want from agent execution using Pydantic models. Instead of parsing unstructured text, you get validated Python objects with type safety and automatic serialization.
Structured output is ideal for data extraction tasks where you need reliable, type-safe results rather than free-form text.
Quick Start
from browser_use import Agent, ChatBrowserUse
from pydantic import BaseModel
class Product(BaseModel):
name: str
price: float
in_stock: bool
agent = Agent(
task="Go to example.com/product and extract product details",
llm=ChatBrowserUse(),
output_model_schema=Product,
)
history = await agent.run()
# Type-safe structured output
product: Product = history.structured_output
print(f"{product.name}: ${product.price}")
Defining Output Models
Basic Models
from pydantic import BaseModel, Field


# Flat model: every field is required and uses a built-in type.
class NewsArticle(BaseModel):
    title: str
    author: str
    published_date: str
    content: str
    tags: list[str]


# The model class itself (not an instance) is passed as output_model_schema.
agent = Agent(
    task="Extract the main article from this news page",
    llm=ChatBrowserUse(),
    output_model_schema=NewsArticle,
)
With Field Descriptions
Help the LLM understand what to extract:
# Each Field(description=...) tells the LLM what the field should contain.
class CompanyInfo(BaseModel):
    name: str = Field(description="The company's full legal name")
    founded_year: int = Field(description="Year the company was founded")
    # Optional: defaults to None when the page does not list a head count.
    employees: int | None = Field(
        default=None,
        description="Number of employees, if available"
    )
    headquarters: str = Field(description="City and country of headquarters")


agent = Agent(
    task="Extract company information from the about page",
    llm=ChatBrowserUse(),
    output_model_schema=CompanyInfo,
)
Nested Models
# Inner model, used as the type of Restaurant.address.
class Address(BaseModel):
    street: str
    city: str
    state: str
    zip_code: str


# Outer model: nests an Address and holds a list of cuisine names.
class Restaurant(BaseModel):
    name: str
    address: Address
    rating: float
    price_range: str
    cuisine: list[str]


agent = Agent(
    task="Extract restaurant details including full address",
    llm=ChatBrowserUse(),
    output_model_schema=Restaurant,
)
Lists of Objects
class SearchResult(BaseModel):
title: str
url: str
snippet: str
class SearchResults(BaseModel):
query: str
results: list[SearchResult]
total_results: int
agent = Agent(
task="Search for 'browser automation' and extract top 5 results",
llm=ChatBrowserUse(),
output_model_schema=SearchResults,
)
history = await agent.run()
data: SearchResults = history.structured_output
for result in data.results:
print(f"{result.title}: {result.url}")
From examples/features/custom_output.py:29-31.
Accessing Results
Via AgentHistoryList
history = await agent.run()

# Get structured output (auto-parsed)
if history.structured_output:
    product = history.structured_output
    print(product.name)

# Alternative: parse manually
# final_result() is validated as JSON against the model by hand.
final_result = history.final_result()
if final_result:
    product = Product.model_validate_json(final_result)
With Sandbox Execution
Structured output works with sandbox:
from browser_use import sandbox, Browser
from browser_use.agent.views import AgentHistoryList


# Fields to extract; city is optional and defaults to None.
class IPLocation(BaseModel):
    ip_address: str
    country: str
    city: str | None = None


@sandbox(log_level='INFO')
async def get_ip_location(browser: Browser) -> AgentHistoryList:
    """Run an agent that extracts the IP and location, and return its history.

    Args:
        browser: Browser instance handed to the agent.
            NOTE(review): the call site below passes no argument, so the
            sandbox decorator presumably supplies it - confirm.

    Returns:
        The AgentHistoryList produced by ``agent.run()``.
    """
    agent = Agent(
        task="Go to ipinfo.io and extract my IP and location",
        browser=browser,
        llm=ChatBrowserUse(),
        output_model_schema=IPLocation,
    )
    return await agent.run()


# Parse structured output from sandbox result
history = await get_ip_location()
# The model class is passed explicitly to get_structured_output here.
location = history.get_structured_output(IPLocation)
if location:
    print(f"IP: {location.ip_address}")
    print(f"Location: {location.city}, {location.country}")
From examples/sandbox/structured_output.py:17-46.
When using the sandbox, call get_structured_output(Model) instead of reading the structured_output property, because the private _output_model_schema attribute isn’t serialized.
Complex Examples
E-commerce Product Catalog
from enum import Enum
from decimal import Decimal
class ProductCondition(str, Enum):
NEW = "new"
USED = "used"
REFURBISHED = "refurbished"
class Review(BaseModel):
rating: int = Field(ge=1, le=5, description="Star rating 1-5")
text: str
author: str
helpful_count: int = 0
class Product(BaseModel):
name: str
sku: str
price: Decimal
condition: ProductCondition
in_stock: bool
shipping_cost: Decimal | None = None
reviews: list[Review]
average_rating: float = Field(ge=0, le=5)
agent = Agent(
task="Extract complete product information including reviews",
llm=ChatBrowserUse(),
output_model_schema=Product,
)
history = await agent.run()
product = history.structured_output
print(f"{product.name} ({product.condition.value})")
print(f"Price: ${product.price}")
print(f"Rating: {product.average_rating}/5 ({len(product.reviews)} reviews)")
Social Media Post Analysis
from datetime import datetime
class Engagement(BaseModel):
likes: int
comments: int
shares: int
views: int | None = None
class Comment(BaseModel):
author: str
text: str
likes: int
class Post(BaseModel):
id: str
author: str
content: str
posted_at: str = Field(description="ISO format datetime")
engagement: Engagement
top_comments: list[Comment] = Field(
default_factory=list,
description="Top 3 comments by likes"
)
hashtags: list[str]
agent = Agent(
task="Extract details from the top post on the feed",
llm=ChatBrowserUse(),
output_model_schema=Post,
)
history = await agent.run()
post = history.structured_output
total_engagement = (
post.engagement.likes +
post.engagement.comments +
post.engagement.shares
)
print(f"Total engagement: {total_engagement}")
class StockPrice(BaseModel):
symbol: str
current_price: Decimal
change: Decimal
change_percent: float
day_high: Decimal
day_low: Decimal
volume: int
class Portfolio(BaseModel):
stocks: list[StockPrice]
total_value: Decimal
day_change: Decimal
day_change_percent: float
agent = Agent(
task="Extract my portfolio summary from the dashboard",
llm=ChatBrowserUse(),
output_model_schema=Portfolio,
)
history = await agent.run()
portfolio = history.structured_output
for stock in portfolio.stocks:
direction = "📈" if stock.change > 0 else "📉"
print(f"{stock.symbol}: ${stock.current_price} {direction} {stock.change_percent}%")
Validation
Built-in Validators
from pydantic import Field, field_validator, EmailStr, HttpUrl


# Profile schema combining Field constraints, Pydantic's EmailStr/HttpUrl
# types and a custom per-field validator.
class UserProfile(BaseModel):
    username: str = Field(min_length=3, max_length=20)
    email: EmailStr
    website: HttpUrl | None = None
    age: int = Field(ge=13, le=120, description="User's age")
    bio: str = Field(max_length=500)

    @field_validator('username')
    @classmethod  # field validators are class methods; mark them explicitly
    def username_alphanumeric(cls, v: str) -> str:
        """Reject usernames that contain non-alphanumeric characters.

        Args:
            v: The username value being validated.

        Returns:
            The unchanged username when it passes the check.

        Raises:
            ValueError: If ``v.isalnum()`` is false.
        """
        if not v.isalnum():
            raise ValueError('Username must be alphanumeric')
        return v


agent = Agent(
    task="Extract user profile information",
    llm=ChatBrowserUse(),
    output_model_schema=UserProfile,
)
If the LLM returns data that violates these constraints, Pydantic raises a ValidationError.
Custom Validators
from pydantic import model_validator


# Pair of ISO-format date strings validated together.
class DateRange(BaseModel):
    start_date: str
    end_date: str

    @model_validator(mode='after')
    def check_date_order(self) -> 'DateRange':
        """Ensure start_date does not come after end_date.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If start_date is later than end_date, or if either
                string cannot be parsed by ``datetime.fromisoformat``.
        """
        from datetime import datetime

        start = datetime.fromisoformat(self.start_date)
        end = datetime.fromisoformat(self.end_date)
        # Equal dates are accepted, so the message says "not after"
        # rather than "before".
        if start > end:
            raise ValueError('start_date must not be after end_date')
        return self


# Model that embeds the validated DateRange.
class Event(BaseModel):
    name: str
    date_range: DateRange
    location: str
Error Handling
Validation Errors
from pydantic import ValidationError

try:
    history = await agent.run()
    data = history.structured_output
    # None means there is no structured result; show the raw text instead.
    if data is None:
        print("No structured output returned")
        print(f"Raw result: {history.final_result()}")
except ValidationError as e:
    print(f"Invalid data format: {e}")
    # The LLM returned data that doesn't match your schema
Optional Fields
Make fields optional when data might not be available:
# Only title is mandatory; the other fields have defaults.
class Article(BaseModel):
    title: str  # Required
    author: str | None = None  # Optional
    published_date: str | None = None  # Optional, same pattern as author
    tags: list[str] = Field(default_factory=list)  # Defaults to empty list
Fallback Values
history = await agent.run()

# Prefer the parsed model when one is available.
if history.structured_output:
    product = history.structured_output
else:
    # Fallback: parse from final_result string
    result_text = history.final_result()
    print(f"Unstructured result: {result_text}")
Best Practices
1. Clear Field Descriptions
Help the LLM understand what to extract:
# ✅ Good - descriptive fields
class Product(BaseModel):
    name: str = Field(description="Product name as shown on the page")
    price: float = Field(description="Current price in USD, without currency symbol")
    availability: str = Field(description="In stock, out of stock, or pre-order")


# ❌ Bad - unclear fields
# (deliberately reuses the name Product to contrast with the version above)
class Product(BaseModel):
    name: str
    price: str  # Should be float, LLM might include "$"
    status: str  # Unclear what this represents
2. Use Enums for Fixed Values
from decimal import Decimal
from enum import Enum


# Closed set of order states; the str mixin makes members compare equal to
# their string values.
class OrderStatus(str, Enum):
    PENDING = "pending"
    CONFIRMED = "confirmed"
    SHIPPED = "shipped"
    DELIVERED = "delivered"
    CANCELLED = "cancelled"


# Order record whose status is restricted to the enum above.
class Order(BaseModel):
    order_id: str
    status: OrderStatus  # LLM must return one of these values
    total: Decimal
3. Provide Examples in Task
# The task text describes each field and embeds a sample JSON object
# matching the Product schema.
agent = Agent(
    task="""
Extract product information in this format:
- name: The product title
- price: Numeric price without currency symbol
- in_stock: true if available, false otherwise
Example:
{
"name": "Wireless Mouse",
"price": 29.99,
"in_stock": true
}
""",
    llm=ChatBrowserUse(),
    output_model_schema=Product,
)
4. Keep Models Focused
# ✅ Good - focused model
class SearchResult(BaseModel):
    title: str
    url: str
    snippet: str


# ❌ Bad - trying to extract too much
class PageData(BaseModel):
    url: str
    title: str
    all_links: list[str]
    all_images: list[str]
    all_text: str
    metadata: dict  # untyped dict: no keys or value types are specified
    # Too much data, increases errors
Structured output works with custom tools:
from browser_use import Tools, ActionResult

tools = Tools()


# Fields the nested agent should extract from one email.
class EmailData(BaseModel):
    subject: str
    sender: str
    date: str
    body: str


@tools.action('Extract email content')
async def extract_email(email_selector: str) -> ActionResult:
    """Run a nested agent to extract one email and report a short summary.

    Args:
        email_selector: Text interpolated into the agent task to identify
            the email.

    Returns:
        An ActionResult containing the sender and subject on success, or an
        ActionResult carrying an error message when no structured output
        was produced.
    """
    # Use agent with structured output inside tool
    agent = Agent(
        task=f"Extract email details from {email_selector}",
        # Same LLM as the other examples; swap in your own model if needed.
        llm=ChatBrowserUse(),
        output_model_schema=EmailData,
    )
    history = await agent.run()
    if history.structured_output:
        email = history.structured_output
        return ActionResult(
            extracted_content=f"Email from {email.sender}: {email.subject}",
            success=True
        )
    return ActionResult(error="Failed to extract email")
Actor API Integration
Use structured extraction with the Actor API:
from browser_use.actor import Page


# Target schema for the extracted article.
class ArticleContent(BaseModel):
    title: str
    author: str
    published_date: str
    paragraphs: list[str]


async def extract_article(page: Page, llm) -> ArticleContent:
    """Open the example article and return its content as an ArticleContent.

    Args:
        page: Actor API page used for navigation and extraction.
        llm: Language model forwarded to ``page.extract_content``.

    Returns:
        Whatever ``page.extract_content`` yields for the ArticleContent schema.
    """
    await page.goto("https://example.com/article")

    # Extract structured content from page
    return await page.extract_content(
        prompt="Extract the article title, author, date, and all paragraphs",
        structured_output=ArticleContent,
        llm=llm,
    )
From browser_use/actor/page.py:491-554.
See Also