Skip to content

Observability Hub

Observability Hub provides complete visibility into your multi-agent systems. Track traces across agents, monitor metrics, control costs, and set up alerts.

  • Distributed tracing - Follow requests across agents
  • Built-in metrics - Latency, throughput, error rates
  • Cost tracking - LLM spend by agent, plan, and model
  • Log aggregation - Searchable, correlated logs
  • Alerting - Threshold, anomaly, and absence alerts
  • OTLP export - Send data to external systems
async with client.observability.span(
name="process_document",
attributes={
"document_id": "doc-123",
"document_type": "pdf"
}
) as span:
# Your processing logic
result = await process(document)
span.set_attribute("pages_processed", result.page_count)
span.set_attribute("status", "success")
async with client.observability.span(name="parent_operation") as parent:
# First child
async with client.observability.span(name="step_1") as step1:
await do_step_1()
# Second child
async with client.observability.span(name="step_2") as step2:
await do_step_2()
# Find traces by time range
traces = await client.observability.query_traces(
start_time=datetime.now() - timedelta(hours=1),
end_time=datetime.now(),
limit=100
)
# Filter by attributes
traces = await client.observability.query_traces(
filters={
"agent_id": "document-processor",
"status": "error"
}
)
# Get a specific trace
trace = await client.observability.get_trace(trace_id="tr_xxx")

Track LLM costs per span:

async with client.observability.span(name="llm_call") as span:
response = await call_llm(prompt)
# Record cost
span.set_cost(
model="gpt-4",
input_tokens=1500,
output_tokens=500,
cost_usd=0.045
)
# Get cost summary
costs = await client.observability.get_costs(
start_time=datetime.now() - timedelta(days=7),
end_time=datetime.now(),
group_by=["agent_id", "model"]
)
for item in costs:
print(f"{item.agent_id} - {item.model}: ${item.total_cost:.2f}")
await client.observability.log(
level="info",
message="Document processed successfully",
attributes={
"document_id": "doc-123",
"processing_time_ms": 1500
}
)

Logs are automatically correlated with traces:

async with client.observability.span(name="process") as span:
# This log is automatically linked to the span
await client.observability.log(
level="info",
message="Starting processing"
)
logs = await client.observability.query_logs(
start_time=datetime.now() - timedelta(hours=1),
level="error",
search="connection failed"
)

Acenta tracks standard metrics automatically:

| Metric | Description |
| --- | --- |
| `request_latency_ms` | Request processing time |
| `request_count` | Total request count |
| `error_count` | Total error count |
| `message_count` | Messages sent/received |
| `llm_cost_usd` | Total LLM spend |
metrics = await client.observability.query_metrics(
metric="request_latency_ms",
start_time=datetime.now() - timedelta(hours=1),
aggregation="p95",
group_by=["agent_id"]
)
alert = await client.observability.create_alert(
name="High Error Rate",
condition={
"type": "threshold",
"metric": "error_count",
"operator": ">",
"threshold": 100,
"window": "5m"
},
channels=[
{"type": "email", "to": "ops@example.com"},
{"type": "slack", "webhook": "https://hooks.slack.com/..."}
]
)
alert = await client.observability.create_alert(
name="Latency Anomaly",
condition={
"type": "anomaly",
"metric": "request_latency_ms",
"sensitivity": 2.0, # Standard deviations
"baseline_window": "24h"
},
channels=[...]
)
alert = await client.observability.create_alert(
name="Missing Heartbeats",
condition={
"type": "absence",
"metric": "heartbeat_count",
"missing_for": "5m"
},
channels=[...]
)

Export data to external observability systems:

# Configure OTLP export
await client.observability.configure_export(
endpoint="https://otel-collector.example.com:4318",
headers={"Authorization": "Bearer xxx"},
export_traces=True,
export_logs=True
)

Get pre-built dashboard data:

dashboard = await client.observability.get_dashboard(
time_range="1h"
)
print(f"Total requests: {dashboard.total_requests}")
print(f"Error rate: {dashboard.error_rate:.2%}")
print(f"P95 latency: {dashboard.p95_latency_ms}ms")
print(f"Total cost: ${dashboard.total_cost:.2f}")

Configure trace sampling:

# Default: 10% sampling (errors always kept)
# Configure per-namespace
await client.observability.configure_sampling(
rate=0.25, # 25% sampling
always_keep=["error", "slow"] # Always keep errors and slow requests
)