Guardrails Example¶
Add safety controls to protect your AI agents.
Custom Validator¶
Create a validator to filter unsafe input:
from akordi_agents.core.interfaces import ValidatorInterface
from akordi_agents.models.validation_models import ValidationResult, ValidationError
from typing import Dict, Any
class SafetyValidator(ValidatorInterface):
    """Validate user input for safety.

    Rejects a query when it is empty, exceeds the maximum length, or
    contains any of a fixed list of prohibited substrings.
    """

    def __init__(self):
        # NOTE: plain substring matching — this will also flag benign
        # words that merely contain a pattern (e.g. "hack" in "hackathon").
        self.prohibited_patterns = [
            "hack", "exploit", "bypass", "illegal",
            "harmful", "weapon", "violence"
        ]
        self.max_query_length = 5000

    def validate(self, data: Dict[str, Any]) -> ValidationResult:
        """Check ``data["query"]`` and return an aggregate ValidationResult.

        All applicable errors are collected, except that prohibited-content
        scanning stops at the first matching pattern.
        """
        errors = []
        query = data.get("query", "")

        # Check length
        if len(query) > self.max_query_length:
            errors.append(ValidationError(
                field="query",
                message=f"Query exceeds maximum length of {self.max_query_length} characters"
            ))

        # Check empty (whitespace-only counts as empty)
        if not query.strip():
            errors.append(ValidationError(
                field="query",
                message="Query cannot be empty"
            ))

        # Check prohibited content — one error is enough, stop scanning
        query_lower = query.lower()
        for pattern in self.prohibited_patterns:
            if pattern in query_lower:
                errors.append(ValidationError(
                    field="query",
                    message="Query contains prohibited content"
                ))
                break

        return ValidationResult(
            is_valid=len(errors) == 0,
            errors=errors
        )

    def get_validator_name(self) -> str:
        """Return the registry name for this validator."""
        return "safety_validator"
Using the Validator¶
from akordi_agents.core import create_langgraph_agent
from akordi_agents.services import AWSBedrockService

# Create validator
validator = SafetyValidator()

# Create agent with guardrails enabled in the config
agent = create_langgraph_agent(
    name="safe_agent",
    llm_service=AWSBedrockService(),
    validator=validator,
    config={
        "enable_validation": True,
        "temperature": 0.1,
    }
)

# Safe query — passes validation and reaches the LLM
response = agent.process_request({
    "query": "How do I bake a chocolate cake?",
})
print("Response:", response["llm_response"]["response"])

# Unsafe query — rejected by the validator before reaching the LLM
response = agent.process_request({
    "query": "How do I hack into a system?",
})
if not response.get("success"):
    print("Blocked:", response.get("validation_errors"))
AWS Bedrock Guardrails¶
Use AWS Bedrock's built-in guardrails:
Create a Guardrail¶
import boto3

# Bedrock control-plane client — guardrail management APIs live here
client = boto3.client("bedrock", region_name="us-east-1")

response = client.create_guardrail(
    name="production-guardrail",
    description="Guardrail for production AI agents",
    # Content filtering: per-category strength for both input and output
    contentPolicyConfig={
        "filtersConfig": [
            {"type": "HATE", "inputStrength": "HIGH", "outputStrength": "HIGH"},
            {"type": "VIOLENCE", "inputStrength": "HIGH", "outputStrength": "HIGH"},
            {"type": "SEXUAL", "inputStrength": "HIGH", "outputStrength": "HIGH"},
            {"type": "INSULTS", "inputStrength": "MEDIUM", "outputStrength": "MEDIUM"},
        ]
    },
    # Topic blocking: deny a whole subject area via definition + examples
    topicPolicyConfig={
        "topicsConfig": [
            {
                "name": "illegal_activities",
                "definition": "Topics related to illegal activities or hacking",
                "examples": [
                    "How to hack",
                    "How to steal",
                    "How to make illegal substances"
                ],
                "type": "DENY"
            }
        ]
    },
    # Word filters: explicit terms plus the AWS-managed profanity list
    wordPolicyConfig={
        "wordsConfig": [
            {"text": "profanity1"},
            {"text": "profanity2"},
        ],
        "managedWordListsConfig": [
            {"type": "PROFANITY"}
        ]
    },
    # PII handling: mask contact details, hard-block sensitive identifiers
    sensitiveInformationPolicyConfig={
        "piiEntitiesConfig": [
            {"type": "EMAIL", "action": "ANONYMIZE"},
            {"type": "PHONE", "action": "ANONYMIZE"},
            {"type": "SSN", "action": "BLOCK"},
            {"type": "CREDIT_DEBIT_CARD_NUMBER", "action": "BLOCK"},
        ]
    },
)

guardrail_id = response["guardrailId"]
print(f"Created guardrail: {guardrail_id}")
Create Guardrail Version¶
# Publish an immutable, numbered version of the guardrail for production use
version_response = client.create_guardrail_version(
    guardrailIdentifier=guardrail_id,
    description="Production version 1"
)
version = version_response["version"]
print(f"Created version: {version}")
Using the CLI¶
# Create a default guardrail with standard content filters
poetry run python examples/create_guardrail.py --create-default
# Publish an immutable version of an existing guardrail
poetry run python examples/create_guardrail.py \
--create-version \
--guardrail-id your-guardrail-id
# List guardrails in the configured account/region
poetry run python examples/create_guardrail.py --list
Bedrock Guardrail Integration¶
Use the SDK's built-in BedrockGuardrail class:
from akordi_agents.guard_kit.bedrock.bedrock import BedrockGuardrail

# Create a guardrail helper bound to a region
guardrail = BedrockGuardrail(region_name="us-east-1")

# Create a default guardrail with content filters
response = guardrail.create_default_guardrail(
    name_prefix="my_app"
)
guardrail_id = response["guardrailId"]

# Publish a version for production use
version_response = guardrail.create_guardrail_version(
    guardrail_id=guardrail_id,
    guardrail_version_description="Production version"
)
version = version_response["version"]

print(f"Guardrail ID: {guardrail_id}")
print(f"Version: {version}")
Custom LLM Service with Guardrails¶
From examples/agent_with_guardrails.py:
from akordi_agents.core import LLMServiceInterface, create_langgraph_agent
from akordi_agents.guard_kit.bedrock.bedrock import BedrockGuardrail
from akordi_agents.services.llm_service import AWSBedrockService
from akordi_agents.models.llm_models import ClaudeConfig
class GuardrailEnabledLLMService(LLMServiceInterface):
    """LLM service with integrated Bedrock guardrails.

    Wraps AWSBedrockService and forwards a guardrail identifier and
    version with every model invocation so Bedrock can filter the
    request and response.
    """

    def __init__(self, model_id: str, guardrail_id: str, guardrail_version: str = "1"):
        self.llm_service = AWSBedrockService(model_id)
        self.guardrail_id = guardrail_id
        self.guardrail_version = guardrail_version

    def generate_response(self, prompt: str, context: str = None, **kwargs):
        """Invoke the model with guardrail parameters applied.

        Returns a dict with the model ``response`` on success; on failure
        the dict carries an ``error`` key instead of raising.
        """
        try:
            config = ClaudeConfig(
                max_tokens=kwargs.get("max_tokens", 2000),
                model_id=self.llm_service.model_id,
                temperature=kwargs.get("temperature", 0.1),
            )
            # Invoke model with guardrail parameters attached
            llm_response = self.llm_service.invoke_model(
                prompt=prompt,
                config=config,
                system_message=kwargs.get("system_message"),
                guardrailIdentifier=self.guardrail_id,
                guardrailVersion=self.guardrail_version,
            )
            return {
                "response": llm_response.get("response", ""),
                "model_info": {"model_id": self.llm_service.model_id},
                "guardrail_status": "applied",
            }
        except Exception as e:
            # Best-effort: surface the failure in the payload instead of raising
            return {
                "response": f"Error: {str(e)}",
                "error": str(e),
            }

    def get_service_name(self) -> str:
        """Return the registry name for this service."""
        return "guardrail_enabled_llm"
# Usage: build the guardrail-aware service, then hand it to the agent factory
guardrail_llm = GuardrailEnabledLLMService(
    model_id="anthropic.claude-3-sonnet-20240229-v1:0",
    guardrail_id="your-guardrail-id",
    guardrail_version="1"
)

agent = create_langgraph_agent(
    name="safe_agent",
    llm_service=guardrail_llm,
)
Combining Multiple Validators¶
Layer multiple validation approaches:
class CompositeValidator(ValidatorInterface):
    """Run several validators in sequence, short-circuiting on failure.

    Errors from every validator that ran are accumulated; later
    validators are skipped once one reports the data invalid.
    """

    def __init__(self, validators: list):
        self.validators = validators

    def validate(self, data: Dict[str, Any]) -> ValidationResult:
        """Apply each validator in order and merge their errors."""
        all_errors = []
        for validator in self.validators:
            result = validator.validate(data)
            all_errors.extend(result.errors)
            # Stop on first blocking error — no need to run the rest
            if not result.is_valid:
                break
        return ValidationResult(
            is_valid=len(all_errors) == 0,
            errors=all_errors
        )

    def get_validator_name(self) -> str:
        """Return the registry name for this validator."""
        return "composite_validator"
# Layer multiple validators (LengthValidator / BedrockValidator are
# assumed to be defined elsewhere in the application)
validator = CompositeValidator([
    LengthValidator(max_length=5000),
    SafetyValidator(),
    BedrockValidator(guardrail_id="your-id"),
])

agent = create_langgraph_agent(
    name="multi_validated_agent",
    llm_service=llm_service,
    validator=validator,
    config={"enable_validation": True}
)
Output Validation¶
Validate LLM responses before returning:
class OutputValidator:
    """Check an LLM response against a Bedrock guardrail before returning it."""

    # Annotation quoted so this snippet parses without importing
    # BedrockGuardrail at definition time.
    def __init__(self, guardrail: "BedrockGuardrail"):
        self.guardrail = guardrail

    def validate_output(self, response: str) -> tuple[bool, str]:
        """Return ``(True, response)`` when safe, else ``(False, reason)``."""
        result = self.guardrail.validate_output(response)
        if result.blocked:
            return False, "Response contained inappropriate content"
        return True, response
# Use in custom processing
class SafeAgent:
    """Wrap an agent so successful responses are re-validated before return."""

    def __init__(self, agent, output_validator):
        self.agent = agent
        self.output_validator = output_validator

    def process_request(self, request_data):
        """Process a request and validate the LLM output.

        Returns the agent response (with the validator's possibly
        transformed text), or a failure payload when output validation
        blocks the response. Failed agent responses pass through untouched.
        """
        response = self.agent.process_request(request_data)
        if response.get("success"):
            llm_response = response["llm_response"]["response"]
            is_safe, safe_response = self.output_validator.validate_output(
                llm_response
            )
            if not is_safe:
                return {
                    "success": False,
                    "error": "Response validation failed"
                }
            # Keep whatever text the validator returned (may be sanitized)
            response["llm_response"]["response"] = safe_response
        return response
Complete Example¶
#!/usr/bin/env python
"""Guardrails example with AWS Bedrock."""
import os
from akordi_agents.core import create_langgraph_agent
from akordi_agents.services import AWSBedrockService
from akordi_agents.core.interfaces import ValidatorInterface
from akordi_agents.models.validation_models import ValidationResult, ValidationError


class SafetyValidator(ValidatorInterface):
    """Block queries containing any term from a small denylist."""

    def __init__(self):
        self.blocked_terms = ["hack", "exploit", "illegal"]

    def validate(self, data):
        # Case-insensitive substring match against the denylist
        query = data.get("query", "").lower()
        for term in self.blocked_terms:
            if term in query:
                return ValidationResult(
                    is_valid=False,
                    errors=[ValidationError("query", "Blocked content")]
                )
        return ValidationResult(is_valid=True, errors=[])

    def get_validator_name(self):
        return "safety_validator"


def main():
    os.environ["AWS_REGION"] = "us-east-1"

    # Create agent with guardrails
    agent = create_langgraph_agent(
        name="safe_agent",
        llm_service=AWSBedrockService(),
        validator=SafetyValidator(),
        config={"enable_validation": True}
    )

    # Test queries — a mix of safe prompts and one that should be blocked
    test_queries = [
        "How do I make a chocolate cake?",
        "What's the weather forecast?",
        "How do I hack into a computer?",  # Should be blocked
        "Tell me about machine learning",
    ]
    for query in test_queries:
        print(f"\nQuery: {query}")
        print("-" * 50)
        response = agent.process_request({
            "query": query,
            "system_message": "You are a helpful assistant.",
        })
        if response.get("success"):
            print(f"Response: {response['llm_response']['response'][:100]}...")
        else:
            print(f"BLOCKED: {response.get('validation_errors', response.get('error'))}")


if __name__ == "__main__":
    main()
Running Examples¶
# Run the agent example with guardrails enabled
poetry run python examples/agent_with_guardrails.py
# Run the same agent without guardrails (for comparison)
poetry run python examples/agent_without_guardrails.py
# Create a default guardrail in AWS Bedrock
poetry run python examples/create_guardrail.py --create-default
Best Practices¶
- Always validate input in production systems
- Use multiple validation layers for defense in depth
- Log validation failures for monitoring and improvement
- Keep blocklists updated with new threat patterns
- Test guardrails thoroughly before deployment
- Monitor false positives to avoid blocking legitimate queries
Next Steps¶
- Basic Agent - Start with basics
- Guardrails Concepts - Deep dive
- API Reference - ValidatorInterface API