# OpenAI Integration
This guide explains how to integrate EncypherAI with OpenAI's API to embed metadata in AI-generated content from models like GPT-3.5 and GPT-4.
## Prerequisites
Before you begin, make sure you have:
- An OpenAI API key
- The OpenAI Python package installed
- EncypherAI installed
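Before running the examples, you can sanity-check that the packages above are importable. A minimal sketch (the PyPI package names in the comment are assumptions; confirm them in each project's installation docs):

```python
# Minimal environment check. If a module is missing, install it, e.g.:
#   pip install openai encypher-ai
# (PyPI package names above are assumptions; confirm in each project's docs.)
import importlib.util

for module in ("openai", "encypher"):
    if importlib.util.find_spec(module) is None:
        raise SystemExit(f"Missing required module: {module}")

print("All required modules are importable.")
```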
## Basic Integration

### Non-Streaming Response

For standard (non-streaming) responses from OpenAI:
```python
import openai
from encypher.core import MetadataEncoder
from datetime import datetime, timezone
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Create a metadata encoder
encoder = MetadataEncoder(secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification

# Create a completion
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short paragraph about AI ethics."}
    ]
)

# Get the response text
text = response.choices[0].message.content

# Create metadata
metadata = {
    "model": response.model,
    "organization": "YourOrganization",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "prompt_tokens": response.usage.prompt_tokens,
    "completion_tokens": response.usage.completion_tokens,
    "total_tokens": response.usage.total_tokens
}

# Embed metadata
encoded_text = encoder.encode_metadata(text, metadata)

print("Original response:")
print(text)
print("\nResponse with embedded metadata:")
print(encoded_text)

# Later, extract and verify the metadata
extracted_metadata = encoder.decode_metadata(encoded_text)
verification_result = encoder.verify_text(encoded_text, secret_key="your-secret-key")  # Pass the same secret_key used during encoding

print("\nExtracted metadata:")
print(json.dumps(extracted_metadata, indent=2))
print(f"Verification result: {'✅ Verified' if verification_result else '❌ Failed'}")
```
### Streaming Response

For streaming responses, use the `StreamingHandler`:
```python
import openai
import json
from encypher.streaming import StreamingHandler
from encypher.core import MetadataEncoder
from datetime import datetime, timezone

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Create metadata
metadata = {
    "model": "gpt-4",
    "organization": "YourOrganization",
    "timestamp": datetime.now(timezone.utc).isoformat()
}

# Initialize the streaming handler
handler = StreamingHandler(metadata=metadata, secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification

# Create a streaming completion
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short paragraph about AI ethics."}
    ],
    stream=True
)

# Process each chunk
full_response = ""
for chunk in completion:
    content = chunk.choices[0].delta.content
    if content:
        # Process the chunk
        processed_chunk = handler.process_chunk(chunk=content)
        # Print and accumulate the processed chunk
        print(processed_chunk, end="", flush=True)
        full_response += processed_chunk

# Finalize the stream
handler.finalize()

print("\n\nStreaming completed!")

# Extract and verify the metadata
encoder = MetadataEncoder(secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification
extracted_metadata = encoder.decode_metadata(full_response)
verification_result = encoder.verify_text(full_response, secret_key="your-secret-key")  # Pass the same secret_key used during encoding

print("\nExtracted metadata:")
print(json.dumps(extracted_metadata, indent=2))
print(f"Verification result: {'✅ Verified' if verification_result else '❌ Failed'}")
```
## Advanced Integration

### Function Calling

When using OpenAI's function calling feature:
```python
import openai
from encypher.core import MetadataEncoder
from datetime import datetime, timezone
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Define tools (function-calling schema)
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Create a completion with function calling
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco?"}
    ],
    tools=tools,
    tool_choice="auto"
)

# Get the response
message = response.choices[0].message

# Check if the model wants to call a function
if message.tool_calls:
    # Get the function call
    function_call = message.tool_calls[0].function
    function_name = function_call.name
    function_args = json.loads(function_call.arguments)

    print(f"Function call: {function_name}")
    print(f"Arguments: {function_args}")

    # Simulate a function response
    function_response = {
        "location": function_args["location"],
        "temperature": 72,
        "unit": function_args.get("unit", "fahrenheit"),
        "condition": "sunny"
    }

    # Continue the conversation with the function result
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What's the weather like in San Francisco?"},
            message,
            {
                "role": "tool",
                "tool_call_id": message.tool_calls[0].id,
                "name": function_name,
                "content": json.dumps(function_response)
            }
        ]
    )

    # Get the final response text
    text = response.choices[0].message.content
else:
    # Get the response text
    text = message.content

# Create metadata
metadata = {
    "model": response.model,
    "organization": "YourOrganization",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "function_call": message.tool_calls[0].function.name if message.tool_calls else None,
    "prompt_tokens": response.usage.prompt_tokens,
    "completion_tokens": response.usage.completion_tokens,
    "total_tokens": response.usage.total_tokens
}

# Embed metadata
encoder = MetadataEncoder(secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification
encoded_text = encoder.encode_metadata(text, metadata)

print("\nFinal response with embedded metadata:")
print(encoded_text)
```
### Custom Metadata Extraction

You can create a helper function to extract metadata from OpenAI responses:
```python
import json
from datetime import datetime, timezone

def extract_openai_metadata(response):
    """Extract metadata from an OpenAI API response."""
    metadata = {
        "model": response.model,
        "organization": "YourOrganization",
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    # Add usage information if available
    if hasattr(response, "usage"):
        metadata.update({
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens
        })

    # Add function call information if available
    message = response.choices[0].message
    if hasattr(message, "tool_calls") and message.tool_calls:
        function_call = message.tool_calls[0].function
        metadata.update({
            "function_call": function_call.name,
            "function_args": json.loads(function_call.arguments)
        })

    return metadata
```
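Combined with the encoder, this helper reduces each call site to a few lines:

```python
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Summarize AI ethics in one sentence."}],
)
text = response.choices[0].message.content
encoded_text = encoder.encode_metadata(text, extract_openai_metadata(response))
```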
## Web Application Integration

Here's an example of integrating OpenAI and EncypherAI in a Flask web application:
```python
from flask import Flask, request, jsonify
import openai
from encypher.core import MetadataEncoder
from datetime import datetime, timezone

app = Flask(__name__)

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Create a metadata encoder
encoder = MetadataEncoder(secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification

@app.route('/generate', methods=['POST'])
def generate():
    # Get request data
    data = request.json
    prompt = data.get('prompt', '')

    # Create a completion
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )

    # Get the response text
    text = response.choices[0].message.content

    # Create metadata
    metadata = {
        "model": response.model,
        "organization": "YourOrganization",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "prompt_tokens": response.usage.prompt_tokens,
        "completion_tokens": response.usage.completion_tokens,
        "total_tokens": response.usage.total_tokens,
        "user_id": data.get('user_id', 'anonymous')
    }

    # Embed metadata
    encoded_text = encoder.encode_metadata(text, metadata)

    # Return the response
    return jsonify({
        "text": encoded_text,
        "metadata": metadata
    })

@app.route('/verify', methods=['POST'])
def verify():
    # Get request data
    data = request.json
    text = data.get('text', '')

    # Extract and verify metadata
    try:
        metadata = encoder.decode_metadata(text)
        verified = encoder.verify_text(text, secret_key="your-secret-key")  # Pass the same secret_key used during encoding
        return jsonify({
            "has_metadata": True,
            "metadata": metadata,
            "verified": verified
        })
    except Exception as e:
        return jsonify({
            "has_metadata": False,
            "error": str(e)
        })

if __name__ == '__main__':
    app.run(debug=True)
```
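To exercise these endpoints, a client might look like the following (assuming the app is running locally on Flask's default port):

```python
import requests

BASE_URL = "http://127.0.0.1:5000"

# Generate text with embedded metadata
generated = requests.post(
    f"{BASE_URL}/generate",
    json={"prompt": "Write a haiku about AI.", "user_id": "demo"},
).json()

# Round-trip the encoded text through verification
result = requests.post(f"{BASE_URL}/verify", json={"text": generated["text"]}).json()
print(result["has_metadata"], result.get("verified"))
```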
### Streaming in Web Applications

For streaming responses in a web application:
```python
from flask import Flask, Response, request, stream_with_context
import openai
from encypher.streaming import StreamingHandler
from datetime import datetime, timezone

app = Flask(__name__)

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

@app.route('/stream', methods=['POST'])
def stream():
    # Get request data
    data = request.json
    prompt = data.get('prompt', '')

    # Create metadata
    metadata = {
        "model": "gpt-4",
        "organization": "YourOrganization",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "user_id": data.get('user_id', 'anonymous')
    }

    # Initialize a streaming handler for this request
    handler = StreamingHandler(metadata=metadata, secret_key="your-secret-key")  # Optional: secret_key is only needed if you want HMAC verification

    def generate_stream():
        # Create a streaming completion
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            stream=True
        )

        # Process each chunk
        for chunk in completion:
            content = chunk.choices[0].delta.content
            if content:
                # Process and yield the chunk
                yield handler.process_chunk(chunk=content)

        # Finalize the stream
        handler.finalize()

    # Return a streaming response
    return Response(stream_with_context(generate_stream()), mimetype='text/plain')

if __name__ == '__main__':
    app.run(debug=True)
```
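On the client side, the streamed body can be consumed incrementally (again assuming a local Flask server):

```python
import requests

with requests.post(
    "http://127.0.0.1:5000/stream",
    json={"prompt": "Write a short paragraph about AI ethics."},
    stream=True,
) as resp:
    full_text = ""
    for piece in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(piece, end="", flush=True)
        full_text += piece
```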
## Best Practices

- **Include Model Information**: Always include the model name, version, and other relevant details in the metadata.
- **Add Timestamps**: Include a UTC timestamp to track when the content was generated.
- **Track Token Usage**: Include token counts to monitor API usage and costs.
- **Use Secure Keys**: Store your OpenAI API key and EncypherAI secret key securely, using environment variables or a secure key management system.
- **Handle Errors Gracefully**: Implement proper error handling for both OpenAI API calls and EncypherAI operations.
- **Verify Before Trusting**: Always verify the metadata before relying on it, especially in security-sensitive applications.
- **Choose Appropriate Targets**: For longer responses, the `whitespace` embedding target is usually sufficient. For shorter responses, consider `all_characters` to ensure enough targets are available (see the sketch after this list).
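Assuming the encoder exposes the embedding target as a constructor argument (the parameter name below is hypothetical; confirm the real one in the MetadataEncoder API reference), target selection might look like:

```python
# NOTE: `target` is a hypothetical parameter name used for illustration;
# confirm the actual argument in the EncypherAI API reference.
long_form_encoder = MetadataEncoder(secret_key="your-secret-key", target="whitespace")
short_form_encoder = MetadataEncoder(secret_key="your-secret-key", target="all_characters")
```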
## Troubleshooting

### API Key Issues

If you encounter authentication errors with the OpenAI API:
```python
import os
import openai

# Set the API key as an environment variable
os.environ["OPENAI_API_KEY"] = "your-api-key"

# Or configure the client with the key explicitly
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
```
### Rate Limiting

If you hit rate limits, implement exponential backoff:
```python
import time
import random

def call_with_retry(func, max_retries=5):
    retries = 0
    while retries < max_retries:
        try:
            return func()
        except openai.RateLimitError:
            retries += 1
            if retries == max_retries:
                raise
            # Exponential backoff with jitter
            sleep_time = (2 ** retries) + random.random()
            print(f"Rate limited, retrying in {sleep_time:.2f} seconds...")
            time.sleep(sleep_time)

# Example usage
def make_openai_call():
    return client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Write a short paragraph about AI ethics."}
        ]
    )

response = call_with_retry(make_openai_call)
```
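Alternatively, the third-party `tenacity` library expresses the same backoff pattern declaratively. A sketch (requires `pip install tenacity`):

```python
import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential

@retry(
    retry=retry_if_exception_type(openai.RateLimitError),
    wait=wait_random_exponential(min=1, max=60),
    stop=stop_after_attempt(5),
)
def make_openai_call_with_retry():
    return client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": "Write a short paragraph about AI ethics."}],
    )
```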
### Metadata Extraction Failures

If metadata extraction fails, work through these checks (a combined diagnostic helper follows the list):
- Ensure the text hasn't been modified after embedding
- Check if the text has enough suitable targets for embedding
- Verify you're using the same secret key for embedding and extraction
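These checks can be folded into a single diagnostic helper, built only from the calls used throughout this guide:

```python
def diagnose(encoder, text, secret_key):
    """Report why extraction or verification might be failing."""
    try:
        metadata = encoder.decode_metadata(text)
    except Exception as exc:
        # Either the text was never encoded, was modified after embedding,
        # or was too short to hold enough embedding targets.
        print(f"No metadata found: {exc}")
        return None
    if encoder.verify_text(text, secret_key=secret_key):
        print("Metadata present and signature verified.")
    else:
        print("Metadata present, but verification failed; check that the "
              "secret key matches the one used during encoding.")
    return metadata
```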