OpenAI Integration

This guide explains how to integrate EncypherAI with OpenAI's API to embed metadata in AI-generated content from models like GPT-3.5 and GPT-4.

Prerequisites

Before you begin, make sure you have:

  1. An OpenAI API key
  2. The OpenAI Python package installed
  3. EncypherAI installed
uv pip install encypher-ai openai

Basic Integration

Non-Streaming Response

For standard (non-streaming) responses from OpenAI:

import openai
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from typing import Optional
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
import time
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Generate key pair (replace with your actual key management)
private_key, public_key = generate_key_pair()

# Example public key resolver function
def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    if key_id == "openai-nonstream-key":
        return public_key
    return None

# Create a completion
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short paragraph about AI ethics."}
    ]
)

# Get the response text
text = response.choices[0].message.content

# Create metadata
metadata = {
    "model": response.model,
    "organization": "YourOrganization",
    "timestamp": time.time(),
    "prompt_tokens": response.usage.prompt_tokens,
    "completion_tokens": response.usage.completion_tokens,
    "total_tokens": response.usage.total_tokens,
    "key_id": "openai-nonstream-key" # Identifier for the key
}

# Embed metadata using UnicodeMetadata
encoded_text = UnicodeMetadata.embed_metadata(text, metadata, private_key)

print("Original response:")
print(text)
print("\nResponse with embedded metadata:")
print(encoded_text)

# (Optional) Later, extract the metadata without verifying it:
# extracted_metadata = UnicodeMetadata.extract_metadata(encoded_text)

# Verify the metadata using the public key resolver
is_valid, verified_metadata = UnicodeMetadata.verify_metadata(
    encoded_text,
    public_key_resolver=resolve_public_key # Pass the resolver function
)

print("\nExtracted metadata:")
print(json.dumps(verified_metadata, indent=2))
print(f"Verification result: {'✅ Verified' if is_valid else '❌ Failed'}")

Streaming Response

For streaming responses, use the StreamingHandler:

import openai
from encypher.streaming import StreamingHandler
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from typing import Optional
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
import time
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Generate key pair and resolver (replace with actual key management)
private_key, public_key = generate_key_pair()
def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    if key_id == "openai-stream-key":
        return public_key
    return None

# Create metadata
metadata = {
    "model": "gpt-4",
    "organization": "YourOrganization",
    "timestamp": time.time(),
    "key_id": "openai-stream-key"
}

# Initialize the streaming handler
handler = StreamingHandler(
    metadata=metadata,
    private_key=private_key # Use the private key
)

# Create a streaming completion
completion = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a short paragraph about AI ethics."}
    ],
    stream=True
)

# Process each chunk
full_response = ""
for chunk in completion:
    if chunk.choices and chunk.choices[0].delta.content:
        content = chunk.choices[0].delta.content

        # Process the chunk
        processed_chunk = handler.process_chunk(chunk=content)

        # Print and accumulate the processed chunk if available
        if processed_chunk:
            print(processed_chunk, end="", flush=True)
            full_response += processed_chunk

# Finalize the stream to process any remaining buffer
final_chunk = handler.finalize()
if final_chunk:
    print(final_chunk, end="", flush=True)
    full_response += final_chunk

print("\n\nStreaming completed!")

# (Optional) Later, extract the metadata without verifying it:
# extracted_metadata = UnicodeMetadata.extract_metadata(full_response)

# Verify the metadata using the public key resolver
is_valid, verified_metadata = UnicodeMetadata.verify_metadata(
    full_response,
    public_key_resolver=resolve_public_key # Pass the resolver function
)

print("\nExtracted metadata:")
print(json.dumps(verified_metadata, indent=2))
print(f"Verification result: {'✅ Verified' if is_valid else '❌ Failed'}")

Advanced Integration

Function Calling

When using OpenAI's function calling feature:

import openai
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from typing import Optional
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
import time
import json

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Generate key pair and resolver (replace with actual key management)
private_key, public_key = generate_key_pair()
def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    if key_id == "openai-func-key":
        return public_key
    return None

# Define tools (function definitions)
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location"]
            }
        }
    }
]

# Create a completion with function calling
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco?"}
    ],
    tools=tools,
    tool_choice="auto"
)

# Get the response
message = response.choices[0].message

# Check if the model wants to call a function
if message.tool_calls:
    # Get the function call
    function_call = message.tool_calls[0].function
    function_name = function_call.name
    function_args = json.loads(function_call.arguments)

    print(f"Function call: {function_name}")
    print(f"Arguments: {function_args}")

    # Simulate function response
    function_response = {
        "location": function_args["location"],
        "temperature": 72,
        "unit": function_args.get("unit", "fahrenheit"),
        "condition": "sunny"
    }

    # Continue the conversation with the function result
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "What's the weather like in San Francisco?"},
            message,
            {
                "role": "tool",
                "tool_call_id": message.tool_calls[0].id,
                "name": function_name,
                "content": json.dumps(function_response)
            }
        ]
    )

    # Get the final response text
    final_text = response.choices[0].message.content

    # Create metadata
    metadata = {
        "model": response.model,
        "initial_function_call": function_name,
        "timestamp": time.time(),
        "key_id": "openai-func-key"
    }

    # Embed metadata
    encoded_text = UnicodeMetadata.embed_metadata(final_text, metadata, private_key)

    print("\nFinal response with embedded metadata:")
    print(encoded_text)

    # Verify the metadata
    is_valid, verified_metadata = UnicodeMetadata.verify_metadata(
        encoded_text,
        public_key_resolver=resolve_public_key
    )
    print(f"\nVerification result: {'✅ Verified' if is_valid else '❌ Failed'}")
    if is_valid:
        print(json.dumps(verified_metadata, indent=2))

else:
    # No function call, process as a regular response
    text = message.content
    metadata = {
        "model": response.model,
        "timestamp": time.time(),
        "key_id": "openai-func-key" # Use the same key_id for consistency
    }
    encoded_text = UnicodeMetadata.embed_metadata(text, metadata, private_key)
    print("Response with embedded metadata:")
    print(encoded_text)
    # Verification would be the same as above

Custom Metadata Extraction

You can create a helper function to extract metadata from OpenAI responses:

def extract_openai_metadata(response):
    """Extract metadata from an OpenAI API response."""
    metadata = {
        "model": response.model,
        "organization": "YourOrganization",
        "timestamp": time.time(),
    }

    # Add usage information if available
    if hasattr(response, "usage"):
        metadata.update({
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens
        })

    # Add function call information if available
    message = response.choices[0].message
    if hasattr(message, "tool_calls") and message.tool_calls:
        function_call = message.tool_calls[0].function
        metadata.update({
            "function_call": function_call.name,
            "function_args": json.loads(function_call.arguments)
        })

    return metadata
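
This helper can replace the hand-built metadata dictionaries in the earlier examples. A short usage sketch, assuming the client and private_key set up above (key_id is added separately because it identifies your signing key, not anything in the OpenAI response):

response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "Write a short paragraph about AI ethics."}]
)

metadata = extract_openai_metadata(response)
metadata["key_id"] = "openai-nonstream-key"  # so the resolver can find the public key

encoded_text = UnicodeMetadata.embed_metadata(
    response.choices[0].message.content, metadata, private_key
)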

Web Application Integration

Here's an example of integrating OpenAI and EncypherAI in a Flask web application:

from flask import Flask, request, jsonify
import openai
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from typing import Optional
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
import time

app = Flask(__name__)

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Generate key pair and resolver (replace with actual key management)
private_key, public_key = generate_key_pair()
def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    if key_id == "fastapi-openai-key":
        return public_key
    return None

@app.route('/generate', methods=['POST'])
def generate():
    # Get request data
    data = request.json
    prompt = data.get('prompt', '')

    # Create a completion
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )

    # Get the response text
    text = response.choices[0].message.content

    # Create metadata
    metadata = {
        "model": response.model,
        "organization": "YourOrganization",
        "timestamp": time.time(),
        "prompt_tokens": response.usage.prompt_tokens,
        "completion_tokens": response.usage.completion_tokens,
        "total_tokens": response.usage.total_tokens,
        "user_id": data.get('user_id', 'anonymous')
    }

    # Embed metadata
    encoded_text = UnicodeMetadata.embed_metadata(text, metadata, private_key)

    # Return the response
    return jsonify({
        "text": encoded_text,
        "metadata": metadata
    })

@app.route('/verify', methods=['POST'])
def verify():
    # Get request data
    data = request.json
    text_to_verify = data.get("text")

    if not text_to_verify:
        return jsonify({"error": "Text is required"}), 400

    is_valid, verified_metadata = UnicodeMetadata.verify_metadata(
        text_to_verify,
        public_key_resolver=resolve_public_key
    )

    return jsonify({
        "is_valid": is_valid,
        "metadata": verified_metadata
    })

if __name__ == '__main__':
    app.run(debug=True)
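
A quick way to exercise these endpoints locally (a hypothetical test script, assuming the Flask app is running on the default port 5000 and the requests package is installed):

import requests

# Generate text with embedded metadata
resp = requests.post(
    "http://127.0.0.1:5000/generate",
    json={"prompt": "Write a short paragraph about AI ethics.", "user_id": "user-123"}
)
encoded_text = resp.json()["text"]

# Round-trip it through the verification endpoint
verify_resp = requests.post("http://127.0.0.1:5000/verify", json={"text": encoded_text})
print(verify_resp.json())  # expect is_valid == True if the text was not modified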

Streaming in Web Applications

For streaming responses in a web application, wrap the chunk processing in a generator. This example uses FastAPI and its StreamingResponse:

from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse
import openai
from encypher.streaming import StreamingHandler
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from typing import Optional
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
import time

app = FastAPI()

# Initialize OpenAI client
client = openai.OpenAI(api_key="your-api-key")

# Generate key pair and resolver (replace with actual key management)
private_key, public_key = generate_key_pair()
def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    if key_id == "fastapi-openai-key":
        return public_key
    return None

@app.post("/generate-stream")
def generate_stream(request: Request):
    # Get request data
    data = request.json
    prompt = data.get('prompt', '')

    # Create metadata
    metadata = {
        "model": "gpt-4",
        "timestamp": time.time(),
        "user_id": data.get('user_id', 'anonymous'), # Example extra field
        "key_id": "fastapi-openai-key"
    }

    # Initialize the streaming handler
    handler = StreamingHandler(
        metadata=metadata,
        private_key=private_key
    )

    def generate():
        # Create a streaming completion
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            stream=True
        )

        # Process each chunk
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content

                # Process the chunk
                processed_chunk = handler.process_chunk(chunk=content)

                # Yield the processed chunk if available
                if processed_chunk:
                    yield processed_chunk

        # Finalize the stream
        final_chunk = handler.finalize()
        if final_chunk:
            yield final_chunk

    return StreamingResponse(generate(), media_type="text/plain")

# Example verification endpoint
@app.post("/verify-text")
async def verify_text(request: Request):
    data = await request.json()
    text_to_verify = data.get("text")

    if not text_to_verify:
        raise HTTPException(status_code=400, detail="Text is required")

    is_valid, verified_metadata = UnicodeMetadata.verify_metadata(
        text_to_verify,
        public_key_resolver=resolve_public_key
    )

    return {
        "is_valid": is_valid,
        "metadata": verified_metadata
    }

# Run with: uvicorn your_app_file:app --reload
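
On the client side, the embedded metadata survives as long as the streamed chunks are concatenated without modification. A minimal consumption sketch using the requests package (the URL and port are illustrative, assuming the server above is running):

import requests

full_text = ""
with requests.post(
    "http://127.0.0.1:8000/generate-stream",
    json={"prompt": "Write a short paragraph about AI ethics."},
    stream=True
) as resp:
    resp.encoding = resp.encoding or "utf-8"  # ensure chunks decode as text
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)
        full_text += chunk

# full_text now carries the invisible metadata and can be posted to /verify-text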

Best Practices

  1. Include Model Information: Always include the model name, version, and other relevant information in the metadata.

  2. Add Timestamps: Include a UTC timestamp to track when the content was generated.

  3. Track Token Usage: Include token counts to monitor API usage and costs.

  4. Use Secure Keys: Store your OpenAI API key and EncypherAI private key securely, using environment variables or a secure key management system.

  5. Handle Errors Gracefully: Implement proper error handling for both OpenAI API calls and EncypherAI operations; a minimal sketch follows this list.

  6. Verify Before Trusting: Always verify the metadata before relying on it, especially for security-sensitive applications.

  7. Choose Appropriate Targets: For longer responses, using whitespace as the embedding target is usually sufficient. For shorter responses, consider using all_characters to ensure enough targets are available.
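
For item 5, a minimal error-handling sketch (the exception types come from the openai package; the embedding call follows the examples above, and the fallback behavior is one possible policy, not a requirement):

import openai
from encypher.core.unicode_metadata import UnicodeMetadata

def generate_signed_text(client, prompt, metadata, private_key):
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}]
        )
    except openai.APIError as e:
        # Covers rate limits, auth failures, server errors, etc.
        print(f"OpenAI API call failed: {e}")
        return None

    text = response.choices[0].message.content
    try:
        return UnicodeMetadata.embed_metadata(text, metadata, private_key)
    except Exception as e:
        # One possible policy: fall back to unsigned text rather than dropping the response
        print(f"Embedding failed, returning plain text: {e}")
        return text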

Troubleshooting

API Key Issues

If you encounter authentication errors with the OpenAI API:

import os

# Set API key as environment variable
os.environ["OPENAI_API_KEY"] = "your-api-key"

# Or configure the client with the key
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

Rate Limiting

If you hit rate limits, implement exponential backoff:

import time
import random

def call_with_retry(func, max_retries=5):
    retries = 0
    while retries < max_retries:
        try:
            return func()
        except openai.RateLimitError:
            retries += 1
            if retries == max_retries:
                raise
            # Exponential backoff with jitter
            sleep_time = (2 ** retries) + random.random()
            print(f"Rate limited, retrying in {sleep_time:.2f} seconds...")
            time.sleep(sleep_time)

# Example usage
def make_openai_call():
    return client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Write a short paragraph about AI ethics."}
        ]
    )

response = call_with_retry(make_openai_call)

Metadata Extraction Failures

If metadata extraction fails:

  1. Ensure the text hasn't been modified after embedding
  2. Check if the text has enough suitable targets for embedding
  3. Verify that the public key resolver returns the correct public key for the key_id used during embedding (a quick diagnostic is sketched below)
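
To distinguish "no metadata present" from "signature check failed", try a plain extraction first. A minimal diagnostic sketch (this assumes extract_metadata, shown commented out in the examples above, returns the embedded metadata dictionary, or nothing when no payload is found):

from encypher.core.unicode_metadata import UnicodeMetadata

def diagnose(text, resolver):
    # Step 1: is any embedded metadata present at all?
    extracted = UnicodeMetadata.extract_metadata(text)
    if not extracted:
        print("No embedded metadata found - the text may have been modified or never encoded.")
        return

    # Step 2: metadata is present - does the signature verify?
    is_valid, verified = UnicodeMetadata.verify_metadata(text, public_key_resolver=resolver)
    if is_valid:
        print("Metadata present and signature verified:", verified)
    else:
        print("Metadata present but verification failed - check that the resolver")
        print("returns the right public key for key_id:", extracted.get("key_id"))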