Advanced Usage Examples
This guide provides advanced examples for using the EncypherAI package in various scenarios.
Custom Metadata Handling
Creating a Custom Metadata Encoder
You can extend the base MetadataEncoder
class to create custom encoders with specialized functionality:
from encypher.core.metadata_encoder import MetadataEncoder
import hashlib
import time
import json
class EnhancedMetadataEncoder(MetadataEncoder):
"""Custom metadata encoder with enhanced features."""
def __init__(self, secret_key=None, include_hash=True):
super().__init__(secret_key=secret_key)
self.include_hash = include_hash
def encode_metadata(self, text, metadata, target="whitespace"):
"""Encode metadata with additional content hash."""
# Add timestamp if not present
if "timestamp" not in metadata:
metadata["timestamp"] = int(time.time())
# Add content hash if enabled
if self.include_hash:
content_hash = hashlib.sha256(text.encode()).hexdigest()
metadata["content_hash"] = content_hash
# Use parent class to perform the encoding
return super().encode_metadata(text, metadata, target)
def verify_text(self, text, verify_hash=True):
"""Enhanced verification that also checks content hash."""
# Extract metadata
metadata = self.decode_metadata(text)
if not metadata:
return False
# Perform standard HMAC verification
standard_verification = super().verify_text(text)
# Optionally verify content hash
if verify_hash and self.include_hash and "content_hash" in metadata:
# Extract text without metadata
clean_text = self.strip_metadata(text)
# Calculate hash of clean text
current_hash = hashlib.sha256(clean_text.encode()).hexdigest()
# Compare with stored hash
hash_verification = current_hash == metadata["content_hash"]
# Both verifications must pass
return standard_verification and hash_verification
return standard_verification
# Example usage
encoder = EnhancedMetadataEncoder(secret_key="my-secret-key")
text = "This is a sample text for advanced encoding."
metadata = {
"model": "gpt-4",
"organization": "EncypherAI",
"version": "1.1.0"
}
# Encode with enhanced metadata
encoded_text = encoder.encode_metadata(text, metadata)
print(f"Encoded text: {encoded_text}")
# Verify with enhanced verification
verification_result = encoder.verify_text(encoded_text)
print(f"Verification result: {verification_result}")
Batch Processing
For processing large volumes of text, you can implement batch processing:
from encypher.core.metadata_encoder import MetadataEncoder
import time
import concurrent.futures
import json
def process_batch(texts, metadata_template, encoder=None, max_workers=4):
"""Process a batch of texts with metadata embedding."""
if encoder is None:
encoder = MetadataEncoder()
results = []
# Define processing function
def process_item(item):
text = item["text"]
# Create a copy of the template and add item-specific fields
metadata = metadata_template.copy()
metadata["item_id"] = item.get("id", f"item_{len(results)}")
metadata["timestamp"] = int(time.time())
try:
# Encode metadata
encoded_text = encoder.encode_metadata(text, metadata)
return {
"success": True,
"original_text": text,
"encoded_text": encoded_text,
"metadata": metadata
}
except Exception as e:
return {
"success": False,
"original_text": text,
"error": str(e)
}
# Process items in parallel
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
futures = [executor.submit(process_item, item) for item in texts]
for future in concurrent.futures.as_completed(futures):
results.append(future.result())
return results
# Example usage
texts = [
{"id": "item1", "text": "This is the first text item."},
{"id": "item2", "text": "This is the second text item."},
{"id": "item3", "text": "This is the third text item."}
]
metadata_template = {
"model": "gpt-4",
"organization": "EncypherAI",
"version": "1.1.0"
}
# Process batch
results = process_batch(texts, metadata_template)
# Print results
for result in results:
if result["success"]:
print(f"Successfully processed item: {result['metadata']['item_id']}")
else:
print(f"Failed to process item: {result.get('error', 'Unknown error')}")
Advanced Streaming Techniques
Custom Streaming Handler
You can create a custom streaming handler with specialized behavior:
from encypher.streaming.handlers import StreamingHandler
from encypher.core.unicode_metadata import MetadataTarget
import time
import json
class EnhancedStreamingHandler(StreamingHandler):
"""Enhanced streaming handler with additional features."""
def __init__(self, metadata=None, target=MetadataTarget.WHITESPACE,
secret_key=None, chunk_threshold=100, log_chunks=False):
super().__init__(metadata=metadata, target=target, secret_key=secret_key)
self.chunk_threshold = chunk_threshold
self.log_chunks = log_chunks
self.chunks_processed = 0
self.total_length = 0
def process_chunk(self, chunk):
"""Process a chunk with enhanced logging and analytics."""
self.chunks_processed += 1
self.total_length += len(chunk)
if self.log_chunks:
print(f"Processing chunk {self.chunks_processed}: {len(chunk)} chars")
# Add dynamic metadata if needed
if self.metadata and "chunks_processed" not in self.metadata:
self.metadata["chunks_processed"] = 0
if self.metadata:
self.metadata["chunks_processed"] = self.chunks_processed
# Use standard processing
processed_chunk = super().process_chunk(chunk)
# Apply special handling for large chunks
if len(chunk) > self.chunk_threshold and self.chunks_processed > 1:
# For large chunks after the first, we might want special handling
# This is just an example - you could implement custom logic here
pass
return processed_chunk
def finalize(self):
"""Finalize the stream with enhanced metadata."""
if self.metadata:
self.metadata["total_chunks"] = self.chunks_processed
self.metadata["total_length"] = self.total_length
self.metadata["finalized_at"] = int(time.time())
return super().finalize()
# Example usage
metadata = {
"model": "streaming-demo",
"organization": "EncypherAI",
"timestamp": int(time.time()),
"version": "1.1.0"
}
handler = EnhancedStreamingHandler(
metadata=metadata,
log_chunks=True,
chunk_threshold=50
)
# Simulate streaming
chunks = [
"The quick ",
"brown fox jumps ",
"over the lazy dog. ",
"This is an example of streaming text with embedded metadata."
]
full_text = ""
for chunk in chunks:
processed = handler.process_chunk(chunk)
full_text += processed
print(f"Accumulated text: {full_text}")
# Finalize
final_chunk = handler.finalize()
if final_chunk:
full_text += final_chunk
print(f"Final text: {full_text}")
# Extract metadata
from encypher.core.unicode_metadata import UnicodeMetadata
extracted = UnicodeMetadata.extract_metadata(full_text)
print(f"Extracted metadata: {json.dumps(extracted, indent=2)}")
Custom Verification Logic
You can implement custom verification logic for specific use cases:
from encypher.core.metadata_encoder import MetadataEncoder
import time
def verify_content_with_custom_logic(text, expected_organization=None, max_age_hours=24):
"""
Verify content with custom logic beyond the standard verification.
Args:
text: Text with embedded metadata to verify
expected_organization: If set, verify the organization matches
max_age_hours: Maximum age of content in hours
Returns:
dict: Verification results with detailed information
"""
encoder = MetadataEncoder()
# Standard verification
hmac_verified = encoder.verify_text(text)
# Extract metadata for custom checks
metadata = encoder.decode_metadata(text)
# Initialize results
results = {
"hmac_verified": hmac_verified,
"metadata_present": bool(metadata),
"custom_checks": {}
}
# If metadata is present, perform custom checks
if metadata:
# Check organization if specified
if expected_organization:
org_match = metadata.get("organization") == expected_organization
results["custom_checks"]["organization_match"] = org_match
# Check age if timestamp is present
if "timestamp" in metadata:
try:
# Get timestamp as int
timestamp = metadata["timestamp"]
if isinstance(timestamp, str) and timestamp.isdigit():
timestamp = int(timestamp)
# Calculate age in hours
current_time = int(time.time())
age_seconds = current_time - timestamp
age_hours = age_seconds / 3600
results["custom_checks"]["age_hours"] = age_hours
results["custom_checks"]["age_within_limit"] = age_hours <= max_age_hours
except Exception as e:
results["custom_checks"]["timestamp_error"] = str(e)
# Overall verification result
results["verified"] = (
hmac_verified and
bool(metadata) and
all(results["custom_checks"].values())
)
return results
# Example usage
encoder = MetadataEncoder()
text = "This is a sample text for verification."
metadata = {
"model": "gpt-4",
"organization": "EncypherAI",
"timestamp": int(time.time()),
"version": "1.1.0"
}
# Encode metadata
encoded_text = encoder.encode_metadata(text, metadata)
# Verify with custom logic
verification_results = verify_content_with_custom_logic(
encoded_text,
expected_organization="EncypherAI",
max_age_hours=48
)
print(f"Verification results: {json.dumps(verification_results, indent=2)}")
These advanced examples demonstrate how to extend and customize EncypherAI's functionality for various use cases.