# Advanced Usage Examples

This guide provides advanced examples for using the EncypherAI package in various scenarios.

## Custom Metadata Handling

### Creating a Custom Metadata Handler

You can create custom handlers that build upon the UnicodeMetadata class to add specialized functionality:

```python
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
from typing import Optional, Dict, Any, Tuple
import hashlib
import time
import json

class EnhancedMetadataHandler:
    """Custom metadata handler with enhanced features.

    Wraps ``UnicodeMetadata`` so that every embedded payload carries a
    timestamp and, when ``include_hash`` is enabled, a SHA-256 digest of the
    text it was embedded into. Verification checks the digital signature and
    can additionally re-check that digest.
    """

    def __init__(self, private_key=None, include_hash=True):
        """
        Initialize the enhanced metadata handler.

        Args:
            private_key: The private key for signing metadata. If None, a new
                key pair is generated.
            include_hash: Whether to include a content hash in the metadata.
        """
        if private_key is None:
            # No key supplied: generate a fresh signing/verification pair.
            self.private_key, self.public_key = generate_key_pair()
        else:
            self.private_key = private_key
            # Derive the matching public key from the supplied private key so
            # that the resolver below always has something to return.
            self.public_key = private_key.public_key()

        self.include_hash = include_hash
        self.key_id = "enhanced-handler-key"

        # In a real application, you would store this more securely
        self.public_keys = {self.key_id: self.public_key}

    def resolve_public_key(self, key_id: str) -> Optional[PublicKeyTypes]:
        """Resolve a public key by its ID, or return None if it is unknown."""
        return self.public_keys.get(key_id)

    def embed_metadata(self, text: str, metadata: Dict[str, Any], target: str = "whitespace") -> str:
        """Embed metadata with a timestamp and an optional content hash.

        Args:
            text: The text to embed metadata into.
            metadata: Custom metadata to embed. The dict is not modified.
            target: Embedding target forwarded to ``UnicodeMetadata.embed_metadata``.

        Returns:
            The value returned by ``UnicodeMetadata.embed_metadata``.
        """
        # Work on a copy so the caller's dict is left untouched.
        payload = dict(metadata)

        # Add timestamp if not present
        payload.setdefault("timestamp", int(time.time()))

        # Add content hash if enabled
        if self.include_hash:
            payload["content_hash"] = hashlib.sha256(text.encode()).hexdigest()

        # Use UnicodeMetadata to perform the embedding
        return UnicodeMetadata.embed_metadata(
            text=text,
            custom_metadata=payload,
            private_key=self.private_key,
            signer_id=self.key_id,
            timestamp=payload["timestamp"],
            target=target,
        )

    def verify_metadata(self, text: str, verify_hash: bool = True) -> Tuple[bool, Optional[str], Optional[Dict[str, Any]]]:
        """Verify the signature and, optionally, the embedded content hash.

        Args:
            text: Text with embedded metadata.
            verify_hash: Whether to also compare the stored content hash with
                a hash of the current text.

        Returns:
            A ``(is_valid, signer_id, payload)`` tuple. ``payload`` is None
            whenever verification fails.
        """
        # Standard verification with digital signature
        is_valid, signer_id, verified_payload_dict = UnicodeMetadata.verify_metadata(
            text=text,
            public_key_provider=self.resolve_public_key,
        )

        if not is_valid or not verified_payload_dict:
            return False, signer_id, None

        # NOTE(review): this reads "content_hash" from the top level of the
        # verified payload - confirm that is where custom metadata ends up.
        stored_hash = verified_payload_dict.get("content_hash")

        # Optionally verify content hash
        if verify_hash and self.include_hash and stored_hash:
            # The hash was computed over the text before embedding, so the
            # metadata has to be stripped again before re-hashing.
            # NOTE(review): assumes extract_original_text returns exactly the
            # pre-embedding text - confirm against the UnicodeMetadata API.
            original_text_for_hash_check = UnicodeMetadata.extract_original_text(text)
            current_hash = hashlib.sha256(original_text_for_hash_check.encode()).hexdigest()

            # Both verifications must pass
            if current_hash != stored_hash:
                return False, signer_id, None
            return True, signer_id, verified_payload_dict

        return is_valid, signer_id, verified_payload_dict

# Example usage: sign a sample text, then verify it with the same handler.
handler = EnhancedMetadataHandler()
sample_text = "This is a sample text for advanced encoding."
sample_metadata = {
    "model": "gpt-4",
    "organization": "EncypherAI",
    "version": "2.3.0",
}

# Encode with enhanced metadata
encoded_text = handler.embed_metadata(sample_text, sample_metadata)
print(f"Encoded text: {encoded_text}")

# Verify with enhanced verification
verification = handler.verify_metadata(encoded_text)
is_valid, signer_id, verified_payload = verification
print(f"Verification result: {is_valid}")
if is_valid and verified_payload:
    print(f"Signer ID: {signer_id}")
    print(f"Verified metadata: {verified_payload}")

```

## Batch Processing

For processing large volumes of text, you can implement batch processing:

```python
from encypher.core.unicode_metadata import UnicodeMetadata
from encypher.core.keys import generate_key_pair
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
from typing import Optional, Dict, List
import time
import concurrent.futures
import json

def process_batch(texts, metadata_template, private_key=None, key_id="batch-key", max_workers=4):
    """Process a batch of texts with metadata embedding.

    Args:
        texts: Iterable of dicts, each with a "text" entry and an optional "id".
        metadata_template: Base metadata copied into every item's payload.
            The template itself is not modified.
        private_key: Signing key. If None, a new key pair is generated.
        key_id: Signer ID under which the public key is registered.
        max_workers: Maximum number of worker threads.

    Returns:
        A dict with two entries:
            "results": one result dict per input item, in input order. Each
                has "success" and "original_text", plus either "encoded_text"
                and "metadata" or "error".
            "public_key_resolver": callable mapping a key ID to the public
                key (or None), for later verification.
    """
    if private_key is None:
        private_key, public_key = generate_key_pair()
    else:
        # Derive the public key that actually matches the supplied private
        # key; a key from an unrelated pair could never verify the signatures.
        public_key = private_key.public_key()

    # Store the public key (in a real app, this would be in a secure database)
    public_keys = {key_id: public_key}

    def resolve_public_key(key_id: str) -> "Optional[PublicKeyTypes]":
        return public_keys.get(key_id)

    def process_item(index, item):
        text = item["text"]
        # One timestamp per item, shared by the payload and the signature.
        timestamp = int(time.time())

        # Create a copy of the template and add item-specific fields
        metadata = metadata_template.copy()
        # Fall back to the item's position in the batch, which is stable
        # regardless of how the worker threads are scheduled.
        metadata["item_id"] = item.get("id", f"item_{index}")
        metadata["timestamp"] = timestamp
        metadata["key_id"] = key_id  # Required for verification

        try:
            encoded_text = UnicodeMetadata.embed_metadata(
                text=text,
                custom_metadata=metadata,
                private_key=private_key,
                signer_id=key_id,
                timestamp=timestamp,
            )
        except Exception as e:
            # Deliberately broad: a failing item is reported in its result
            # instead of aborting the rest of the batch.
            return {
                "success": False,
                "original_text": text,
                "error": str(e),
            }

        return {
            "success": True,
            "original_text": text,
            "encoded_text": encoded_text,
            "metadata": metadata,
        }

    # Process items in parallel, then collect in submission order so that
    # results line up with the input.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_item, index, item)
            for index, item in enumerate(texts)
        ]
        results = [future.result() for future in futures]

    # Return the resolver alongside the results for later verification
    return {
        "results": results,
        "public_key_resolver": resolve_public_key,
    }

# Example usage: embed metadata into three items, then verify each one.
texts = [
    {"id": "item1", "text": "This is the first text item."},
    {"id": "item2", "text": "This is the second text item."},
    {"id": "item3", "text": "This is the third text item."},
]

metadata_template = {
    "model": "gpt-4",
    "organization": "EncypherAI",
    "version": "2.3.0",
}

# Process batch
batch_result = process_batch(texts, metadata_template)
results = batch_result["results"]
resolver = batch_result["public_key_resolver"]

# Print results
for outcome in results:
    if not outcome["success"]:
        print(f"Failed to process item: {outcome.get('error', 'Unknown error')}")
        continue

    print(f"Successfully processed item: {outcome['metadata']['item_id']}")

    # Verify the encoded text
    is_valid, signer_id, verified_payload_dict = UnicodeMetadata.verify_metadata(
        text=outcome["encoded_text"],
        public_key_provider=resolver,
    )

    if is_valid and verified_payload_dict:
        print(f"  Verification successful for signer {signer_id}: {verified_payload_dict.get('item_id')}")
    else:
        print("  Verification failed")

```

## Advanced Streaming Techniques

### Custom Streaming Handler

You can create a custom streaming handler with specialized behavior:

```python
from encypher.streaming.handlers import StreamingHandler
from encypher.core.unicode_metadata import MetadataTarget
from encypher.core.keys import generate_key_pair
from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes
from typing import Optional, Dict, Any
import time
import json

class EnhancedStreamingHandler(StreamingHandler):
    """Enhanced streaming handler with additional features.

    Adds per-stream bookkeeping (chunk count, total length), optional chunk
    logging, and a public-key resolver on top of ``StreamingHandler``.
    """

    def __init__(self, metadata=None, private_key=None, target=MetadataTarget.WHITESPACE,
                 chunk_threshold=100, log_chunks=False):
        """
        Initialize the enhanced streaming handler.

        Args:
            metadata: Metadata for the stream. A copy is used, so the caller's
                dict is not modified. A default "key_id" is added if missing.
            private_key: Signing key. If None, a new key pair is generated.
            target: Embedding target forwarded to ``StreamingHandler``.
            chunk_threshold: Length above which a chunk counts as "large" in
                ``process_chunk``.
            log_chunks: Whether to print a line for every processed chunk.
        """
        if private_key is None:
            private_key, public_key = generate_key_pair()
        else:
            # Derive the matching public key; without this the name would be
            # unbound whenever the caller supplies their own private key.
            public_key = private_key.public_key()

        # Copy so that the key_id default and the counters written later do
        # not leak into the caller's dict.
        metadata = {} if metadata is None else dict(metadata)

        # Ensure metadata has a key_id
        metadata.setdefault("key_id", "enhanced-stream-key")

        super().__init__(metadata=metadata, private_key=private_key, target=target)
        self.chunk_threshold = chunk_threshold
        self.log_chunks = log_chunks
        self.chunks_processed = 0
        self.total_length = 0

        # Store public key for verification (in a real app, use a secure store)
        self.public_key = public_key
        self.public_keys = {metadata["key_id"]: self.public_key}

    def resolve_public_key(self, key_id: str) -> Optional[PublicKeyTypes]:
        """Resolve a public key by its ID, or return None if it is unknown."""
        return self.public_keys.get(key_id)

    def process_chunk(self, chunk):
        """Process a chunk with enhanced logging and analytics.

        Updates the running counters, optionally logs the chunk, records the
        chunk count in the metadata, and delegates to the base class.

        Returns:
            Whatever ``StreamingHandler.process_chunk`` returns for the chunk.
        """
        self.chunks_processed += 1
        self.total_length += len(chunk)

        if self.log_chunks:
            print(f"Processing chunk {self.chunks_processed}: {len(chunk)} chars")

        # Add dynamic metadata if needed
        # NOTE(review): assumes the base class exposes the metadata dict as
        # self.metadata - confirm against StreamingHandler.
        if self.metadata:
            self.metadata["chunks_processed"] = self.chunks_processed

        # Use standard processing
        processed_chunk = super().process_chunk(chunk)

        # Apply special handling for large chunks
        if len(chunk) > self.chunk_threshold and self.chunks_processed > 1:
            # Extension point: large chunks after the first could get custom
            # handling here. Intentionally a no-op in this example.
            pass

        return processed_chunk

    def finalize(self):
        """Finalize the stream with enhanced metadata.

        Records the final chunk count, total length and finalization time in
        the metadata before delegating to the base class.
        """
        if self.metadata:
            self.metadata["total_chunks"] = self.chunks_processed
            self.metadata["total_length"] = self.total_length
            self.metadata["finalized_at"] = int(time.time())

        return super().finalize()

# Example usage: stream four chunks through the handler, then verify.
stream_metadata = {
    "model": "streaming-demo",
    "organization": "EncypherAI",
    "timestamp": int(time.time()),
    "version": "2.3.0",
    "key_id": "enhanced-stream-example",  # Required for verification
}

# Generate a key pair for this example
private_key, public_key = generate_key_pair()

handler = EnhancedStreamingHandler(
    metadata=stream_metadata,
    private_key=private_key,
    log_chunks=True,
    chunk_threshold=50,
)

# Simulate streaming
chunks = [
    "The quick ",
    "brown fox jumps ",
    "over the lazy dog. ",
    "This is an example of streaming text with embedded metadata.",
]

full_text = ""
for piece in chunks:
    processed = handler.process_chunk(piece)
    if not processed:  # May be None if buffering
        continue
    full_text += processed
    print(f"Accumulated text: {full_text}")

# Finalize
final_chunk = handler.finalize()
if final_chunk:
    full_text += final_chunk

print(f"Final text: {full_text}")

# Extract metadata without verification
from encypher.core.unicode_metadata import UnicodeMetadata

extracted = UnicodeMetadata.extract_metadata(full_text)
print(f"Extracted metadata (unverified): {json.dumps(extracted, indent=2)}")

# Verify the metadata
is_valid, signer_id, verified_payload_dict = UnicodeMetadata.verify_metadata(
    text=full_text,
    public_key_provider=handler.resolve_public_key,
)

if is_valid and verified_payload_dict:
    print(f"Signer ID: {signer_id}")
    print(f"Verified metadata: {json.dumps(verified_payload_dict, indent=2)}")

```

## Custom Verification Logic

You can implement custom verification logic for specific use cases:

```python
import json
import time
from typing import Any, Dict, Optional

from cryptography.hazmat.primitives.asymmetric.types import PublicKeyTypes

from encypher.core.keys import generate_key_pair
from encypher.core.unicode_metadata import UnicodeMetadata

def verify_content_with_custom_logic(text, resolver, expected_organization=None, max_age_hours=24):
    """
    Verify content with custom logic beyond the standard verification.

    Runs the standard signature verification first. When it succeeds, this
    optionally checks the payload's "organization" and, if a "timestamp" is
    present, that the content is no older than ``max_age_hours``.

    Args:
        text: Text with embedded metadata to verify
        resolver: Function to resolve public keys by key_id
        expected_organization: If set, verify the organization matches
        max_age_hours: Maximum age of content in hours

    Returns:
        dict: Verification results with the keys "signature_verified",
        "signer_id", "metadata_present", "custom_checks", "payload" and
        "verified".
    """
    # Standard digital signature verification
    signature_ok, signer, payload = UnicodeMetadata.verify_metadata(
        text=text,
        public_key_provider=resolver,
    )

    checks = {}
    report = {
        "signature_verified": signature_ok,
        "signer_id": signer,
        "metadata_present": bool(payload),
        "custom_checks": checks,
        "payload": payload,
    }

    # Custom checks only make sense on a verified, non-empty payload
    if signature_ok and payload:
        # Check organization if specified
        if expected_organization:
            checks["organization_match"] = payload.get("organization") == expected_organization

        # Check age if timestamp is present
        if "timestamp" in payload:
            raw_timestamp = payload.get("timestamp")
            try:
                if isinstance(raw_timestamp, str) and raw_timestamp.isdigit():
                    issued_at = int(raw_timestamp)
                elif isinstance(raw_timestamp, (int, float)):
                    issued_at = raw_timestamp
                else:
                    # Only Unix timestamps (numbers or digit strings) are
                    # handled in this example; ISO strings etc. are rejected.
                    raise ValueError("Timestamp format not directly usable as int/float.")
                checks["age_match"] = time.time() - issued_at <= max_age_hours * 3600
            except (ValueError, TypeError) as exc:
                checks["age_match"] = False
                checks["age_error"] = str(exc)

    # Overall result: signature, payload and every boolean custom check must
    # pass; "*_error" entries are diagnostics, not checks.
    checks_passed = all(
        outcome for name, outcome in checks.items() if not name.endswith("_error")
    )
    report["verified"] = signature_ok and bool(payload) and checks_passed

    return report

# Example usage: sign a text, then run the custom verification on it.

# Generate a key pair
private_key, public_key = generate_key_pair()
key_id = "verification-example-key"

# Create a resolver function
public_keys = {key_id: public_key}


def resolve_public_key(key_id: str) -> Optional[PublicKeyTypes]:
    """Return the public key registered for key_id, or None if unknown."""
    return public_keys.get(key_id)


text = "This is a sample text for verification."
metadata = {
    "model": "gpt-4",
    "organization": "EncypherAI",
    "timestamp": int(time.time()),
    "version": "2.3.0",
    "key_id": key_id,  # Required for verification
}

# Embed metadata with digital signature
encoded_text = UnicodeMetadata.embed_metadata(
    text=text,
    custom_metadata=metadata,
    private_key=private_key,
    signer_id=key_id,
    timestamp=metadata.get("timestamp"),
)

# Verify with custom logic
verification_results = verify_content_with_custom_logic(
    encoded_text,
    resolver=resolve_public_key,
    expected_organization="EncypherAI",
    max_age_hours=48,
)

# json is imported at the top of this snippet alongside time.
print(f"Verification results: {json.dumps(verification_results, indent=2)}")

```

These advanced examples demonstrate how to extend and customize EncypherAI's functionality for various use cases, using digital signatures for enhanced security and verification.