"""
|
|
Rate limiter and buffer manager for memory leak protection
|
|
"""
|
|
import asyncio
import logging
import time
from collections import deque
from typing import Any, Dict, List, Optional

from config import settings

# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)
class RateLimiter:
    """
    Token bucket rate limiter for message processing.

    Prevents overwhelming downstream systems and protects against memory leaks.
    """

    def __init__(self, max_rate: Optional[int] = None):
        """
        Initialize rate limiter.

        Args:
            max_rate: Maximum messages per second. Defaults to
                ``settings.RATE_LIMIT_MESSAGES_PER_SEC``, resolved at
                construction time rather than at import time (a default of
                ``settings.X`` in the signature freezes the value when the
                module is first imported).
        """
        if max_rate is None:
            max_rate = settings.RATE_LIMIT_MESSAGES_PER_SEC
        self.max_rate = max_rate
        # Start with a full bucket so an initial burst up to max_rate is allowed.
        self.tokens = float(max_rate)
        # Monotonic clock: immune to system clock adjustments (NTP, DST),
        # which would otherwise corrupt the elapsed-time refill computation.
        self.last_update = time.monotonic()
        self.lock = asyncio.Lock()

    async def acquire(self) -> bool:
        """
        Acquire a token for processing a message.

        Returns:
            True if a token was acquired, False if the rate limit is exceeded.
        """
        async with self.lock:
            now = time.monotonic()
            elapsed = now - self.last_update

            # Refill proportionally to elapsed time, capped at bucket capacity.
            self.tokens = min(
                self.max_rate,
                self.tokens + elapsed * self.max_rate,
            )
            self.last_update = now

            if self.tokens >= 1:
                self.tokens -= 1
                return True

            return False

    async def wait(self) -> None:
        """Block until a token is available."""
        while not await self.acquire():
            await asyncio.sleep(0.01)  # 10ms backoff avoids busy-spinning


class BufferedMessageProcessor:
    """
    Buffered message processor with memory protection.

    Features:
    - Bounded buffer to prevent memory exhaustion
    - Batch processing for efficiency
    - Overflow detection and alerts
    - Backpressure handling
    """

    def __init__(
        self,
        max_buffer_size: Optional[int] = None,
        batch_size: int = 100,
        batch_timeout: float = 1.0,
    ):
        """
        Initialize buffered processor.

        Args:
            max_buffer_size: Maximum messages in buffer. Defaults to
                ``settings.MAX_BUFFER_SIZE``, resolved at construction time
                rather than at import time.
            batch_size: Number of messages to batch before processing.
            batch_timeout: Max time to wait before processing partial batch (seconds).
        """
        if max_buffer_size is None:
            max_buffer_size = settings.MAX_BUFFER_SIZE
        self.max_buffer_size = max_buffer_size
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout

        # Bounded FIFO buffer; maxlen is a hard cap even if the explicit
        # size check in add_message() were ever bypassed.
        self.buffer: deque = deque(maxlen=max_buffer_size)
        self.lock = asyncio.Lock()

        # Statistics exposed via get_stats().
        self.stats = {
            "messages_buffered": 0,
            "messages_processed": 0,
            "messages_dropped": 0,
            "buffer_overflows": 0,
            "current_buffer_size": 0,
            "max_buffer_size_reached": 0,
        }

    async def add_message(self, message: Dict[str, Any]) -> bool:
        """
        Add message to buffer.

        Args:
            message: Message to buffer.

        Returns:
            True if added successfully, False if buffer is full (message dropped).
        """
        async with self.lock:
            current_size = len(self.buffer)

            # Buffer full: drop the incoming message (tail drop).
            if current_size >= self.max_buffer_size:
                self.stats["messages_dropped"] += 1
                self.stats["buffer_overflows"] += 1

                # Log only every 100th overflow to avoid log flooding.
                if self.stats["buffer_overflows"] % 100 == 1:
                    logger.warning(
                        f"Buffer overflow! Dropped message. "
                        f"Buffer size: {current_size}/{self.max_buffer_size}"
                    )
                return False

            self.buffer.append(message)
            self.stats["messages_buffered"] += 1
            new_size = len(self.buffer)
            self.stats["current_buffer_size"] = new_size

            # Track the high-water mark using the size *after* this append.
            # (Using the pre-append size under-reported the peak by one.)
            if new_size > self.stats["max_buffer_size_reached"]:
                self.stats["max_buffer_size_reached"] = new_size

            return True

    async def get_batch(self, timeout: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        Get batch of messages from buffer.

        Args:
            timeout: Max time to wait for batch (seconds). Defaults to
                ``self.batch_timeout``.

        Returns:
            List of messages (may be fewer than batch_size).
        """
        # `is None` check so an explicit timeout of 0 (single poll) is
        # honored instead of silently falling back to batch_timeout.
        if timeout is None:
            timeout = self.batch_timeout
        start_time = time.monotonic()
        batch: List[Dict[str, Any]] = []

        while len(batch) < self.batch_size:
            async with self.lock:
                if self.buffer:
                    batch.append(self.buffer.popleft())
                    self.stats["current_buffer_size"] = len(self.buffer)
                # Snapshot emptiness while holding the lock so the decision
                # below does not race concurrent add_message() calls.
                drained = not self.buffer

            # Deadline reached: return whatever we collected.
            if time.monotonic() - start_time >= timeout:
                break

            # Partial batch and nothing left to drain: return it now.
            if drained and batch:
                break

            # Nothing collected yet: back off briefly to avoid busy waiting.
            if not batch:
                await asyncio.sleep(0.01)

        if batch:
            self.stats["messages_processed"] += len(batch)

        return batch

    def get_buffer_usage(self) -> float:
        """Get buffer usage percentage (0.0 to 1.0)."""
        return len(self.buffer) / self.max_buffer_size if self.max_buffer_size > 0 else 0.0

    def is_buffer_critical(self, threshold: float = 0.8) -> bool:
        """Check if buffer usage is above critical threshold."""
        return self.get_buffer_usage() > threshold

    def get_stats(self) -> Dict[str, Any]:
        """Get processor statistics, including formatted usage and drop rates."""
        buffer_usage = self.get_buffer_usage()
        # Guard against division by zero before any message is buffered.
        drop_rate = (
            self.stats["messages_dropped"] / self.stats["messages_buffered"]
            if self.stats["messages_buffered"] > 0
            else 0.0
        )

        return {
            **self.stats,
            "buffer_usage": f"{buffer_usage:.1%}",
            "drop_rate": f"{drop_rate:.2%}",
        }

    async def clear(self) -> None:
        """Clear all buffered messages."""
        async with self.lock:
            self.buffer.clear()
            self.stats["current_buffer_size"] = 0
            logger.info("Message buffer cleared")