# tradusai/core/rate_limiter.py
# Last updated: 2025-12-02 22:54:03 +08:00
"""
Rate limiter and buffer manager for memory leak protection
"""
import asyncio
import logging
import time
from collections import deque
from typing import Any, Dict, List, Optional

from config import settings
logger = logging.getLogger(__name__)
class RateLimiter:
"""
Token bucket rate limiter for message processing.
Prevents overwhelming downstream systems and protects against memory leaks.
"""
def __init__(self, max_rate: int = settings.RATE_LIMIT_MESSAGES_PER_SEC):
"""
Initialize rate limiter
Args:
max_rate: Maximum messages per second
"""
self.max_rate = max_rate
self.tokens = max_rate
self.last_update = time.time()
self.lock = asyncio.Lock()
async def acquire(self) -> bool:
"""
Acquire token for processing a message
Returns:
True if token acquired, False if rate limit exceeded
"""
async with self.lock:
now = time.time()
elapsed = now - self.last_update
# Refill tokens based on elapsed time
self.tokens = min(
self.max_rate,
self.tokens + elapsed * self.max_rate
)
self.last_update = now
if self.tokens >= 1:
self.tokens -= 1
return True
return False
async def wait(self) -> None:
"""Wait until a token is available"""
while not await self.acquire():
await asyncio.sleep(0.01) # 10ms sleep
class BufferedMessageProcessor:
    """
    Buffered message processor with memory protection.

    Features:
    - Bounded buffer to prevent memory exhaustion
    - Batch processing for efficiency
    - Overflow detection and alerts
    - Backpressure handling
    """

    def __init__(
        self,
        max_buffer_size: Optional[int] = None,
        batch_size: int = 100,
        batch_timeout: float = 1.0,
    ):
        """
        Initialize buffered processor.

        Args:
            max_buffer_size: Maximum messages in buffer. Defaults to
                settings.MAX_BUFFER_SIZE, resolved at call time rather than
                import time so runtime configuration changes are honored.
            batch_size: Number of messages to batch before processing.
            batch_timeout: Max time to wait before processing a partial
                batch (seconds).
        """
        self.max_buffer_size = (
            max_buffer_size if max_buffer_size is not None else settings.MAX_BUFFER_SIZE
        )
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout
        # Bounded deque: even if the explicit size check in add_message were
        # bypassed, maxlen guarantees the buffer cannot grow without bound.
        self.buffer: deque = deque(maxlen=self.max_buffer_size)
        self.lock = asyncio.Lock()
        # Counters surfaced via get_stats().
        self.stats: Dict[str, Any] = {
            "messages_buffered": 0,
            "messages_processed": 0,
            "messages_dropped": 0,
            "buffer_overflows": 0,
            "current_buffer_size": 0,
            "max_buffer_size_reached": 0,
        }

    async def add_message(self, message: Dict[str, Any]) -> bool:
        """
        Add a message to the buffer.

        Args:
            message: Message to buffer.

        Returns:
            True if added successfully, False if the buffer is full
            (message dropped).
        """
        async with self.lock:
            current_size = len(self.buffer)
            # Drop explicitly rather than letting deque(maxlen=...) silently
            # evict the oldest entry; count and (sparsely) log the overflow.
            if current_size >= self.max_buffer_size:
                self.stats["messages_dropped"] += 1
                self.stats["buffer_overflows"] += 1
                # Log only every 100th overflow to avoid log spam.
                if self.stats["buffer_overflows"] % 100 == 1:
                    logger.warning(
                        f"Buffer overflow! Dropped message. "
                        f"Buffer size: {current_size}/{self.max_buffer_size}"
                    )
                return False

            self.buffer.append(message)
            self.stats["messages_buffered"] += 1
            new_size = len(self.buffer)
            self.stats["current_buffer_size"] = new_size
            # BUGFIX: track the high-water mark with the size *after* the
            # append; the original recorded the pre-append size (off by one).
            if new_size > self.stats["max_buffer_size_reached"]:
                self.stats["max_buffer_size_reached"] = new_size
            return True

    async def get_batch(self, timeout: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        Get a batch of messages from the buffer.

        Args:
            timeout: Max time to wait for a batch (seconds). Defaults to
                self.batch_timeout.

        Returns:
            List of messages (may contain fewer than batch_size entries).
        """
        # BUGFIX: the original used `timeout or self.batch_timeout`, which
        # treated an explicit timeout of 0 as "use the default".
        if timeout is None:
            timeout = self.batch_timeout
        start_time = time.time()
        batch: List[Dict[str, Any]] = []
        while len(batch) < self.batch_size:
            async with self.lock:
                if self.buffer:
                    batch.append(self.buffer.popleft())
                    self.stats["current_buffer_size"] = len(self.buffer)
            # Stop once the deadline passes, even with a partial batch.
            if time.time() - start_time >= timeout:
                break
            # Buffer drained and we already hold messages: return them now.
            if not self.buffer and batch:
                break
            # Avoid busy-waiting while the buffer is empty.
            if not batch:
                await asyncio.sleep(0.01)
        if batch:
            self.stats["messages_processed"] += len(batch)
        return batch

    def get_buffer_usage(self) -> float:
        """Get buffer usage as a fraction (0.0 to 1.0)."""
        return len(self.buffer) / self.max_buffer_size if self.max_buffer_size > 0 else 0.0

    def is_buffer_critical(self, threshold: float = 0.8) -> bool:
        """Check if buffer usage is above the critical threshold."""
        return self.get_buffer_usage() > threshold

    def get_stats(self) -> Dict[str, Any]:
        """Get processor statistics, including formatted usage/drop rates."""
        buffer_usage = self.get_buffer_usage()
        drop_rate = (
            self.stats["messages_dropped"] / self.stats["messages_buffered"]
            if self.stats["messages_buffered"] > 0
            else 0.0
        )
        return {
            **self.stats,
            "buffer_usage": f"{buffer_usage:.1%}",
            "drop_rate": f"{drop_rate:.2%}",
        }

    async def clear(self) -> None:
        """Clear all buffered messages and reset the size gauge."""
        async with self.lock:
            self.buffer.clear()
            self.stats["current_buffer_size"] = 0
            logger.info("Message buffer cleared")