"""
|
|
Rate limiter and buffer manager for memory leak protection
|
|
"""
|
|
import asyncio
import logging
import time
from collections import deque
from typing import Any, Dict, List, Optional

from config import settings

# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)
class RateLimiter:
    """
    Token bucket rate limiter for message processing.

    Prevents overwhelming downstream systems and protects against memory leaks.
    """

    def __init__(self, max_rate: Optional[int] = None):
        """
        Initialize rate limiter.

        Args:
            max_rate: Maximum messages per second. Defaults to
                ``settings.RATE_LIMIT_MESSAGES_PER_SEC``, resolved at
                construction time rather than at import time (a default of
                ``settings.X`` in the signature freezes the value when the
                module is first imported).
        """
        if max_rate is None:
            max_rate = settings.RATE_LIMIT_MESSAGES_PER_SEC
        self.max_rate = max_rate
        # Start with a full bucket so an initial burst up to max_rate is allowed.
        self.tokens = float(max_rate)
        # Monotonic clock: immune to system clock adjustments (NTP, DST),
        # which would otherwise corrupt the elapsed-time refill computation.
        self.last_update = time.monotonic()
        self.lock = asyncio.Lock()

    async def acquire(self) -> bool:
        """
        Acquire a token for processing a message.

        Returns:
            True if a token was acquired, False if the rate limit is exceeded.
        """
        async with self.lock:
            now = time.monotonic()
            elapsed = now - self.last_update

            # Refill proportionally to elapsed time, capped at bucket capacity.
            self.tokens = min(
                self.max_rate,
                self.tokens + elapsed * self.max_rate,
            )
            self.last_update = now

            if self.tokens >= 1:
                self.tokens -= 1
                return True

            return False

    async def wait(self) -> None:
        """Block until a token is available."""
        while not await self.acquire():
            await asyncio.sleep(0.01)  # 10ms backoff avoids busy-spinning


class BufferedMessageProcessor:
    """
    Buffered message processor with memory protection.

    Features:
    - Bounded buffer to prevent memory exhaustion
    - Batch processing for efficiency
    - Overflow detection and alerts
    - Backpressure handling
    """

    def __init__(
        self,
        max_buffer_size: Optional[int] = None,
        batch_size: int = 100,
        batch_timeout: float = 1.0,
    ):
        """
        Initialize buffered processor.

        Args:
            max_buffer_size: Maximum messages in buffer. Defaults to
                ``settings.MAX_BUFFER_SIZE``, resolved at construction time
                rather than at import time.
            batch_size: Number of messages to batch before processing.
            batch_timeout: Max time to wait before processing partial batch (seconds).
        """
        if max_buffer_size is None:
            max_buffer_size = settings.MAX_BUFFER_SIZE
        self.max_buffer_size = max_buffer_size
        self.batch_size = batch_size
        self.batch_timeout = batch_timeout

        # Bounded FIFO buffer; maxlen is a hard cap even if the explicit
        # size check in add_message() were ever bypassed.
        self.buffer: deque = deque(maxlen=max_buffer_size)
        self.lock = asyncio.Lock()

        # Statistics exposed via get_stats().
        self.stats = {
            "messages_buffered": 0,
            "messages_processed": 0,
            "messages_dropped": 0,
            "buffer_overflows": 0,
            "current_buffer_size": 0,
            "max_buffer_size_reached": 0,
        }

    async def add_message(self, message: Dict[str, Any]) -> bool:
        """
        Add message to buffer.

        Args:
            message: Message to buffer.

        Returns:
            True if added successfully, False if buffer is full (message dropped).
        """
        async with self.lock:
            current_size = len(self.buffer)

            # Buffer full: drop the incoming message (tail drop).
            if current_size >= self.max_buffer_size:
                self.stats["messages_dropped"] += 1
                self.stats["buffer_overflows"] += 1

                # Log only every 100th overflow to avoid log flooding.
                if self.stats["buffer_overflows"] % 100 == 1:
                    logger.warning(
                        f"Buffer overflow! Dropped message. "
                        f"Buffer size: {current_size}/{self.max_buffer_size}"
                    )
                return False

            self.buffer.append(message)
            self.stats["messages_buffered"] += 1
            new_size = len(self.buffer)
            self.stats["current_buffer_size"] = new_size

            # Track the high-water mark using the size *after* this append.
            # (Using the pre-append size under-reported the peak by one.)
            if new_size > self.stats["max_buffer_size_reached"]:
                self.stats["max_buffer_size_reached"] = new_size

            return True

    async def get_batch(self, timeout: Optional[float] = None) -> List[Dict[str, Any]]:
        """
        Get batch of messages from buffer.

        Args:
            timeout: Max time to wait for batch (seconds). Defaults to
                ``self.batch_timeout``.

        Returns:
            List of messages (may be fewer than batch_size).
        """
        # `is None` check so an explicit timeout of 0 (single poll) is
        # honored instead of silently falling back to batch_timeout.
        if timeout is None:
            timeout = self.batch_timeout
        start_time = time.monotonic()
        batch: List[Dict[str, Any]] = []

        while len(batch) < self.batch_size:
            async with self.lock:
                if self.buffer:
                    batch.append(self.buffer.popleft())
                    self.stats["current_buffer_size"] = len(self.buffer)
                # Snapshot emptiness while holding the lock so the decision
                # below does not race concurrent add_message() calls.
                drained = not self.buffer

            # Deadline reached: return whatever we collected.
            if time.monotonic() - start_time >= timeout:
                break

            # Partial batch and nothing left to drain: return it now.
            if drained and batch:
                break

            # Nothing collected yet: back off briefly to avoid busy waiting.
            if not batch:
                await asyncio.sleep(0.01)

        if batch:
            self.stats["messages_processed"] += len(batch)

        return batch

    def get_buffer_usage(self) -> float:
        """Get buffer usage percentage (0.0 to 1.0)."""
        return len(self.buffer) / self.max_buffer_size if self.max_buffer_size > 0 else 0.0

    def is_buffer_critical(self, threshold: float = 0.8) -> bool:
        """Check if buffer usage is above critical threshold."""
        return self.get_buffer_usage() > threshold

    def get_stats(self) -> Dict[str, Any]:
        """Get processor statistics, including formatted usage and drop rates."""
        buffer_usage = self.get_buffer_usage()
        # Guard against division by zero before any message is buffered.
        drop_rate = (
            self.stats["messages_dropped"] / self.stats["messages_buffered"]
            if self.stats["messages_buffered"] > 0
            else 0.0
        )

        return {
            **self.stats,
            "buffer_usage": f"{buffer_usage:.1%}",
            "drop_rate": f"{drop_rate:.2%}",
        }

    async def clear(self) -> None:
        """Clear all buffered messages."""
        async with self.lock:
            self.buffer.clear()
            self.stats["current_buffer_size"] = 0
            logger.info("Message buffer cleared")