Files
myaps_api/apps/data_opt/utils/binlog_ha/enhanced_lock.py
T
chaoge 78269d8d74 新增: binlog监听器高可用增强模块
实现功能:
- Prometheus指标暴露(/metrics端点)
- 背压控制(主动限流机制)
- 事件去重(Redis+文件降级)
- 配置热更新(审计日志)
- 主备故障转移(心跳检测)
- 分布式锁安全降级
- 连接池监控(泄漏检测)
- 重试策略优化(指数退避)

新增接口:
- GET /metrics, /health, /binlog/status
- GET /binlog/backpressure/status
- POST /binlog/config/update, GET /binlog/config/audit
- GET /binlog/dedup/stats
- GET /binlog/failover/status

测试覆盖:
- 单元测试71个,全部通过
- 压测验证:吞吐量499事件/秒,达标率99.91%
2026-05-22 07:08:49 +08:00

244 lines
8.2 KiB
Python

"""
Binlog 监听器 - 分布式锁增强
提供安全降级策略的主备选举机制
"""
import os
import time
import threading
import uuid
from typing import Optional
from dataclasses import dataclass
from enum import Enum
from .models import EnvMode, FallbackMode
from globalobjects import logger
@dataclass(eq=False)
class LockResult:
    """Outcome of a lock acquisition attempt.

    Instances are truthy exactly when the acquisition succeeded, so callers
    may write ``if lock.acquire(): ...``.

    ``eq=False`` keeps the identity-based equality and hashing of a plain
    class; the dataclass is used for the generated ``__init__`` and a
    readable ``__repr__`` in logs and debuggers.
    """

    # Whether the caller may proceed as the lock holder.
    success: bool
    # Which strategy produced this result (Redis lock, fallback, or rejection).
    mode: FallbackMode = FallbackMode.REDIS
    # Short machine-readable explanation; None when no explanation is needed.
    reason: Optional[str] = None

    def __bool__(self) -> bool:
        """Return True when the lock was acquired."""
        return self.success
class EnhancedDistributedLock:
    """Redis-backed distributed lock with a safe degradation strategy.

    The lock is a single Redis key written with ``SET NX EX``; a daemon
    thread periodically extends its expiry while this instance holds it.
    When Redis is unavailable the behaviour depends on the configured
    environment mode: multi-worker deployments are rejected, any other mode
    is allowed to proceed as a single instance.
    """

    def __init__(
        self,
        lock_name: str = "binlog_listener_lock",
        ttl: int = 30,
        environment_mode: EnvMode = EnvMode.SINGLE_NODE
    ):
        """Initialise the lock.

        Args:
            lock_name: Redis key used for the lock.
            ttl: Lock expiry in seconds.
            environment_mode: Deployment mode that drives the fallback policy.
        """
        self.lock_name = lock_name
        self.ttl = ttl
        self.environment_mode = environment_mode
        self._lock_holder = False
        # Unique token stored in Redis while this instance owns the key.
        self._lock_value: Optional[str] = None
        self._refresh_thread: Optional[threading.Thread] = None
        # Stop signal of the *current* refresh thread; replaced on each start.
        self._stop_event = threading.Event()
        # Cached health-check result and the time it was taken.
        self._redis_health = True
        self._last_redis_check = 0.0
        self._redis_check_interval = 10.0

    def _get_redis_client(self):
        """Return a Redis client from the pool manager, or None on failure."""
        try:
            from apps.common.utils.redis_pool_manager import get_redis_pool_manager
            pool_manager = get_redis_pool_manager()
            return pool_manager.get_client()
        except Exception as e:
            logger.warning(f"⚠️ 获取Redis客户端失败: {e}")
            return None

    def _check_redis_health(self) -> bool:
        """Ping Redis, reusing the cached result within the check interval.

        Returns:
            True when the last ping succeeded, False otherwise.
        """
        current_time = time.time()
        if current_time - self._last_redis_check < self._redis_check_interval:
            return self._redis_health
        self._last_redis_check = current_time
        try:
            client = self._get_redis_client()
            if not client:
                self._redis_health = False
                return False
            client.ping()
            self._redis_health = True
            return True
        except Exception as e:
            self._redis_health = False
            logger.warning(f"⚠️ Redis健康检查失败: {e}")
            return False

    def _detect_environment(self) -> EnvMode:
        """Return the environment mode supplied at construction time."""
        return self.environment_mode

    @staticmethod
    def _as_text(raw) -> Optional[str]:
        """Normalise a Redis reply to ``str``.

        The client may return ``bytes`` or, when response decoding is
        enabled, ``str``; both must compare equal to the stored token.
        """
        if raw is None:
            return None
        if isinstance(raw, (bytes, bytearray)):
            return bytes(raw).decode("utf-8", errors="replace")
        return str(raw)

    def _is_current_owner(self, client) -> bool:
        """Return True when the Redis key currently holds this instance's token.

        NOTE(review): callers follow this GET with a separate EXPIRE/DELETE,
        so there is a small window in which the key can expire and be taken
        by another node. An atomic compare-and-delete / compare-and-expire
        (e.g. a Lua script) would close it — confirm the client supports
        scripting before switching.
        """
        token = self._lock_value
        if not token:
            return False
        return self._as_text(client.get(self.lock_name)) == token

    def acquire(self) -> LockResult:
        """Try to become the lock holder, degrading safely without Redis.

        Policy:
            - Redis healthy                      -> use the Redis lock
            - Redis down, multi-worker mode      -> refuse to start
            - Redis down, any other mode         -> run as a single instance

        Returns:
            A LockResult describing whether and how the lock was obtained.
        """
        redis_healthy = self._check_redis_health()
        env_mode = self._detect_environment()

        if not redis_healthy:
            if env_mode == EnvMode.MULTI_WORKER:
                logger.error(
                    f"❌ 多worker模式下Redis不可用,拒绝启动 "
                    f"(lock={self.lock_name})"
                )
                return LockResult(
                    success=False,
                    mode=FallbackMode.REJECT,
                    reason="multi_worker_requires_redis"
                )
            logger.warning(
                f"⚠️ Redis不可用,降级为单实例模式 "
                f"(lock={self.lock_name}, env={env_mode.value})"
            )
            self._lock_holder = True
            return LockResult(
                success=True,
                mode=FallbackMode.SINGLE_INSTANCE,
                reason="redis_unavailable_fallback"
            )

        try:
            client = self._get_redis_client()
            if not client:
                return LockResult(
                    success=False,
                    mode=FallbackMode.REJECT,
                    reason="redis_client_unavailable"
                )

            # Keep the candidate token local: committing it before SET NX
            # succeeds would clobber the token of a lock we already hold and
            # make the refresh loop believe the lock was stolen.
            token = f"{os.getpid()}_{int(time.time())}_{uuid.uuid4().hex[:8]}"
            if client.set(self.lock_name, token, nx=True, ex=self.ttl):
                logger.info(f"✅ 成功获取分布式锁: {self.lock_name}")
                self._lock_value = token
                self._lock_holder = True
                self._start_refresh_thread()
                return LockResult(
                    success=True,
                    mode=FallbackMode.REDIS
                )

            # The key exists. If it carries our own token we are still the
            # holder (repeat acquire() call), not a losing contender.
            if self._is_current_owner(client):
                self._lock_holder = True
                self._start_refresh_thread()
                return LockResult(
                    success=True,
                    mode=FallbackMode.REDIS
                )

            logger.info(f"⏳ 分布式锁已被其他节点持有: {self.lock_name}")
            self._lock_holder = False
            return LockResult(
                success=False,
                mode=FallbackMode.REDIS,
                reason="lock_already_held"
            )
        except Exception as e:
            logger.error(f"❌ 获取分布式锁异常: {e}")
            if env_mode == EnvMode.MULTI_WORKER:
                return LockResult(
                    success=False,
                    mode=FallbackMode.REJECT,
                    reason=f"redis_error_in_multi_worker: {e}"
                )
            self._lock_holder = True
            return LockResult(
                success=True,
                mode=FallbackMode.SINGLE_INSTANCE,
                reason=f"redis_error_fallback: {e}"
            )

    def _start_refresh_thread(self):
        """Start the daemon thread that keeps extending the lock expiry.

        A no-op while a refresh thread is alive and has not been asked to
        stop. A thread that already exited (lock lost) or was stopped by
        ``release()`` is replaced, so re-acquiring resumes refreshing.
        """
        running = self._refresh_thread
        if running is not None and running.is_alive() and not self._stop_event.is_set():
            return

        # Each thread gets its own stop event so that a previous thread that
        # is still winding down cannot be revived by the new start.
        stop_event = threading.Event()
        self._stop_event = stop_event
        # Floor the interval: ttl // 2 is 0 for ttl == 1 and would spin.
        interval = max(self.ttl / 2, 0.1)

        def refresh_loop():
            # Event.wait doubles as an interruptible sleep: it returns True
            # (ending the loop) as soon as release() sets the event.
            while not stop_event.wait(interval):
                try:
                    if not (self._lock_holder and self._lock_value):
                        continue
                    client = self._get_redis_client()
                    if not client:
                        continue
                    if self._is_current_owner(client):
                        client.expire(self.lock_name, self.ttl)
                        logger.debug(f"🔄 已刷新分布式锁: {self.lock_name}")
                    elif not stop_event.is_set():
                        # Only demote when we were not stopped mid-iteration;
                        # release() clears the token and must not be reported
                        # as the lock being taken over.
                        logger.warning(f"⚠️ 锁已被其他节点抢占: {self.lock_name}")
                        self._lock_holder = False
                        break
                except Exception as e:
                    # A failed refresh risks losing the lock; make it visible.
                    logger.warning(f"刷新分布式锁失败: {e}")

        self._refresh_thread = threading.Thread(target=refresh_loop, daemon=True)
        self._refresh_thread.start()
        logger.info("✅ 分布式锁刷新线程已启动")

    def release(self):
        """Stop refreshing and delete the Redis key if this instance owns it.

        Errors are logged, never raised. Local holder state is always reset
        so the instance can call ``acquire()`` again afterwards.
        """
        try:
            self._stop_event.set()
            worker = self._refresh_thread
            if worker is not None and worker.is_alive():
                worker.join(timeout=1)
            if self._lock_holder and self._lock_value:
                client = self._get_redis_client()
                # Only delete a key that still carries our token, so a lock
                # taken over by another node after expiry is left alone.
                if client and self._is_current_owner(client):
                    client.delete(self.lock_name)
                    logger.info(f"✅ 已释放分布式锁: {self.lock_name}")
        except Exception as e:
            logger.error(f"❌ 释放分布式锁失败: {e}")
        finally:
            self._lock_holder = False
            self._lock_value = None
            # Forget the stopped thread so the next acquire() starts a new one.
            self._refresh_thread = None

    @property
    def is_holder(self) -> bool:
        """Whether this instance currently considers itself the lock holder."""
        return self._lock_holder
# Module-level shared instance, created at import time with the constructor
# defaults (no arguments are passed).
enhanced_distributed_lock = EnhancedDistributedLock()