Stabilize discovery lifecycle and rescan summary

This commit is contained in:
Artem Kokos
2026-05-16 10:59:31 +07:00
parent 15529961d6
commit 1ac66ec4ac
8 changed files with 604 additions and 124 deletions

View File

@@ -1,64 +1,224 @@
import asyncio
import ipaddress
import json
import socket
import logging
import os
import ipaddress
from typing import List, Dict
import socket
import struct
from dataclasses import dataclass
from typing import Dict, List
try:
import fcntl
except ImportError: # pragma: no cover - не на Linux
fcntl = None
logger = logging.getLogger(__name__)
# Minimum allowed prefixlen (a larger number means a smaller network).
# /16 = 65534 hosts, /8 = 16M hosts -- too many.
MIN_PREFIX_LEN = 16
# Default returned by _env_min_prefix_len() when DISCOVERY_ENV_MIN_PREFIX_LEN is unset.
ENV_MIN_PREFIX_LEN = 16
# Default returned by _auto_min_prefix_len() when DISCOVERY_AUTO_MIN_PREFIX_LEN is unset.
AUTO_MIN_PREFIX_LEN = 24
# Default pause between background scans when DISCOVERY_INTERVAL_SECONDS is unset.
DEFAULT_DISCOVERY_INTERVAL_SECONDS = 600
# Default number of missed background scans tolerated before a device is
# dropped, used when DISCOVERY_BACKGROUND_MISSING_THRESHOLD is unset.
DEFAULT_BACKGROUND_MISSING_THRESHOLD = 2
# Interface-name prefixes matched case-insensitively by _is_excluded_interface();
# matching interfaces are only used when no other candidate is available.
EXCLUDED_INTERFACE_PREFIXES = (
"lo",
"docker",
"br-",
"veth",
"virbr",
"tun",
"tap",
"wg",
"tailscale",
"zt",
"utun",
"ppp",
)
# ioctl request codes passed to fcntl.ioctl() to read an interface's IPv4
# address and netmask respectively.
SIOCGIFADDR = 0x8915
SIOCGIFNETMASK = 0x891B
@dataclass(frozen=True)
class InterfaceSubnet:
    """Immutable description of one network interface's IPv4 configuration."""

    # Interface name as returned by the OS interface listing.
    name: str
    # IPv4 address assigned to the interface.
    address: ipaddress.IPv4Address
    # Network built from the interface address and its netmask.
    network: ipaddress.IPv4Network
class DiscoveryService:
def __init__(self, port: int = 38899):
self.port = port
self.discover_msg = {"method": "getPilot", "params": {}}
self._scan_lock = asyncio.Lock()
def _get_target_subnets(self) -> List[str]:
"""
Определяет список подсетей для сканирования.
Приоритет:
1. Переменная окружения SCAN_NETWORK (можно через запятую: "192.168.0.0/24,192.168.1.0/24")
2. Автоопределение по дефолтному шлюзу
"""
env_network = os.getenv("SCAN_NETWORK")
if env_network:
subnets = []
for s in env_network.split(","):
s = s.strip()
def _env_min_prefix_len(self) -> int:
    """Return the minimum prefix length allowed for explicitly configured subnets.

    The value comes from the DISCOVERY_ENV_MIN_PREFIX_LEN environment
    variable. If the variable is unset, is not an integer, or is outside the
    valid IPv4 prefix range 0..32, the ENV_MIN_PREFIX_LEN default is returned
    so a bad environment value cannot make subnet parsing raise.

    Returns:
        The minimum prefix length to enforce.
    """
    raw_value = os.getenv("DISCOVERY_ENV_MIN_PREFIX_LEN")
    if raw_value is None:
        return int(ENV_MIN_PREFIX_LEN)
    try:
        prefix_len = int(raw_value)
    except ValueError:
        prefix_len = None
    # An out-of-range prefix would make IPv4Network() raise further down the line.
    if prefix_len is None or not 0 <= prefix_len <= 32:
        logger.warning(
            "Некорректное значение DISCOVERY_ENV_MIN_PREFIX_LEN=%r, использую /%s",
            raw_value,
            ENV_MIN_PREFIX_LEN,
        )
        return int(ENV_MIN_PREFIX_LEN)
    return prefix_len
def _auto_min_prefix_len(self) -> int:
    """Return the minimum prefix length used for auto-detected subnets.

    The value comes from the DISCOVERY_AUTO_MIN_PREFIX_LEN environment
    variable. If the variable is unset, is not an integer, or is outside the
    valid IPv4 prefix range 0..32, the AUTO_MIN_PREFIX_LEN default is returned
    so a bad environment value cannot make network construction raise.

    Returns:
        The minimum prefix length to enforce.
    """
    raw_value = os.getenv("DISCOVERY_AUTO_MIN_PREFIX_LEN")
    if raw_value is None:
        return int(AUTO_MIN_PREFIX_LEN)
    try:
        prefix_len = int(raw_value)
    except ValueError:
        prefix_len = None
    # An out-of-range prefix would make IPv4Network() raise when it is applied.
    if prefix_len is None or not 0 <= prefix_len <= 32:
        logger.warning(
            "Некорректное значение DISCOVERY_AUTO_MIN_PREFIX_LEN=%r, использую /%s",
            raw_value,
            AUTO_MIN_PREFIX_LEN,
        )
        return int(AUTO_MIN_PREFIX_LEN)
    return prefix_len
def _background_interval_seconds(self) -> int:
    """Return the pause between background discovery scans, in seconds.

    The value comes from the DISCOVERY_INTERVAL_SECONDS environment variable.
    If the variable is unset, is not an integer, or is not positive, the
    DEFAULT_DISCOVERY_INTERVAL_SECONDS default is returned; a zero or
    negative pause would make the background loop rescan without waiting.

    Returns:
        A positive number of seconds.
    """
    raw_value = os.getenv("DISCOVERY_INTERVAL_SECONDS")
    if raw_value is None:
        return int(DEFAULT_DISCOVERY_INTERVAL_SECONDS)
    try:
        seconds = int(raw_value)
    except ValueError:
        seconds = 0
    if seconds <= 0:
        logger.warning(
            "Некорректное значение DISCOVERY_INTERVAL_SECONDS=%r, использую %s",
            raw_value,
            DEFAULT_DISCOVERY_INTERVAL_SECONDS,
        )
        return int(DEFAULT_DISCOVERY_INTERVAL_SECONDS)
    return seconds
def _background_missing_threshold(self) -> int:
    """Return how many missed background scans are tolerated before removal.

    The value comes from the DISCOVERY_BACKGROUND_MISSING_THRESHOLD
    environment variable. If the variable is unset or is not an integer, the
    DEFAULT_BACKGROUND_MISSING_THRESHOLD default is returned. Values below 1
    are raised to 1, which is how they already behave when compared against
    the per-device miss counter.

    Returns:
        A threshold of at least 1.
    """
    raw_value = os.getenv("DISCOVERY_BACKGROUND_MISSING_THRESHOLD")
    if raw_value is None:
        return int(DEFAULT_BACKGROUND_MISSING_THRESHOLD)
    try:
        threshold = int(raw_value)
    except ValueError:
        logger.warning(
            "Некорректное значение DISCOVERY_BACKGROUND_MISSING_THRESHOLD=%r, использую %s",
            raw_value,
            DEFAULT_BACKGROUND_MISSING_THRESHOLD,
        )
        return int(DEFAULT_BACKGROUND_MISSING_THRESHOLD)
    return max(1, threshold)
def _parse_env_subnets(self, value: str) -> List[str]:
subnets: list[str] = []
min_prefix_len = self._env_min_prefix_len()
for raw_subnet in value.split(","):
subnet = raw_subnet.strip()
if not subnet:
continue
try:
network = ipaddress.IPv4Network(subnet, strict=False)
except ValueError as exc:
logger.error("Неверный формат подсети %s: %s", subnet, exc)
continue
if network.prefixlen < min_prefix_len:
logger.warning(
"Подсеть %s слишком большая (/%s), ограничиваю до /%s",
subnet,
network.prefixlen,
min_prefix_len,
)
network = ipaddress.IPv4Network(
f"{network.network_address}/{min_prefix_len}", strict=False
)
subnets.append(str(network))
return subnets
def _interface_subnets(self) -> list[InterfaceSubnet]:
if fcntl is None:
return []
candidates: list[InterfaceSubnet] = []
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
for _, interface_name in socket.if_nameindex():
ifreq = struct.pack("256s", interface_name.encode("utf-8")[:15])
try:
net = ipaddress.IPv4Network(s, strict=False)
if net.prefixlen < MIN_PREFIX_LEN:
logger.warning(
f"Подсеть {s} слишком большая (/{net.prefixlen}), "
f"ограничиваю до /{MIN_PREFIX_LEN}"
)
net = ipaddress.IPv4Network(
f"{net.network_address}/{MIN_PREFIX_LEN}", strict=False
)
subnets.append(str(net))
except ValueError as e:
logger.error(f"Неверный формат подсети {s}: {e}")
return subnets if subnets else ["192.168.1.0/24"]
address = socket.inet_ntoa(
fcntl.ioctl(sock.fileno(), SIOCGIFADDR, ifreq)[20:24]
)
netmask = socket.inet_ntoa(
fcntl.ioctl(sock.fileno(), SIOCGIFNETMASK, ifreq)[20:24]
)
except OSError:
continue
# Автоопределение
ipv4 = ipaddress.IPv4Address(address)
if ipv4.is_loopback or ipv4.is_link_local:
continue
network = ipaddress.IPv4Network(f"{address}/{netmask}", strict=False)
candidates.append(
InterfaceSubnet(
name=interface_name,
address=ipv4,
network=network,
)
)
return candidates
def _is_excluded_interface(self, interface_name: str) -> bool:
    """Tell whether an interface name starts with one of the excluded prefixes.

    The comparison is case-insensitive: the name is lower-cased before it is
    checked against EXCLUDED_INTERFACE_PREFIXES.
    """
    normalized_name = interface_name.lower()
    return any(
        normalized_name.startswith(prefix)
        for prefix in EXCLUDED_INTERFACE_PREFIXES
    )
def _normalize_auto_network(
    self, candidate: InterfaceSubnet
) -> ipaddress.IPv4Network:
    """Return the network to scan for an auto-detected interface.

    If the interface's network is wider than the auto-discovery minimum
    prefix length, the scan is narrowed to the segment of that prefix length
    which contains the interface address, and the decision is logged.
    Otherwise the interface's own prefix length is kept.
    """
    floor_prefix = self._auto_min_prefix_len()
    own_prefix = candidate.network.prefixlen
    if own_prefix >= floor_prefix:
        chosen_prefix = own_prefix
    else:
        chosen_prefix = floor_prefix
        logger.info(
            "Авто-discovery: подсеть %s (%s) шире /%s, сканирую локальный сегмент /%s",
            candidate.network,
            candidate.name,
            floor_prefix,
            chosen_prefix,
        )
    # strict=False lets the host address stand in for the network address.
    return ipaddress.IPv4Network(
        f"{candidate.address}/{chosen_prefix}", strict=False
    )
def _collect_auto_subnets(self) -> list[str]:
candidates = self._interface_subnets()
if not candidates:
return []
private_candidates = [candidate for candidate in candidates if candidate.address.is_private]
usable_candidates = private_candidates or candidates
preferred_candidates = [
candidate
for candidate in usable_candidates
if not self._is_excluded_interface(candidate.name)
]
selected_candidates = preferred_candidates or usable_candidates
subnets: list[str] = []
seen: set[str] = set()
for candidate in selected_candidates:
normalized = str(self._normalize_auto_network(candidate))
if normalized in seen:
continue
seen.add(normalized)
subnets.append(normalized)
if subnets:
logger.info(
"Авто-discovery: выбраны подсети %s",
", ".join(subnets),
)
return subnets
def _fallback_subnet(self) -> list[str]:
try:
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
# Коннект не создает трафика, но заставляет ОС выбрать нужный интерфейс
s.connect(("8.8.8.8", 80))
local_ip = s.getsockname()[0]
network = ipaddress.IPv4Network(f"{local_ip}/24", strict=False)
return [str(network)]
except Exception as e:
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
sock.connect(("8.8.8.8", 80))
local_ip = sock.getsockname()[0]
except Exception as exc:
logger.error(
f"Discovery Error: Не удалось определить подсеть автоматически: {e}"
"Discovery Error: Не удалось определить подсеть автоматически: %s",
exc,
)
return ["192.168.1.0/24"]
network = ipaddress.IPv4Network(
f"{local_ip}/{self._auto_min_prefix_len()}",
strict=False,
)
logger.info(
"Авто-discovery fallback: использую локальный сегмент %s", network
)
return [str(network)]
def _get_target_subnets(self) -> List[str]:
env_network = os.getenv("SCAN_NETWORK", "").strip()
if env_network:
subnets = self._parse_env_subnets(env_network)
return subnets if subnets else ["192.168.1.0/24"]
auto_subnets = self._collect_auto_subnets()
if auto_subnets:
return auto_subnets
return self._fallback_subnet()
async def scan_network(self, timeout: float = 2.0) -> List[Dict]:
subnets = self._get_target_subnets()
found_devices = []
@@ -69,65 +229,119 @@ class DiscoveryService:
loop = asyncio.get_running_loop()
message = json.dumps(self.discover_msg).encode()
logger.debug(f"Начинаю сканирование сетей: {', '.join(subnets)}...")
logger.debug("Начинаю сканирование сетей: %s...", ", ".join(subnets))
# Рассылаем запросы по всем целевым сетям
for subnet in subnets:
try:
network = ipaddress.IPv4Network(subnet)
for ip in network.hosts():
try:
sock.sendto(message, (str(ip), self.port))
except Exception:
try:
for subnet in subnets:
try:
network = ipaddress.IPv4Network(subnet)
for ip in network.hosts():
try:
sock.sendto(message, (str(ip), self.port))
except Exception:
continue
except ValueError as exc:
logger.error("Неверный формат подсети %s: %s", subnet, exc)
start_time = loop.time()
while (loop.time() - start_time) < timeout:
try:
data, addr = await asyncio.wait_for(
loop.run_in_executor(None, sock.recvfrom, 1024), timeout=0.2
)
resp = json.loads(data.decode())
if "result" not in resp:
continue
except ValueError as e:
logger.error(f"Неверный формат подсети {subnet}: {e}")
# Собираем ответы
start_time = loop.time()
while (loop.time() - start_time) < timeout:
try:
# Используем небольшой таймаут на чтение, чтобы успевать выходить из цикла
data, addr = await asyncio.wait_for(
loop.run_in_executor(None, sock.recvfrom, 1024), timeout=0.2
)
result = resp["result"]
mac = result.get("mac")
if not mac:
continue
resp = json.loads(data.decode())
if "result" in resp:
res = resp["result"]
mac = res.get("mac")
if mac:
found_devices.append(
{
"mac": mac,
"ip": addr[0],
"state": {
"on": res.get("state"),
"dimming": res.get("dimming"),
"temp": res.get("temp"),
},
}
)
logger.info(f" [+] Найдена лампа: {addr[0]} | MAC: {mac}")
found_devices.append(
{
"mac": mac,
"ip": addr[0],
"state": {
"on": result.get("state"),
"dimming": result.get("dimming"),
"temp": result.get("temp"),
},
}
)
logger.info(" [+] Найдена лампа: %s | MAC: %s", addr[0], mac)
except (asyncio.TimeoutError, json.JSONDecodeError):
continue
except Exception:
await asyncio.sleep(0.01)
continue
except (asyncio.TimeoutError, json.JSONDecodeError):
continue
except Exception:
await asyncio.sleep(0.01)
continue
finally:
sock.close()
sock.close()
# Фильтруем дубликаты
return list({d["mac"]: d for d in found_devices}.values())
return list({device["mac"]: device for device in found_devices}.values())
async def start_background_discovery(self, state_manager, interval=600):
"""Запускает бесконечный цикл сканирования."""
async def _refresh_devices(
    self,
    state_manager,
    *,
    mode: str,
    remove_missing: bool,
    missing_threshold: int,
    timeout: float = 2.0,
):
    """Scan the network once and apply the result to the state manager.

    The scan lock is held for the duration of the refresh so that refreshes
    run one at a time.

    Args:
        state_manager: Object providing apply_discovery_snapshot().
        mode: Label written to the summary log line.
        remove_missing: Forwarded to apply_discovery_snapshot().
        missing_threshold: Forwarded to apply_discovery_snapshot().
        timeout: Forwarded to scan_network().

    Returns:
        Whatever apply_discovery_snapshot() returned.
    """
    async with self._scan_lock:
        discovered = await self.scan_network(timeout=timeout)
        summary = state_manager.apply_discovery_snapshot(
            discovered,
            remove_missing=remove_missing,
            missing_threshold=missing_threshold,
        )
        counters = (
            summary.found,
            summary.added,
            summary.updated,
            summary.removed_offline,
            summary.pending_removal,
            summary.online,
        )
        logger.info(
            "Discovery (%s): found=%s added=%s updated=%s removed=%s pending_removal=%s online=%s",
            mode,
            *counters,
        )
        return summary
async def startup_refresh(self, state_manager, timeout: float = 2.0):
    """Run a refresh labelled "startup".

    Devices that do not answer are removed after a single missed scan.

    Args:
        state_manager: State manager the scan result is applied to.
        timeout: Scan timeout forwarded to the refresh.

    Returns:
        The result of the underlying refresh.
    """
    options = {
        "mode": "startup",
        "remove_missing": True,
        "missing_threshold": 1,
        "timeout": timeout,
    }
    outcome = await self._refresh_devices(state_manager, **options)
    return outcome
async def manual_refresh(self, state_manager, timeout: float = 2.0):
    """Run a refresh labelled "manual".

    Devices that do not answer are removed after a single missed scan.

    Args:
        state_manager: State manager the scan result is applied to.
        timeout: Scan timeout forwarded to the refresh.

    Returns:
        The result of the underlying refresh.
    """
    options = {
        "mode": "manual",
        "remove_missing": True,
        "missing_threshold": 1,
        "timeout": timeout,
    }
    outcome = await self._refresh_devices(state_manager, **options)
    return outcome
async def background_refresh(self, state_manager, timeout: float = 2.0):
    """Run a refresh labelled "background".

    Unlike the startup and manual variants, the number of missed scans
    tolerated before a device is removed comes from
    _background_missing_threshold().

    Args:
        state_manager: State manager the scan result is applied to.
        timeout: Scan timeout forwarded to the refresh.

    Returns:
        The result of the underlying refresh.
    """
    options = {
        "mode": "background",
        "remove_missing": True,
        "missing_threshold": self._background_missing_threshold(),
        "timeout": timeout,
    }
    outcome = await self._refresh_devices(state_manager, **options)
    return outcome
async def start_background_discovery(self, state_manager, interval: int | None = None):
interval_seconds = interval or self._background_interval_seconds()
while True:
await asyncio.sleep(interval_seconds)
try:
found_devices = await self.scan_network()
for dev_data in found_devices:
state_manager.update_device(dev_data)
logger.info(f"Discovery: онлайн {len(state_manager.devices)} устройств")
except Exception as e:
logger.error(f"Discovery background error: {e}")
await asyncio.sleep(interval)
await self.background_refresh(state_manager)
except Exception as exc:
logger.error("Discovery background error: %s", exc)

View File

@@ -1,28 +1,96 @@
from dataclasses import asdict, dataclass
import logging
from typing import Dict, List, Optional
from typing import Dict, List
from app.models.device import DeviceSchema, GroupModel
from app.core.discovery import DiscoveryService
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class DiscoveryApplyResult:
    """Immutable counters describing the outcome of applying one discovery scan."""

    # Number of distinct devices present in the scan.
    found: int
    # Devices in the scan that were not known before.
    added: int
    # Devices in the scan that were already known.
    updated: int
    # Known devices removed because they did not answer.
    removed_offline: int
    # Devices currently tracked as missing but not yet removed.
    pending_removal: int
    # Devices known after the scan was applied.
    online: int

    def to_dict(self) -> dict:
        """Return the counters as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
class StateManager:
def __init__(self):
# Храним устройства как Pydantic объекты
self.devices: Dict[str, DeviceSchema] = {}
# Группы как модели SQLAlchemy
self.groups: Dict[str, GroupModel] = {}
# Сколько подряд циклов discovery устройство не видно
self._missing_scan_counts: Dict[str, int] = {}
def update_device(self, device_data: dict):
"""Обновляет или добавляет устройство в состояние."""
mac = device_data["mac"]
# Используем DeviceSchema вместо Device
current = self.devices.get(mac)
device = DeviceSchema(
id=mac, ip=device_data["ip"], name=f"WiZ {mac[-4:]}", room="Default"
id=mac,
ip=device_data["ip"],
name=current.name if current else f"WiZ {mac[-4:]}",
room=current.room if current else "Default",
)
self.devices[mac] = device
self._missing_scan_counts.pop(mac, None)
def apply_discovery_snapshot(
    self,
    found_devices: list[dict],
    *,
    remove_missing: bool,
    missing_threshold: int = 1,
) -> DiscoveryApplyResult:
    """Merge one discovery scan into the known device list.

    Every device in the scan is added or refreshed through update_device().
    When remove_missing is true, each known device absent from the scan has
    its miss counter incremented and is removed once the counter reaches
    missing_threshold.

    Args:
        found_devices: Scan results; each item must contain a "mac" key.
        remove_missing: Whether absent devices are counted and removed.
        missing_threshold: Consecutive misses required before removal.

    Returns:
        Counters summarising what changed.
    """
    snapshot = {entry["mac"]: entry for entry in found_devices}

    added = updated = 0
    for mac, payload in snapshot.items():
        # Membership must be checked before the update inserts the device.
        already_known = mac in self.devices
        self.update_device(payload)
        if already_known:
            updated += 1
        else:
            added += 1

    removed_offline = 0
    # Materialised up front because devices are removed inside the loop.
    absent = (
        [mac for mac in self.devices if mac not in snapshot]
        if remove_missing
        else []
    )
    for mac in absent:
        misses = self._missing_scan_counts.get(mac, 0) + 1
        if misses >= missing_threshold:
            self.devices.pop(mac, None)
            self._missing_scan_counts.pop(mac, None)
            removed_offline += 1
            logger.info("Устройство %s не ответило -- убрано из списка", mac)
        else:
            self._missing_scan_counts[mac] = misses
            logger.info(
                "Устройство %s не ответило (%s/%s), оставляю до следующего цикла",
                mac,
                misses,
                missing_threshold,
            )

    return DiscoveryApplyResult(
        found=len(snapshot),
        added=added,
        updated=updated,
        removed_offline=removed_offline,
        pending_removal=len(self._missing_scan_counts),
        online=len(self.devices),
    )
def get_group_ips(self, group_id: str) -> List[str]:
"""Возвращает список IP всех ламп, входящих в группу."""