fix: 위험도 점수 수역 가산 + 클러스터 그리드 셀 방식 전환

- risk.py: 특정어업수역(ZONE_I~IV) 내 미허가 어선 +25점 가산
- fleet.py: DBSCAN → 고정 그리드 셀(5NM) 클러스터링 (체인 효과 차단)
- max_cluster_size=20으로 거대 클러스터 방지

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 17:38:49 +09:00 · 2026-03-20 17:38:49 +09:00 · d13baf302f
--- a/prediction/algorithms/fleet.py
+++ b/prediction/algorithms/fleet.py
@@ -10,37 +10,40 @@ logger = logging.getLogger(__name__)
 def detect_group_clusters(
    vessel_snapshots: list[dict],
-    spatial_eps_nm: float = 3.0,
+    cell_size_nm: float = 5.0,
    time_eps_hours: float = 2.0,
    min_vessels: int = 3,
    max_cluster_size: int = 20,
 ) -> dict[int, list[dict]]:
-    """DBSCAN 시공간 클러스터링으로 집단 탐지."""
+    """고정 그리드 셀 기반 클러스터링 — DBSCAN 체인 효과 방지.
    cell_size_nm 격자로 공간을 분할하여 같은 셀에 속하는 선박을 그룹핑.
    체인 효과 없이 max_cluster_size 제한으로 거대 클러스터 방지.
    """
    if len(vessel_snapshots) < min_vessels:
        return {}
-    try:
+    # 위도 1도 ≈ 60NM, 경도 1도 ≈ 60*cos(lat) NM
-        from sklearn.cluster import DBSCAN
+    # 중위도(35도) 기준 경도 1도 ≈ 49NM
-    except ImportError:
+    cell_lat = cell_size_nm / 60.0
-        logger.warning('sklearn not available for DBSCAN clustering')
+    cell_lng = cell_size_nm / 49.0
        return {}
-    lat_rad = [math.radians(v['lat']) * EARTH_RADIUS_NM for v in vessel_snapshots]
+    # 격자 셀별 선박 그룹핑
-    lon_rad = [math.radians(v['lon']) * EARTH_RADIUS_NM for v in vessel_snapshots]
+    cells: dict[tuple[int, int], list[dict]] = {}
-
+    for v in vessel_snapshots:
-    # 시간을 NM 단위로 정규화
+        cell_key = (int(v['lat'] / cell_lat), int(v['lon'] / cell_lng))
-    timestamps = [pd.Timestamp(v['timestamp']).timestamp() for v in vessel_snapshots]
+        cells.setdefault(cell_key, []).append(v)
    t_min = min(timestamps)
    time_nm = [(t - t_min) / 3600 * 10 / time_eps_hours for t in timestamps]
    X = np.array(list(zip(lat_rad, lon_rad, time_nm)))
    db = DBSCAN(eps=spatial_eps_nm, min_samples=min_vessels, metric='euclidean').fit(X)
    clusters: dict[int, list[dict]] = {}
-    for idx, label in enumerate(db.labels_):
+    cluster_id = 0
-        if label == -1:
+    for cell_vessels in cells.values():
        if len(cell_vessels) < min_vessels:
            continue
-        clusters.setdefault(int(label), []).append(vessel_snapshots[idx])
+        # 셀 내 선박을 max_cluster_size 단위로 분할
        for i in range(0, len(cell_vessels), max_cluster_size):
            batch = cell_vessels[i:i + max_cluster_size]
            if len(batch) >= min_vessels:
                clusters[cluster_id] = batch
                cluster_id += 1
    return clusters
--- a/prediction/algorithms/risk.py
+++ b/prediction/algorithms/risk.py
@@ -32,6 +32,10 @@ def compute_vessel_risk_score(
        score += 40
    elif zone == 'CONTIGUOUS_ZONE':
        score += 10
    elif zone.startswith('ZONE_'):
        # 특정어업수역 내 — 무허가면 가산
        if is_permitted is not None and not is_permitted:
            score += 25
    # 2. 조업 행위 (최대 30점)
    segs = detect_fishing_segments(df_vessel)