From d13baf302f63835daf06c2b0800b0c51fc5d4bed Mon Sep 17 00:00:00 2001 From: htlee Date: Fri, 20 Mar 2026 17:38:49 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20=EC=9C=84=ED=97=98=EB=8F=84=20=EC=A0=90?= =?UTF-8?q?=EC=88=98=20=EC=88=98=EC=97=AD=20=EA=B0=80=EC=82=B0=20+=20?= =?UTF-8?q?=ED=81=B4=EB=9F=AC=EC=8A=A4=ED=84=B0=20=EA=B7=B8=EB=A6=AC?= =?UTF-8?q?=EB=93=9C=20=EC=85=80=20=EB=B0=A9=EC=8B=9D=20=EC=A0=84=ED=99=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - risk.py: 특정어업수역(ZONE_I~IV) 내 미허가 어선 +25점 가산 - fleet.py: DBSCAN → 고정 그리드 셀(5NM) 클러스터링 (체인 효과 차단) - max_cluster_size=20으로 거대 클러스터 방지 Co-Authored-By: Claude Opus 4.6 (1M context) --- prediction/algorithms/fleet.py | 47 ++++++++++++++++++---------------- prediction/algorithms/risk.py | 4 +++ 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/prediction/algorithms/fleet.py b/prediction/algorithms/fleet.py index 98224a1..f666639 100644 --- a/prediction/algorithms/fleet.py +++ b/prediction/algorithms/fleet.py @@ -10,37 +10,40 @@ logger = logging.getLogger(__name__) def detect_group_clusters( vessel_snapshots: list[dict], - spatial_eps_nm: float = 3.0, - time_eps_hours: float = 2.0, + cell_size_nm: float = 5.0, min_vessels: int = 3, + max_cluster_size: int = 20, ) -> dict[int, list[dict]]: - """DBSCAN 시공간 클러스터링으로 집단 탐지.""" + """고정 그리드 셀 기반 클러스터링 — DBSCAN 체인 효과 방지. + + cell_size_nm 격자로 공간을 분할하여 같은 셀에 속하는 선박을 그룹핑. + 체인 효과 없이 max_cluster_size 제한으로 거대 클러스터 방지. + """ if len(vessel_snapshots) < min_vessels: return {} - try: - from sklearn.cluster import DBSCAN - except ImportError: - logger.warning('sklearn not available for DBSCAN clustering') - return {} + # 위도 1도 ≈ 60NM, 경도 1도 ≈ 60*cos(lat) NM + # 중위도(35도) 기준 경도 1도 ≈ 49NM + cell_lat = cell_size_nm / 60.0 + cell_lng = cell_size_nm / 49.0 - lat_rad = [math.radians(v['lat']) * EARTH_RADIUS_NM for v in vessel_snapshots] - lon_rad = [math.radians(v['lon']) * EARTH_RADIUS_NM for v in vessel_snapshots] - - # 시간을 NM 단위로 정규화 - timestamps = [pd.Timestamp(v['timestamp']).timestamp() for v in vessel_snapshots] - t_min = min(timestamps) - time_nm = [(t - t_min) / 3600 * 10 / time_eps_hours for t in timestamps] - - X = np.array(list(zip(lat_rad, lon_rad, time_nm))) - - db = DBSCAN(eps=spatial_eps_nm, min_samples=min_vessels, metric='euclidean').fit(X) + # 격자 셀별 선박 그룹핑 + cells: dict[tuple[int, int], list[dict]] = {} + for v in vessel_snapshots: + cell_key = (int(v['lat'] / cell_lat), int(v['lon'] / cell_lng)) + cells.setdefault(cell_key, []).append(v) clusters: dict[int, list[dict]] = {} - for idx, label in enumerate(db.labels_): - if label == -1: + cluster_id = 0 + for cell_vessels in cells.values(): + if len(cell_vessels) < min_vessels: continue - clusters.setdefault(int(label), []).append(vessel_snapshots[idx]) + # 셀 내 선박을 max_cluster_size 단위로 분할 + for i in range(0, len(cell_vessels), max_cluster_size): + batch = cell_vessels[i:i + max_cluster_size] + if len(batch) >= min_vessels: + clusters[cluster_id] = batch + cluster_id += 1 return clusters diff --git a/prediction/algorithms/risk.py b/prediction/algorithms/risk.py index b11b3c0..d0c58b2 100644 --- a/prediction/algorithms/risk.py +++ b/prediction/algorithms/risk.py @@ -32,6 +32,10 @@ def compute_vessel_risk_score( score += 40 elif zone == 'CONTIGUOUS_ZONE': score += 10 + elif zone.startswith('ZONE_'): + # 특정어업수역 내 — 무허가면 가산 + if is_permitted is not None and not is_permitted: + score += 25 # 2. 조업 행위 (최대 30점) segs = detect_fishing_segments(df_vessel) -- 2.45.2