def detect_group_clusters(
    vessel_snapshots: list[dict],
    cell_size_nm: float = 5.0,
    min_vessels: int = 3,
    max_cluster_size: int = 20,
) -> dict[int, list[dict]]:
    """Group vessel snapshots into clusters using a fixed lat/lon grid.

    Space is partitioned into cells roughly ``cell_size_nm`` nautical miles
    on a side, and snapshots that fall into the same cell are grouped
    together. Unlike density-based chaining, two vessels can only share a
    cluster when they share a cell, and ``max_cluster_size`` caps how many
    vessels a single cluster may hold.

    Args:
        vessel_snapshots: Snapshot dicts; each must provide numeric ``'lat'``
            and ``'lon'`` values in decimal degrees.
        cell_size_nm: Approximate edge length of one grid cell in nautical
            miles. Must be positive.
        min_vessels: Minimum number of vessels for a cell (and for each
            batch split out of it) to be reported as a cluster.
        max_cluster_size: Maximum number of vessels per cluster. Cells with
            more vessels are split, in input order, into consecutive batches
            of this size. Must be at least 1.

    Returns:
        Mapping of sequential cluster id (starting at 0) to the list of
        snapshot dicts in that cluster. Empty when fewer than
        ``min_vessels`` snapshots are supplied or no cell qualifies.

    Raises:
        ValueError: If ``cell_size_nm`` is not positive or
            ``max_cluster_size`` is less than 1.

    Note:
        When a cell is split, a trailing batch smaller than ``min_vessels``
        is discarded, so those vessels end up in no cluster.
    """
    if cell_size_nm <= 0:
        raise ValueError('cell_size_nm must be positive')
    if max_cluster_size < 1:
        raise ValueError('max_cluster_size must be at least 1')

    if len(vessel_snapshots) < min_vessels:
        return {}

    # 1 degree of latitude ~ 60 NM; 1 degree of longitude ~ 60 * cos(lat) NM.
    # The longitude scale is fixed at the mid-latitude (35 deg) value of
    # ~49 NM per degree, so cells are only approximately square elsewhere.
    cell_lat = cell_size_nm / 60.0
    cell_lng = cell_size_nm / 49.0

    # Bucket every snapshot by the grid cell that contains it.
    cells: dict[tuple[int, int], list[dict]] = {}
    for v in vessel_snapshots:
        # floor(), not int(): int() truncates toward zero, which would merge
        # the two cells on either side of the equator / prime meridian into a
        # single double-width cell and shift the grid for negative coordinates.
        cell_key = (
            math.floor(v['lat'] / cell_lat),
            math.floor(v['lon'] / cell_lng),
        )
        cells.setdefault(cell_key, []).append(v)

    clusters: dict[int, list[dict]] = {}
    cluster_id = 0
    for cell_vessels in cells.values():
        if len(cell_vessels) < min_vessels:
            continue
        # Split crowded cells into batches of at most max_cluster_size.
        for start in range(0, len(cell_vessels), max_cluster_size):
            batch = cell_vessels[start:start + max_cluster_size]
            if len(batch) >= min_vessels:
                clusters[cluster_id] = batch
                cluster_id += 1

    return clusters