From e2fc355b2ccf5c208d525279befd3fc6de757156 Mon Sep 17 00:00:00 2001 From: htlee Date: Tue, 7 Apr 2026 12:56:51 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20S2=20prediction=20=EB=B6=84=EC=84=9D=20?= =?UTF-8?q?=EC=97=94=EC=A7=84=20=EB=AA=A8=EB=85=B8=EB=A0=88=ED=8F=AC=20?= =?UTF-8?q?=EC=9D=B4=EC=8B=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit iran prediction 47개 Python 파일을 prediction/ 디렉토리로 복제: - algorithms/ 14개 분석 알고리즘 (어구추론, 다크베셀, 스푸핑, 환적, 위험도 등) - pipeline/ 7단계 분류 파이프라인 - cache/vessel_store (24h 슬라이딩 윈도우) - db/ 어댑터 (snpdb 원본조회, kcgdb 결과저장) - chat/ AI 채팅 (Ollama, 후순위) - data/ 정적 데이터 (기선, 특정어업수역 GeoJSON) config.py를 kcgaidb로 재구성 (DB명, 사용자, 비밀번호) DB 연결 검증 완료 (kcgaidb 37개 테이블 접근 확인) Makefile에 dev-prediction / dev-all 타겟 추가 CLAUDE.md에 prediction 섹션 추가 Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 7 + CLAUDE.md | 33 +- Makefile | 13 +- prediction/algorithms/__init__.py | 0 prediction/algorithms/dark_vessel.py | 59 + prediction/algorithms/fishing_pattern.py | 137 ++ prediction/algorithms/fleet.py | 177 ++ prediction/algorithms/gear_correlation.py | 854 ++++++++++ prediction/algorithms/gear_name_rules.py | 19 + prediction/algorithms/gear_parent_episode.py | 631 +++++++ .../algorithms/gear_parent_inference.py | 1477 +++++++++++++++++ prediction/algorithms/location.py | 175 ++ prediction/algorithms/polygon_builder.py | 558 +++++++ prediction/algorithms/risk.py | 126 ++ prediction/algorithms/spoofing.py | 82 + prediction/algorithms/track_similarity.py | 394 +++++ prediction/algorithms/transshipment.py | 234 +++ prediction/cache/__init__.py | 0 prediction/cache/vessel_store.py | 463 ++++++ prediction/chat/__init__.py | 0 prediction/chat/cache.py | 90 + prediction/chat/context_builder.py | 140 ++ prediction/chat/domain_knowledge.py | 471 ++++++ prediction/chat/router.py | 236 +++ prediction/chat/tools.py | 420 +++++ prediction/config.py | 66 + prediction/data/korea_baseline.json | 1 + 
prediction/data/zones/특정어업수역Ⅰ.json | 1 + prediction/data/zones/특정어업수역Ⅱ.json | 1 + prediction/data/zones/특정어업수역Ⅲ.json | 1 + prediction/data/zones/특정어업수역Ⅳ.json | 1 + prediction/db/__init__.py | 0 prediction/db/kcgdb.py | 330 ++++ prediction/db/partition_manager.py | 143 ++ prediction/db/snpdb.py | 210 +++ prediction/env.example | 34 + prediction/fleet_tracker.py | 370 +++++ prediction/main.py | 159 ++ prediction/models/__init__.py | 0 prediction/models/ais.py | 38 + prediction/models/result.py | 104 ++ prediction/pipeline/__init__.py | 0 prediction/pipeline/behavior.py | 31 + prediction/pipeline/classifier.py | 100 ++ prediction/pipeline/clusterer.py | 101 ++ prediction/pipeline/constants.py | 26 + prediction/pipeline/features.py | 93 ++ prediction/pipeline/orchestrator.py | 95 ++ prediction/pipeline/preprocessor.py | 52 + prediction/pipeline/resampler.py | 35 + prediction/requirements.txt | 12 + prediction/scheduler.py | 385 +++++ prediction/scripts/load_fleet_registry.py | 176 ++ prediction/tests/test_gear_parent_episode.py | 177 ++ .../tests/test_gear_parent_inference.py | 279 ++++ prediction/tests/test_time_bucket.py | 90 + prediction/time_bucket.py | 42 + 57 files changed, 9936 insertions(+), 13 deletions(-) create mode 100644 prediction/algorithms/__init__.py create mode 100644 prediction/algorithms/dark_vessel.py create mode 100644 prediction/algorithms/fishing_pattern.py create mode 100644 prediction/algorithms/fleet.py create mode 100644 prediction/algorithms/gear_correlation.py create mode 100644 prediction/algorithms/gear_name_rules.py create mode 100644 prediction/algorithms/gear_parent_episode.py create mode 100644 prediction/algorithms/gear_parent_inference.py create mode 100644 prediction/algorithms/location.py create mode 100644 prediction/algorithms/polygon_builder.py create mode 100644 prediction/algorithms/risk.py create mode 100644 prediction/algorithms/spoofing.py create mode 100644 prediction/algorithms/track_similarity.py create mode 100644 
prediction/algorithms/transshipment.py create mode 100644 prediction/cache/__init__.py create mode 100644 prediction/cache/vessel_store.py create mode 100644 prediction/chat/__init__.py create mode 100644 prediction/chat/cache.py create mode 100644 prediction/chat/context_builder.py create mode 100644 prediction/chat/domain_knowledge.py create mode 100644 prediction/chat/router.py create mode 100644 prediction/chat/tools.py create mode 100644 prediction/config.py create mode 100644 prediction/data/korea_baseline.json create mode 100644 prediction/data/zones/특정어업수역Ⅰ.json create mode 100644 prediction/data/zones/특정어업수역Ⅱ.json create mode 100644 prediction/data/zones/특정어업수역Ⅲ.json create mode 100644 prediction/data/zones/특정어업수역Ⅳ.json create mode 100644 prediction/db/__init__.py create mode 100644 prediction/db/kcgdb.py create mode 100644 prediction/db/partition_manager.py create mode 100644 prediction/db/snpdb.py create mode 100644 prediction/env.example create mode 100644 prediction/fleet_tracker.py create mode 100644 prediction/main.py create mode 100644 prediction/models/__init__.py create mode 100644 prediction/models/ais.py create mode 100644 prediction/models/result.py create mode 100644 prediction/pipeline/__init__.py create mode 100644 prediction/pipeline/behavior.py create mode 100644 prediction/pipeline/classifier.py create mode 100644 prediction/pipeline/clusterer.py create mode 100644 prediction/pipeline/constants.py create mode 100644 prediction/pipeline/features.py create mode 100644 prediction/pipeline/orchestrator.py create mode 100644 prediction/pipeline/preprocessor.py create mode 100644 prediction/pipeline/resampler.py create mode 100644 prediction/requirements.txt create mode 100644 prediction/scheduler.py create mode 100644 prediction/scripts/load_fleet_registry.py create mode 100644 prediction/tests/test_gear_parent_episode.py create mode 100644 prediction/tests/test_gear_parent_inference.py create mode 100644 prediction/tests/test_time_bucket.py 
create mode 100644 prediction/time_bucket.py diff --git a/.gitignore b/.gitignore index 2a3d704..6b5f67e 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,13 @@ frontend/build/ backend/target/ backend/build/ +# === Python (prediction) === +prediction/.venv/ +prediction/__pycache__/ +prediction/**/__pycache__/ +prediction/*.pyc +prediction/.env + # === Dependencies === frontend/node_modules/ node_modules/ diff --git a/CLAUDE.md b/CLAUDE.md index 2d4b589..d668cbb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -19,9 +19,11 @@ kcg-ai-monitoring/ ## 시스템 구성 ``` -[Frontend Vite :5173] ──→ [Backend Spring :8080] ──┬→ [Iran Backend :8080] (분석 데이터 read) - │ └→ [Prediction FastAPI :8001] - └→ [PostgreSQL kcgaidb] (자체 인증/권한/감사/의사결정) +[Frontend Vite :5173] ──→ [Backend Spring :8080] ──→ [PostgreSQL kcgaidb] + ↑ write + [Prediction FastAPI :8001] ──────┘ (5분 주기 분석 결과 저장) + ↑ read ↑ read + [SNPDB PostgreSQL] (AIS 원본) [Iran Backend] (레거시 프록시, 선택) ``` - **자체 백엔드**: 인증/권한/감사로그/관리자 + 운영자 의사결정 (확정/제외/학습) @@ -31,13 +33,15 @@ kcg-ai-monitoring/ ## 명령어 ```bash -make install # 의존성 설치 -make dev # 프론트 + 백엔드 동시 실행 -make dev-frontend # 프론트만 -make dev-backend # 백엔드만 -make build # 전체 빌드 -make lint # 프론트 lint -make format # 프론트 prettier +make install # 전체 의존성 설치 +make dev # 프론트 + 백엔드 동시 실행 +make dev-all # 프론트 + 백엔드 + prediction 동시 실행 +make dev-frontend # 프론트만 +make dev-backend # 백엔드만 +make dev-prediction # prediction 분석 엔진만 (FastAPI :8001) +make build # 전체 빌드 +make lint # 프론트 lint +make format # 프론트 prettier ``` ## 기술 스택 @@ -52,7 +56,14 @@ make format # 프론트 prettier - React Router 7 - ESLint 10 + Prettier -### Backend (`backend/`) — Phase 2에서 초기화 +### Prediction (`prediction/`) — 분석 엔진 +- Python 3.11+, FastAPI, APScheduler +- 14개 알고리즘 (어구 추론, 다크베셀, 스푸핑, 환적, 위험도 등) +- 7단계 분류 파이프라인 (전처리→행동→리샘플→특징→분류→클러스터→계절) +- AIS 원본: SNPDB (5분 증분), 결과: kcgaidb (직접 write) +- prediction과 backend는 DB만 공유 (HTTP 호출 X) + +### Backend (`backend/`) - Spring Boot 3.x + Java 21 - Spring Security + JWT - PostgreSQL + Flyway 
diff --git a/Makefile b/Makefile index 7f1ba41..e457db0 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,13 @@ -.PHONY: help install dev dev-frontend dev-backend build build-frontend build-backend lint format test clean +.PHONY: help install dev dev-frontend dev-backend dev-prediction build build-frontend build-backend lint format test clean help: @echo "사용 가능한 명령:" - @echo " make install - 프론트엔드 의존성 설치" + @echo " make install - 전체 의존성 설치" @echo " make dev - 프론트엔드 + 백엔드 동시 실행" + @echo " make dev-all - 프론트 + 백엔드 + prediction 동시 실행" @echo " make dev-frontend - 프론트엔드 dev 서버만 실행 (Vite)" @echo " make dev-backend - 백엔드 dev 서버만 실행 (Spring Boot)" + @echo " make dev-prediction - prediction 분석 엔진만 실행 (FastAPI :8001)" @echo " make build - 프론트엔드 + 백엔드 빌드" @echo " make build-frontend - 프론트엔드 빌드" @echo " make build-backend - 백엔드 빌드" @@ -16,6 +18,7 @@ help: install: cd frontend && npm install @if [ -f backend/pom.xml ]; then cd backend && ./mvnw dependency:resolve || true; fi + @if [ -f prediction/requirements.txt ]; then cd prediction && pip install -r requirements.txt 2>/dev/null || echo "prediction 의존성 설치는 가상환경에서 실행하세요: cd prediction && uv venv && source .venv/bin/activate && uv pip install -r requirements.txt"; fi dev-frontend: cd frontend && npm run dev @@ -24,9 +27,15 @@ dev-backend: @if [ -f backend/pom.xml ]; then cd backend && ./mvnw spring-boot:run -Dspring-boot.run.profiles=local; \ else echo "백엔드가 아직 초기화되지 않았습니다 (Phase 2에서 추가)"; fi +dev-prediction: + cd prediction && python main.py + dev: @$(MAKE) -j2 dev-frontend dev-backend +dev-all: + @$(MAKE) -j3 dev-frontend dev-backend dev-prediction + build-frontend: cd frontend && npm run build diff --git a/prediction/algorithms/__init__.py b/prediction/algorithms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prediction/algorithms/dark_vessel.py b/prediction/algorithms/dark_vessel.py new file mode 100644 index 0000000..9e8b9f2 --- /dev/null +++ b/prediction/algorithms/dark_vessel.py @@ -0,0 +1,59 @@ 
+import pandas as pd +from algorithms.location import haversine_nm + +GAP_SUSPICIOUS_SEC = 1800 # 30분 +GAP_HIGH_SUSPICIOUS_SEC = 3600 # 1시간 +GAP_VIOLATION_SEC = 86400 # 24시간 + + +def detect_ais_gaps(df_vessel: pd.DataFrame) -> list[dict]: + """AIS 수신 기록에서 소실 구간 추출.""" + if len(df_vessel) < 2: + return [] + + gaps = [] + records = df_vessel.sort_values('timestamp').to_dict('records') + + for i in range(1, len(records)): + prev, curr = records[i - 1], records[i] + prev_ts = pd.Timestamp(prev['timestamp']) + curr_ts = pd.Timestamp(curr['timestamp']) + gap_sec = (curr_ts - prev_ts).total_seconds() + + if gap_sec < GAP_SUSPICIOUS_SEC: + continue + + disp = haversine_nm( + prev['lat'], prev['lon'], + curr['lat'], curr['lon'], + ) + + if gap_sec >= GAP_VIOLATION_SEC: + severity = 'VIOLATION' + elif gap_sec >= GAP_HIGH_SUSPICIOUS_SEC: + severity = 'HIGH_SUSPICIOUS' + else: + severity = 'SUSPICIOUS' + + gaps.append({ + 'gap_sec': int(gap_sec), + 'gap_min': round(gap_sec / 60, 1), + 'displacement_nm': round(disp, 2), + 'severity': severity, + }) + + return gaps + + +def is_dark_vessel(df_vessel: pd.DataFrame) -> tuple[bool, int]: + """다크베셀 여부 판정. + + Returns: (is_dark, max_gap_duration_min) + """ + gaps = detect_ais_gaps(df_vessel) + if not gaps: + return False, 0 + + max_gap_min = max(g['gap_min'] for g in gaps) + is_dark = max_gap_min >= 30 # 30분 이상 소실 + return is_dark, int(max_gap_min) diff --git a/prediction/algorithms/fishing_pattern.py b/prediction/algorithms/fishing_pattern.py new file mode 100644 index 0000000..64201b6 --- /dev/null +++ b/prediction/algorithms/fishing_pattern.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import pandas as pd +from algorithms.location import haversine_nm, classify_zone # noqa: F401 (haversine_nm re-exported for callers) + +# Yan et al. 
(2022) 어구별 조업 속도 임계값 +GEAR_SOG_THRESHOLDS: dict[str, tuple[float, float]] = { + 'PT': (2.5, 4.5), # 쌍끌이저인망 + 'OT': (2.0, 4.0), # 단선저인망 + 'GN': (0.5, 2.5), # 자망·유망 + 'SQ': (0.0, 1.0), # 오징어채낚기 + 'TRAP': (0.3, 1.5), # 통발 + 'PS': (3.0, 6.0), # 선망 + 'TRAWL': (2.0, 4.5), # (alias) + 'PURSE': (3.0, 6.0), # (alias) + 'LONGLINE': (0.5, 2.5), +} +TRANSIT_SOG_MIN = 5.0 +ANCHORED_SOG_MAX = 0.5 + + +def classify_vessel_state(sog: float, cog_delta: float = 0.0, + gear_type: str = 'PT') -> str: + """UCAF: 어구별 상태 분류.""" + if sog <= ANCHORED_SOG_MAX: + return 'ANCHORED' + if sog >= TRANSIT_SOG_MIN: + return 'TRANSIT' + sog_min, sog_max = GEAR_SOG_THRESHOLDS.get(gear_type, (1.0, 5.0)) + if sog_min <= sog <= sog_max: + return 'FISHING' + return 'UNKNOWN' + + +def compute_ucaf_score(df_vessel: pd.DataFrame, gear_type: str = 'PT') -> float: + """UCAF 점수: 어구별 조업 상태 비율 (0~1).""" + if len(df_vessel) == 0: + return 0.0 + sog_min, sog_max = GEAR_SOG_THRESHOLDS.get(gear_type, (1.0, 5.0)) + in_range = df_vessel['sog'].between(sog_min, sog_max).sum() + return round(in_range / len(df_vessel), 4) + + +def compute_ucft_score(df_vessel: pd.DataFrame) -> float: + """UCFT 점수: 조업 vs 항행 이진 신뢰도 (0~1).""" + if len(df_vessel) == 0: + return 0.0 + fishing = (df_vessel['sog'].between(0.5, 5.0)).sum() + transit = (df_vessel['sog'] >= TRANSIT_SOG_MIN).sum() + total = fishing + transit + if total == 0: + return 0.0 + return round(fishing / total, 4) + + +def detect_fishing_segments(df_vessel: pd.DataFrame, + window_min: int = 15, + gear_type: str = 'PT') -> list[dict]: + """연속 조업 구간 추출.""" + if len(df_vessel) < 2: + return [] + + segments: list[dict] = [] + in_fishing = False + seg_start_idx = 0 + + records = df_vessel.to_dict('records') + for i, rec in enumerate(records): + sog = rec.get('sog', 0) + state = classify_vessel_state(sog, gear_type=gear_type) + + if state == 'FISHING' and not in_fishing: + in_fishing = True + seg_start_idx = i + elif state != 'FISHING' and in_fishing: + start_ts = 
records[seg_start_idx].get('timestamp') + end_ts = rec.get('timestamp') + if start_ts and end_ts: + dur_sec = (pd.Timestamp(end_ts) - pd.Timestamp(start_ts)).total_seconds() + dur_min = dur_sec / 60 + if dur_min >= window_min: + zone_info = classify_zone( + records[seg_start_idx].get('lat', 0), + records[seg_start_idx].get('lon', 0), + ) + segments.append({ + 'start_idx': seg_start_idx, + 'end_idx': i - 1, + 'duration_min': round(dur_min, 1), + 'zone': zone_info.get('zone', 'UNKNOWN'), + 'in_territorial_sea': zone_info.get('zone') == 'TERRITORIAL_SEA', + }) + in_fishing = False + + # 트랙 끝까지 조업 중이면 마지막 세그먼트 추가 + if in_fishing and len(records) > seg_start_idx: + start_ts = records[seg_start_idx].get('timestamp') + end_ts = records[-1].get('timestamp') + if start_ts and end_ts: + dur_sec = (pd.Timestamp(end_ts) - pd.Timestamp(start_ts)).total_seconds() + dur_min = dur_sec / 60 + if dur_min >= window_min: + zone_info = classify_zone( + records[seg_start_idx].get('lat', 0), + records[seg_start_idx].get('lon', 0), + ) + segments.append({ + 'start_idx': seg_start_idx, + 'end_idx': len(records) - 1, + 'duration_min': round(dur_min, 1), + 'zone': zone_info.get('zone', 'UNKNOWN'), + 'in_territorial_sea': zone_info.get('zone') == 'TERRITORIAL_SEA', + }) + + return segments + + +def detect_trawl_uturn(df_vessel: pd.DataFrame, + uturn_threshold_deg: float = 150.0, + min_uturn_count: int = 3) -> dict: + """U-turn 왕복 패턴 감지 (저인망 특징).""" + if len(df_vessel) < 2: + return {'uturn_count': 0, 'trawl_suspected': False} + + uturn_count = 0 + cog_vals = df_vessel['cog'].values + sog_vals = df_vessel['sog'].values + + for i in range(1, len(cog_vals)): + delta = abs((cog_vals[i] - cog_vals[i - 1] + 180) % 360 - 180) + if delta >= uturn_threshold_deg and sog_vals[i] < TRANSIT_SOG_MIN: + uturn_count += 1 + + return { + 'uturn_count': uturn_count, + 'trawl_suspected': uturn_count >= min_uturn_count, + } diff --git a/prediction/algorithms/fleet.py b/prediction/algorithms/fleet.py new file mode 
100644 index 0000000..ee56787 --- /dev/null +++ b/prediction/algorithms/fleet.py @@ -0,0 +1,177 @@ +"""선단(Fleet) 패턴 탐지 — 공간+행동 기반. + +단순 공간 근접이 아닌, 협조 운항 패턴(유사 속도/방향/역할)으로 선단을 판별. +- PT 저인망: 2척, 3NM 이내, 유사 속도(2~5kn) + 유사 방향(20° 이내) +- PS 선망: 3~5척, 2NM 이내, 모선(고속)+조명선(정지)+운반선(저속 대형) +- FC 환적: 2척, 0.5NM 이내, 양쪽 저속(2kn 이하) +""" + +import logging +from typing import Optional + +import numpy as np +import pandas as pd +from algorithms.location import haversine_nm, dist_to_baseline + +logger = logging.getLogger(__name__) + + +def _heading_diff(h1: float, h2: float) -> float: + """두 방향 사이 최소 각도차 (0~180).""" + d = abs(h1 - h2) % 360 + return d if d <= 180 else 360 - d + + +def detect_fleet_patterns( + vessel_dfs: dict[str, pd.DataFrame], +) -> dict[int, list[dict]]: + """행동 패턴 기반 선단 탐지. + + Returns: {fleet_id: [{mmsi, lat, lon, sog, cog, role, pattern}, ...]} + """ + # 각 선박의 최신 스냅샷 추출 + snapshots: list[dict] = [] + for mmsi, df in vessel_dfs.items(): + if df is None or len(df) == 0: + continue + last = df.iloc[-1] + snapshots.append({ + 'mmsi': mmsi, + 'lat': float(last['lat']), + 'lon': float(last['lon']), + 'sog': float(last.get('sog', 0)), + 'cog': float(last.get('cog', 0)), + }) + + if len(snapshots) < 2: + return {} + + matched: set[str] = set() + fleets: dict[int, list[dict]] = {} + fleet_id = 0 + + # 1차: PT 저인망 쌍 탐지 (2척, 3NM, 유사 속도/방향) + for i in range(len(snapshots)): + if snapshots[i]['mmsi'] in matched: + continue + a = snapshots[i] + for j in range(i + 1, len(snapshots)): + if snapshots[j]['mmsi'] in matched: + continue + b = snapshots[j] + dist = haversine_nm(a['lat'], a['lon'], b['lat'], b['lon']) + if dist > 3.0: + continue + # 둘 다 조업 속도 (2~5kn) + if not (2.0 <= a['sog'] <= 5.0 and 2.0 <= b['sog'] <= 5.0): + continue + # 유사 속도 (차이 1kn 미만) + if abs(a['sog'] - b['sog']) >= 1.0: + continue + # 유사 방향 (20° 미만) + if _heading_diff(a['cog'], b['cog']) >= 20.0: + continue + + fleets[fleet_id] = [ + {**a, 'role': 'LEADER', 'pattern': 'TRAWL_PAIR'}, + {**b, 'role': 
'MEMBER', 'pattern': 'TRAWL_PAIR'}, + ] + matched.add(a['mmsi']) + matched.add(b['mmsi']) + fleet_id += 1 + break + + # 2차: FC 환적 쌍 탐지 (2척, 0.5NM, 양쪽 저속) + for i in range(len(snapshots)): + if snapshots[i]['mmsi'] in matched: + continue + a = snapshots[i] + for j in range(i + 1, len(snapshots)): + if snapshots[j]['mmsi'] in matched: + continue + b = snapshots[j] + dist = haversine_nm(a['lat'], a['lon'], b['lat'], b['lon']) + if dist > 0.5: + continue + if a['sog'] > 2.0 or b['sog'] > 2.0: + continue + + fleets[fleet_id] = [ + {**a, 'role': 'LEADER', 'pattern': 'TRANSSHIP'}, + {**b, 'role': 'MEMBER', 'pattern': 'TRANSSHIP'}, + ] + matched.add(a['mmsi']) + matched.add(b['mmsi']) + fleet_id += 1 + break + + # 3차: PS 선망 선단 탐지 (3~10척, 2NM 이내 클러스터) + unmatched = [s for s in snapshots if s['mmsi'] not in matched] + for anchor in unmatched: + if anchor['mmsi'] in matched: + continue + nearby = [] + for other in unmatched: + if other['mmsi'] == anchor['mmsi'] or other['mmsi'] in matched: + continue + dist = haversine_nm(anchor['lat'], anchor['lon'], other['lat'], other['lon']) + if dist <= 2.0: + nearby.append(other) + + if len(nearby) < 2: # 본인 포함 3척 이상 + continue + + # 역할 분류: 고속(모선), 정지(조명선), 나머지(멤버) + members = [{**anchor, 'role': 'LEADER', 'pattern': 'PURSE_SEINE'}] + matched.add(anchor['mmsi']) + for n in nearby[:9]: # 최대 10척 + if n['sog'] < 0.5: + role = 'LIGHTING' + else: + role = 'MEMBER' + members.append({**n, 'role': role, 'pattern': 'PURSE_SEINE'}) + matched.add(n['mmsi']) + + fleets[fleet_id] = members + fleet_id += 1 + + logger.info('fleet detection: %d fleets found (%d vessels matched)', + len(fleets), len(matched)) + return fleets + + +def assign_fleet_roles( + vessel_dfs: dict[str, pd.DataFrame], + cluster_map: dict[str, int], +) -> dict[str, dict]: + """선단 역할 할당 — 패턴 매칭 기반. + + cluster_map은 파이프라인에서 전달되지만, 여기서는 vessel_dfs로 직접 패턴 탐지. 
+ """ + fleets = detect_fleet_patterns(vessel_dfs) + + results: dict[str, dict] = {} + + # 매칭된 선박 (fleet_id를 cluster_id로 사용) + fleet_mmsis: set[str] = set() + for fid, members in fleets.items(): + for m in members: + fleet_mmsis.add(m['mmsi']) + results[m['mmsi']] = { + 'cluster_id': fid, + 'cluster_size': len(members), + 'is_leader': m['role'] == 'LEADER', + 'fleet_role': m['role'], + } + + # 매칭 안 된 선박 → NOISE (cluster_id = -1) + for mmsi in vessel_dfs: + if mmsi not in fleet_mmsis: + results[mmsi] = { + 'cluster_id': -1, + 'cluster_size': 0, + 'is_leader': False, + 'fleet_role': 'NOISE', + } + + return results diff --git a/prediction/algorithms/gear_correlation.py b/prediction/algorithms/gear_correlation.py new file mode 100644 index 0000000..00ee786 --- /dev/null +++ b/prediction/algorithms/gear_correlation.py @@ -0,0 +1,854 @@ +"""어구 그룹 다단계 연관성 분석 — 멀티모델 패턴 추적. + +Phase 1: default 모델 1개로 동작 (DB에서 is_active=true 모델 로드). +Phase 2: 글로벌 모델 max 5개 병렬 실행. + +어구 중심 점수 체계: + - 어구 신호 기준 관측 윈도우 (어구 비활성 시 FREEZE) + - 선박 shadow 추적 (비활성 → 활성 전환 시 보너스) + - 적응형 EMA + streak 자기강화 + - 퍼센트 기반 무제한 추적 (50%+) +""" + +from __future__ import annotations + +import logging +import math +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Optional + +from algorithms.polygon_builder import _get_time_bucket_age +from config import qualified_table + +logger = logging.getLogger(__name__) + + +# ── 상수 ────────────────────────────────────────────────────────── +_EARTH_RADIUS_NM = 3440.065 +_NM_TO_M = 1852.0 +CORRELATION_PARAM_MODELS = qualified_table('correlation_param_models') +GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') +GEAR_CORRELATION_RAW_METRICS = qualified_table('gear_correlation_raw_metrics') + + +# ── 파라미터 모델 ───────────────────────────────────────────────── + +@dataclass +class ModelParams: + """추적 모델의 전체 파라미터셋.""" + + model_id: int = 1 + name: str = 'default' + + # EMA + alpha_base: float = 0.30 + alpha_min: 
float = 0.08 + alpha_decay_per_streak: float = 0.005 + + # 임계값 + track_threshold: float = 0.50 + polygon_threshold: float = 0.70 + + # 메트릭 가중치 — 어구-선박 + w_proximity: float = 0.45 + w_visit: float = 0.35 + w_activity: float = 0.20 + + # 메트릭 가중치 — 선박-선박 + w_dtw: float = 0.30 + w_sog_corr: float = 0.20 + w_heading: float = 0.25 + w_prox_vv: float = 0.25 + + # 메트릭 가중치 — 어구-어구 + w_prox_persist: float = 0.50 + w_drift: float = 0.30 + w_signal_sync: float = 0.20 + + # Freeze 기준 + group_quiet_ratio: float = 0.30 + normal_gap_hours: float = 1.0 + + # 감쇠 + decay_slow: float = 0.025 + decay_fast: float = 0.10 + stale_hours: float = 6.0 + + # Shadow + shadow_stay_bonus: float = 0.10 + shadow_return_bonus: float = 0.15 + + # 거리 + candidate_radius_factor: float = 3.0 + proximity_threshold_nm: float = 5.0 + visit_threshold_nm: float = 5.0 + + # 야간 + night_bonus: float = 1.3 + + # 장기 감쇠 + long_decay_days: float = 7.0 + + @classmethod + def from_db_row(cls, row: dict) -> ModelParams: + """DB correlation_param_models 행에서 생성.""" + params_json = row.get('params', {}) + return cls( + model_id=row['id'], + name=row['name'], + **{k: v for k, v in params_json.items() if hasattr(cls, k)}, + ) + + +# ── Haversine 거리 ──────────────────────────────────────────────── + +def _haversine_nm(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """두 좌표 간 거리 (해리).""" + phi1 = math.radians(lat1) + phi2 = math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dlam = math.radians(lon2 - lon1) + a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2 + return _EARTH_RADIUS_NM * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)) + + +# ── Freeze 판단 ─────────────────────────────────────────────────── + +def should_freeze( + gear_group_active_ratio: float, + target_last_observed: Optional[datetime], + now: datetime, + params: ModelParams, +) -> tuple[bool, str]: + """감쇠 적용 여부 판단. 어구 그룹이 기준.""" + # 1. 
어구 그룹 비활성 → 비교 불가 + if gear_group_active_ratio < params.group_quiet_ratio: + return True, 'GROUP_QUIET' + + # 2. 개별 부재가 정상 범위 + if target_last_observed is not None: + hours_absent = (now - target_last_observed).total_seconds() / 3600 + if hours_absent < params.normal_gap_hours: + return True, 'NORMAL_GAP' + + return False, 'ACTIVE' + + +# ── EMA 업데이트 ────────────────────────────────────────────────── + +def update_score( + prev_score: Optional[float], + raw_score: Optional[float], + streak: int, + last_observed: Optional[datetime], + now: datetime, + gear_group_active_ratio: float, + shadow_bonus: float, + params: ModelParams, +) -> tuple[float, int, str]: + """적응형 EMA 점수 업데이트. + + Returns: (new_score, new_streak, state) + """ + # 관측 불가 + if raw_score is None: + frz, reason = should_freeze( + gear_group_active_ratio, last_observed, now, params, + ) + if frz: + return (prev_score or 0.0), streak, reason + + # 실제 이탈 → 감쇠 + hours_absent = 0.0 + if last_observed is not None: + hours_absent = (now - last_observed).total_seconds() / 3600 + decay = params.decay_fast if hours_absent > params.stale_hours else params.decay_slow + return max(0.0, (prev_score or 0.0) - decay), 0, 'SIGNAL_LOSS' + + # Shadow 보너스 + adjusted = min(1.0, raw_score + shadow_bonus) + + # Case 1: 임계값 이상 → streak 보상 + if adjusted >= params.track_threshold: + streak += 1 + alpha = max(params.alpha_min, + params.alpha_base - streak * params.alpha_decay_per_streak) + if prev_score is None: + return adjusted, streak, 'ACTIVE' + return alpha * adjusted + (1.0 - alpha) * prev_score, streak, 'ACTIVE' + + # Case 2: 패턴 이탈 + alpha = params.alpha_base + if prev_score is None: + return adjusted, 0, 'PATTERN_DIVERGE' + return alpha * adjusted + (1.0 - alpha) * prev_score, 0, 'PATTERN_DIVERGE' + + +# ── 어구-선박 메트릭 ────────────────────────────────────────────── + +def _compute_gear_vessel_metrics( + gear_center_lat: float, + gear_center_lon: float, + gear_radius_nm: float, + vessel_track: list[dict], + params: 
ModelParams, +) -> dict: + """어구 그룹 중심 vs 선박 궤적 메트릭. + + vessel_track: [{lat, lon, sog, cog, timestamp}, ...] + """ + if not vessel_track: + return {'proximity_ratio': 0, 'visit_score': 0, 'activity_sync': 0, 'composite': 0} + + threshold_nm = max(gear_radius_nm * 2, params.proximity_threshold_nm) + + # 1. proximity_ratio — 거리 구간별 차등 점수 + _PROX_CLOSE_NM = 2.5 + _PROX_NEAR_NM = 5.0 + _PROX_FAR_NM = 10.0 + prox_total = 0.0 + for p in vessel_track: + d = _haversine_nm(gear_center_lat, gear_center_lon, p['lat'], p['lon']) + if d < _PROX_CLOSE_NM: + prox_total += 1.0 + elif d < _PROX_NEAR_NM: + prox_total += 0.5 + elif d < _PROX_FAR_NM: + prox_total += 0.15 + proximity_ratio = prox_total / len(vessel_track) + + # 2. visit_score — 방문 패턴 (3NM 임계, 8회 기준) + _VISIT_THRESHOLD_NM = 3.0 + _VISIT_MAX = 8.0 + in_zone = False + visits = 0 + stay_points = 0 + consecutive_stay = 0 + stay_bonus = 0.0 + away_points = 0 + + for p in vessel_track: + d = _haversine_nm(gear_center_lat, gear_center_lon, p['lat'], p['lon']) + if d < _VISIT_THRESHOLD_NM: + if not in_zone: + visits += 1 + in_zone = True + consecutive_stay = 0 + stay_points += 1 + consecutive_stay += 1 + if consecutive_stay >= 3: + stay_bonus += 0.05 # 연속 체류 보너스 + else: + in_zone = False + consecutive_stay = 0 + away_points += 1 + + visit_count_norm = min(1.0, visits / _VISIT_MAX) if visits > 0 else 0.0 + total = stay_points + away_points + stay_ratio = stay_points / total if total > 0 else 0.0 + visit_score = min(1.0, 0.5 * visit_count_norm + 0.5 * stay_ratio + stay_bonus) + + # 3. 
activity_sync — 이중 판정 (저속 조업 + 고속 조업) + _MIN_ACTIVITY_POINTS = 6 + in_zone_count = 0 + activity_total = 0.0 + for p in vessel_track: + d = _haversine_nm(gear_center_lat, gear_center_lon, p['lat'], p['lon']) + if d < _PROX_NEAR_NM: + in_zone_count += 1 + sog = p.get('sog', 0) or 0 + if sog < 3.0: + activity_total += 1.0 # 저속 조업 (정박/어구 관리) + elif sog <= 7.0: + activity_total += 0.6 # 고속 조업 (쌍끌이/예인) + # else: 이동 중 → 0 + activity_sync = (activity_total / in_zone_count) if in_zone_count >= _MIN_ACTIVITY_POINTS else 0.0 + + # 가중 합산 + composite = ( + params.w_proximity * proximity_ratio + + params.w_visit * visit_score + + params.w_activity * activity_sync + ) + + return { + 'proximity_ratio': round(proximity_ratio, 4), + 'visit_score': round(visit_score, 4), + 'activity_sync': round(activity_sync, 4), + 'composite': round(composite, 4), + } + + +# ── 선박-선박 메트릭 ────────────────────────────────────────────── + +def _compute_vessel_vessel_metrics( + track_a: list[dict], + track_b: list[dict], + params: ModelParams, +) -> dict: + """두 선박 궤적 간 메트릭.""" + from algorithms.track_similarity import ( + compute_heading_coherence, + compute_proximity_ratio, + compute_sog_correlation, + compute_track_similarity, + ) + + if not track_a or not track_b: + return { + 'dtw_similarity': 0, 'speed_correlation': 0, + 'heading_coherence': 0, 'proximity_ratio': 0, 'composite': 0, + } + + # DTW + pts_a = [(p['lat'], p['lon']) for p in track_a] + pts_b = [(p['lat'], p['lon']) for p in track_b] + dtw_sim = compute_track_similarity(pts_a, pts_b) + + # SOG 상관 + sog_a = [p.get('sog', 0) for p in track_a] + sog_b = [p.get('sog', 0) for p in track_b] + sog_corr = compute_sog_correlation(sog_a, sog_b) + + # COG 동조 + cog_a = [p.get('cog', 0) for p in track_a] + cog_b = [p.get('cog', 0) for p in track_b] + heading = compute_heading_coherence(cog_a, cog_b) + + # 근접비 + prox = compute_proximity_ratio(pts_a, pts_b, params.proximity_threshold_nm) + + composite = ( + params.w_dtw * dtw_sim + + params.w_sog_corr 
* sog_corr + + params.w_heading * heading + + params.w_prox_vv * prox + ) + + return { + 'dtw_similarity': round(dtw_sim, 4), + 'speed_correlation': round(sog_corr, 4), + 'heading_coherence': round(heading, 4), + 'proximity_ratio': round(prox, 4), + 'composite': round(composite, 4), + } + + +# ── 어구-어구 메트릭 ────────────────────────────────────────────── + +def _compute_gear_gear_metrics( + center_a: tuple[float, float], + center_b: tuple[float, float], + center_history_a: list[dict], + center_history_b: list[dict], + params: ModelParams, +) -> dict: + """두 어구 그룹 간 메트릭.""" + if not center_history_a or not center_history_b: + return { + 'proximity_ratio': 0, 'drift_similarity': 0, + 'composite': 0, + } + + # 1. 근접 지속성 — 현재 중심 간 거리의 안정성 + dist_nm = _haversine_nm(center_a[0], center_a[1], center_b[0], center_b[1]) + prox_persist = max(0.0, 1.0 - dist_nm / 20.0) # 20NM 이상이면 0 + + # 2. 표류 유사도 — 중심 이동 벡터 코사인 유사도 + drift_sim = 0.0 + n = min(len(center_history_a), len(center_history_b)) + if n >= 2: + # 마지막 2점으로 이동 벡터 계산 + da_lat = center_history_a[-1].get('lat', 0) - center_history_a[-2].get('lat', 0) + da_lon = center_history_a[-1].get('lon', 0) - center_history_a[-2].get('lon', 0) + db_lat = center_history_b[-1].get('lat', 0) - center_history_b[-2].get('lat', 0) + db_lon = center_history_b[-1].get('lon', 0) - center_history_b[-2].get('lon', 0) + + dot = da_lat * db_lat + da_lon * db_lon + mag_a = (da_lat ** 2 + da_lon ** 2) ** 0.5 + mag_b = (db_lat ** 2 + db_lon ** 2) ** 0.5 + if mag_a > 1e-10 and mag_b > 1e-10: + cos_sim = dot / (mag_a * mag_b) + drift_sim = max(0.0, (cos_sim + 1.0) / 2.0) + + composite = ( + params.w_prox_persist * prox_persist + + params.w_drift * drift_sim + ) + + return { + 'proximity_ratio': round(prox_persist, 4), + 'drift_similarity': round(drift_sim, 4), + 'composite': round(composite, 4), + } + + +# ── Shadow 보너스 계산 ──────────────────────────────────────────── + +def compute_shadow_bonus( + vessel_positions_during_inactive: list[dict], + 
last_known_gear_center: tuple[float, float], + group_radius_nm: float, + params: ModelParams, +) -> tuple[float, bool, bool]: + """어구 비활성 동안 선박이 어구 근처에 머물렀는지 평가. + + Returns: (bonus, stayed_nearby, returned_before_resume) + """ + if not vessel_positions_during_inactive or last_known_gear_center is None: + return 0.0, False, False + + gc_lat, gc_lon = last_known_gear_center + threshold_nm = max(group_radius_nm * 2, params.proximity_threshold_nm) + + # 1. 평균 거리 + dists = [ + _haversine_nm(gc_lat, gc_lon, p['lat'], p['lon']) + for p in vessel_positions_during_inactive + ] + avg_dist = sum(dists) / len(dists) + stayed = avg_dist < threshold_nm + + # 2. 마지막 위치가 근처인지 (복귀 판단) + returned = dists[-1] < threshold_nm if dists else False + + bonus = 0.0 + if stayed: + bonus += params.shadow_stay_bonus + if returned: + bonus += params.shadow_return_bonus + + return bonus, stayed, returned + + +# ── 후보 필터링 ─────────────────────────────────────────────────── + +def _compute_group_radius(members: list[dict]) -> float: + """그룹 멤버 간 최대 거리의 절반 (NM).""" + if len(members) < 2: + return 1.0 # 최소 1NM + + max_dist = 0.0 + for i in range(len(members)): + for j in range(i + 1, len(members)): + d = _haversine_nm( + members[i]['lat'], members[i]['lon'], + members[j]['lat'], members[j]['lon'], + ) + if d > max_dist: + max_dist = d + + return max(1.0, max_dist / 2.0) + + +def find_candidates( + gear_center_lat: float, + gear_center_lon: float, + group_radius_nm: float, + group_mmsis: set[str], + all_positions: dict[str, dict], + params: ModelParams, +) -> list[str]: + """어구 그룹 주변 후보 MMSI 필터링.""" + search_radius = group_radius_nm * params.candidate_radius_factor + candidates = [] + + for mmsi, pos in all_positions.items(): + if mmsi in group_mmsis: + continue + d = _haversine_nm(gear_center_lat, gear_center_lon, pos['lat'], pos['lon']) + if d < search_radius: + candidates.append(mmsi) + + return candidates + + +# ── 메인 실행 ───────────────────────────────────────────────────── + +def 
_get_vessel_track(vessel_store, mmsi: str, hours: int = 6) -> list[dict]: + """vessel_store에서 특정 MMSI의 최근 N시간 궤적 추출 (벡터화).""" + df = vessel_store._tracks.get(mmsi) + if df is None or len(df) == 0: + return [] + + import pandas as pd + now = datetime.now(timezone.utc) + cutoff = now - pd.Timedelta(hours=hours) + + ts_col = df['timestamp'] + if hasattr(ts_col.dtype, 'tz') and ts_col.dtype.tz is not None: + mask = ts_col >= pd.Timestamp(cutoff) + else: + mask = ts_col >= pd.Timestamp(cutoff.replace(tzinfo=None)) + + recent = df.loc[mask] + if recent.empty: + return [] + + # 벡터화 추출 (iterrows 대신) + lats = recent['lat'].values + lons = recent['lon'].values + sogs = (recent['sog'] if 'sog' in recent.columns + else recent.get('raw_sog', pd.Series(dtype=float))).fillna(0).values + cogs = (recent['cog'] if 'cog' in recent.columns + else pd.Series(0, index=recent.index)).fillna(0).values + timestamps = recent['timestamp'].tolist() + + return [ + {'lat': float(lats[i]), 'lon': float(lons[i]), + 'sog': float(sogs[i]), 'cog': float(cogs[i]), 'timestamp': timestamps[i]} + for i in range(len(lats)) + ] + + +def _compute_gear_active_ratio( + gear_members: list[dict], + all_positions: dict[str, dict], + now: datetime, + stale_sec: float = 3600, +) -> float: + """어구 그룹의 활성 멤버 비율.""" + if not gear_members: + return 0.0 + + active = 0 + for m in gear_members: + pos = all_positions.get(m['mmsi']) + if pos is None: + continue + ts = pos.get('timestamp') + if ts is None: + continue + if isinstance(ts, datetime): + last_dt = ts if ts.tzinfo is not None else ts.replace(tzinfo=timezone.utc) + else: + try: + import pandas as pd + last_dt = pd.Timestamp(ts).to_pydatetime() + if last_dt.tzinfo is None: + last_dt = last_dt.replace(tzinfo=timezone.utc) + except Exception: + continue + age = (now - last_dt).total_seconds() + if age < stale_sec: + active += 1 + + return active / len(gear_members) + + +def _is_gear_pattern(name: str) -> bool: + """어구 이름 패턴 판별.""" + import re + return 
def run_gear_correlation(
    vessel_store,
    gear_groups: list[dict],
    conn,
) -> dict:
    """Run one gear-correlation cycle (batch-optimised).

    For every gear group the function derives a centre and radius, selects
    nearby candidate MMSIs, computes raw metrics per candidate, updates one
    score per (model, group, sub-cluster, candidate) and finally decays every
    previously stored score that was not touched in this cycle. Raw metrics
    and scores are written in two batches, then ``conn.commit()`` is called.

    Args:
        vessel_store: Store exposing ``get_all_latest_positions()``; also
            handed to ``_get_vessel_track``.
        gear_groups: Group dicts with ``'parent_name'`` and ``'members'``
            (each member has ``'mmsi'``, ``'lat'``, ``'lon'``), optionally
            ``'sub_cluster_id'`` and ``'parent_mmsi'``.
        conn: DB connection used by the load/insert helpers and committed at
            the end.

    Returns:
        {'updated': int, 'models': int, 'raw_inserted': int}
    """
    import time as _time
    import re as _re

    # Name patterns that mark a candidate as a gear buoy rather than a vessel.
    _gear_re = _re.compile(r'^.+_(?=\S*\d)\S+(?:[_ ]\S*)*[_ ]*$|^.+%$|^\d+$')

    t0 = _time.time()
    now = datetime.now(timezone.utc)
    all_positions = vessel_store.get_all_latest_positions()

    # Load the active models; nothing to do without them.
    models = _load_active_models(conn)
    if not models:
        logger.warning('no active correlation models found')
        return {'updated': 0, 'models': 0, 'raw_inserted': 0}

    # Pre-load every existing score in one query (instead of per-row lookups).
    all_scores = _load_all_scores(conn)

    raw_batch: list[tuple] = []
    score_batch: list[tuple] = []
    total_updated = 0
    total_raw = 0
    processed_keys: set[tuple] = set()  # (model_id, parent_name, sub_cluster_id, target_mmsi)

    # The first loaded model drives candidate search and raw metric computation.
    default_params = models[0]

    for gear_group in gear_groups:
        parent_name = gear_group['parent_name']
        sub_cluster_id = gear_group.get('sub_cluster_id', 0)
        members = gear_group['members']
        if not members:
            continue

        # Members active within 1h (used for centre/radius only).
        # presumably _get_time_bucket_age returns an age in seconds — confirm.
        display_members = [
            m for m in members
            if _get_time_bucket_age(m.get('mmsi'), all_positions, now) <= 3600
        ]
        # Fallback: with fewer than 2 active members keep the 2 freshest ones.
        if len(display_members) < 2 and len(members) >= 2:
            display_members = sorted(
                members,
                key=lambda m: _get_time_bucket_age(m.get('mmsi'), all_positions, now),
            )[:2]
        active_members = display_members if len(display_members) >= 2 else members

        # Group centre + radius (based on the active members chosen above).
        center_lat = sum(m['lat'] for m in active_members) / len(active_members)
        center_lon = sum(m['lon'] for m in active_members) / len(active_members)
        group_radius = _compute_group_radius(active_members)

        # Share of group members with a fresh position.
        active_ratio = _compute_gear_active_ratio(members, all_positions, now)

        # MMSIs that belong to the group itself (never scored as candidates).
        group_mmsis = {m['mmsi'] for m in members}
        if gear_group.get('parent_mmsi'):
            group_mmsis.add(gear_group['parent_mmsi'])

        # Candidate filtering + count cap.
        candidates = find_candidates(
            center_lat, center_lon, group_radius,
            group_mmsis, all_positions, default_params,
        )
        if not candidates:
            continue
        if len(candidates) > _MAX_CANDIDATES_PER_GROUP:
            # Keep only the nearest ones.
            # NOTE(review): candidates cut here are not added to
            # processed_keys, so any stored score they have is decayed as
            # OUT_OF_RANGE below even though they are inside the radius.
            candidates.sort(key=lambda m: _haversine_nm(
                center_lat, center_lon,
                all_positions[m]['lat'], all_positions[m]['lon'],
            ))
            candidates = candidates[:_MAX_CANDIDATES_PER_GROUP]

        for target_mmsi in candidates:
            target_pos = all_positions.get(target_mmsi)
            if target_pos is None:
                continue

            target_name = target_pos.get('name', '')
            target_is_gear = bool(_gear_re.match(target_name or ''))
            target_type = 'GEAR_BUOY' if target_is_gear else 'VESSEL'

            # Metrics: plain distance falloff for gear, track-based for vessels.
            if target_is_gear:
                d = _haversine_nm(center_lat, center_lon,
                                  target_pos['lat'], target_pos['lon'])
                prox = max(0.0, 1.0 - d / 20.0)
                metrics = {'proximity_ratio': prox, 'composite': prox}
            else:
                vessel_track = _get_vessel_track(vessel_store, target_mmsi, hours=6)
                metrics = _compute_gear_vessel_metrics(
                    center_lat, center_lon, group_radius,
                    vessel_track, default_params,
                )

            # Collect the raw metric row; metrics absent from the dict are
            # stored as None and both shadow flags are always False here.
            raw_batch.append((
                now, parent_name, sub_cluster_id, target_mmsi, target_type, target_name,
                metrics.get('proximity_ratio'), metrics.get('visit_score'),
                metrics.get('activity_sync'), metrics.get('dtw_similarity'),
                metrics.get('speed_correlation'), metrics.get('heading_coherence'),
                metrics.get('drift_similarity'), False, False, active_ratio,
            ))
            total_raw += 1

            # Per-model score update. The composite is recomputed with each
            # model's own weights; metrics['composite'] itself is not used.
            for model in models:
                if target_is_gear:
                    composite = metrics.get('proximity_ratio', 0) * model.w_prox_persist
                else:
                    composite = (
                        model.w_proximity * (metrics.get('proximity_ratio') or 0)
                        + model.w_visit * (metrics.get('visit_score') or 0)
                        + model.w_activity * (metrics.get('activity_sync') or 0)
                    )

                # Look up the previous state in the pre-loaded map (no DB query).
                score_key = (model.model_id, parent_name, sub_cluster_id, target_mmsi)
                prev = all_scores.get(score_key)
                prev_score = prev['current_score'] if prev else None
                streak = prev['streak_count'] if prev else 0
                last_obs = prev['last_observed_at'] if prev else None

                # presumably the literal 0.0 is the shadow bonus argument of
                # update_score — confirm against its signature.
                new_score, new_streak, state = update_score(
                    prev_score, composite, streak,
                    last_obs, now, active_ratio,
                    0.0, model,
                )

                processed_keys.add(score_key)

                # Persist only scores above the tracking threshold, or ones
                # that already have a stored row.
                if new_score >= model.track_threshold or prev is not None:
                    score_batch.append((
                        model.model_id, parent_name, sub_cluster_id, target_mmsi,
                        target_type, target_name,
                        round(new_score, 6), new_streak, state,
                        now, now, now,
                    ))
                    total_updated += 1

    # ── Forced decay for targets that left the search radius ──────────
    # Every stored score that was not processed in this cycle is treated as
    # out of range: update_score (and any freeze logic in it) is bypassed and
    # the model's decay_fast is subtracted directly, flooring at 0.
    # This also covers scores of groups absent from gear_groups this cycle.
    for score_key, prev in all_scores.items():
        if score_key in processed_keys:
            continue
        prev_score = prev['current_score']
        if prev_score is None or prev_score <= 0:
            continue
        model_id, parent_name_s, sub_cluster_id_s, target_mmsi_s = score_key
        # Use the owning model's decay_fast; scores whose model is not in the
        # active list fall back to the default model's parameters.
        model_params = next((m for m in models if m.model_id == model_id), default_params)
        new_score = max(0.0, prev_score - model_params.decay_fast)
        score_batch.append((
            model_id, parent_name_s, sub_cluster_id_s, target_mmsi_s,
            prev.get('target_type', 'VESSEL'), prev.get('target_name', ''),
            round(new_score, 6), 0, 'OUT_OF_RANGE',
            prev.get('last_observed_at', now), now, now,
        ))
        total_updated += 1

    # Batch DB write.
    # NOTE(review): both helpers log and swallow exceptions. With psycopg2 in
    # its default (non-autocommit) mode a failed statement aborts the open
    # transaction, so a raw-insert failure would likely make the score upsert
    # fail too and the commit below would persist nothing — confirm and
    # consider a rollback/savepoint in the helpers.
    _batch_insert_raw(conn, raw_batch)
    _batch_upsert_scores(conn, score_batch)
    conn.commit()

    elapsed = round(_time.time() - t0, 2)
    logger.info(
        'gear correlation internals: %.2fs, %d groups, %d raw, %d scores, %d models',
        elapsed, len(gear_groups), total_raw, total_updated, len(models),
    )

    return {
        'updated': total_updated,
        'models': len(models),
        'raw_inserted': total_raw,
    }
{(model_id, group_key, sub_cluster_id, target_mmsi): {...}}""" + cur = conn.cursor() + try: + cur.execute( + "SELECT model_id, group_key, sub_cluster_id, target_mmsi, " + "current_score, streak_count, last_observed_at, " + "target_type, target_name " + f"FROM {GEAR_CORRELATION_SCORES}" + ) + result = {} + for row in cur.fetchall(): + key = (row[0], row[1], row[2], row[3]) + result[key] = { + 'current_score': row[4], + 'streak_count': row[5], + 'last_observed_at': row[6], + 'target_type': row[7], + 'target_name': row[8], + } + return result + except Exception as e: + logger.warning('failed to load all scores: %s', e) + return {} + finally: + cur.close() + + +def _batch_insert_raw(conn, batch: list[tuple]): + """raw 메트릭 배치 INSERT.""" + if not batch: + return + cur = conn.cursor() + try: + from psycopg2.extras import execute_values + execute_values( + cur, + f"""INSERT INTO {GEAR_CORRELATION_RAW_METRICS} + (observed_at, group_key, sub_cluster_id, target_mmsi, target_type, target_name, + proximity_ratio, visit_score, activity_sync, + dtw_similarity, speed_correlation, heading_coherence, + drift_similarity, shadow_stay, shadow_return, + gear_group_active_ratio) + VALUES %s""", + batch, + page_size=500, + ) + except Exception as e: + logger.warning('batch insert raw failed: %s', e) + finally: + cur.close() + + +def _batch_upsert_scores(conn, batch: list[tuple]): + """점수 배치 UPSERT.""" + if not batch: + return + cur = conn.cursor() + try: + from psycopg2.extras import execute_values + execute_values( + cur, + f"""INSERT INTO {GEAR_CORRELATION_SCORES} + (model_id, group_key, sub_cluster_id, target_mmsi, target_type, target_name, + current_score, streak_count, freeze_state, + first_observed_at, last_observed_at, updated_at) + VALUES %s + ON CONFLICT (model_id, group_key, sub_cluster_id, target_mmsi) + DO UPDATE SET + target_type = EXCLUDED.target_type, + target_name = EXCLUDED.target_name, + current_score = EXCLUDED.current_score, + streak_count = EXCLUDED.streak_count, + 
# Minimum length (after normalisation) for a parent name to be tracked.
_TRACKABLE_PARENT_MIN_LENGTH = 4
# Separator characters stripped out of parent names during normalisation.
_REMOVE_TOKENS = (' ', '_', '-', '%')


def normalize_parent_name(name: Optional[str]) -> str:
    """Return the canonical form of a gear parent name.

    The name is upper-cased, trimmed, and every occurrence of the tokens in
    ``_REMOVE_TOKENS`` is removed. ``None`` normalises to the empty string.
    """
    cleaned = (name or '').upper().strip()
    for token in _REMOVE_TOKENS:
        # Splitting on the token and re-joining drops every occurrence of it.
        cleaned = ''.join(cleaned.split(token))
    return cleaned


def is_trackable_parent_name(name: Optional[str]) -> bool:
    """Tell whether the normalised name is long enough to be tracked."""
    normalized_length = len(normalize_parent_name(name))
    return normalized_length >= _TRACKABLE_PARENT_MIN_LENGTH
7 +_LABEL_PRIOR_WINDOW_DAYS = 30 +_CONTINUITY_SCORE_THRESHOLD = 0.45 +_MERGE_SCORE_THRESHOLD = 0.35 +_CENTER_DISTANCE_THRESHOLD_NM = 12.0 +_EPISODE_PRIOR_MAX = 0.05 +_LINEAGE_PRIOR_MAX = 0.03 +_LABEL_PRIOR_MAX = 0.07 +_TOTAL_PRIOR_CAP = 0.10 + + +def _clamp(value: float, floor: float = 0.0, ceil: float = 1.0) -> float: + return max(floor, min(ceil, value)) + + +def _haversine_nm(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + earth_radius_nm = 3440.065 + phi1 = math.radians(lat1) + phi2 = math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dlam = math.radians(lon2 - lon1) + a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2 + return earth_radius_nm * 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a))) + + +def _json_list(value: Any) -> list[str]: + if value is None: + return [] + if isinstance(value, list): + return [str(item) for item in value if item] + try: + parsed = json.loads(value) + except Exception: + return [] + if isinstance(parsed, list): + return [str(item) for item in parsed if item] + return [] + + +@dataclass +class GroupEpisodeInput: + group_key: str + normalized_parent_name: str + sub_cluster_id: int + member_mmsis: list[str] + member_count: int + center_lat: float + center_lon: float + + @property + def key(self) -> tuple[str, int]: + return (self.group_key, self.sub_cluster_id) + + +@dataclass +class EpisodeState: + episode_id: str + lineage_key: str + group_key: str + normalized_parent_name: str + current_sub_cluster_id: int + member_mmsis: list[str] + member_count: int + center_lat: float + center_lon: float + last_snapshot_time: datetime + status: str + + +@dataclass +class EpisodeAssignment: + group_key: str + sub_cluster_id: int + normalized_parent_name: str + episode_id: str + continuity_source: str + continuity_score: float + split_from_episode_id: Optional[str] + merged_from_episode_ids: list[str] + member_mmsis: list[str] + member_count: int + center_lat: float + center_lon: 
float + + @property + def key(self) -> tuple[str, int]: + return (self.group_key, self.sub_cluster_id) + + +@dataclass +class EpisodePlan: + assignments: dict[tuple[str, int], EpisodeAssignment] + expired_episode_ids: set[str] + merged_episode_targets: dict[str, str] + + +def _member_jaccard(left: Iterable[str], right: Iterable[str]) -> tuple[float, int]: + left_set = {item for item in left if item} + right_set = {item for item in right if item} + if not left_set and not right_set: + return 0.0, 0 + overlap = len(left_set & right_set) + union = len(left_set | right_set) + return (overlap / union if union else 0.0), overlap + + +def continuity_score(current: GroupEpisodeInput, previous: EpisodeState) -> tuple[float, int, float]: + jaccard, overlap_count = _member_jaccard(current.member_mmsis, previous.member_mmsis) + distance_nm = _haversine_nm(current.center_lat, current.center_lon, previous.center_lat, previous.center_lon) + center_support = _clamp(1.0 - (distance_nm / _CENTER_DISTANCE_THRESHOLD_NM)) + score = _clamp((0.75 * jaccard) + (0.25 * center_support)) + return round(score, 6), overlap_count, round(distance_nm, 3) + + +def load_active_episode_states(conn, lineage_keys: list[str]) -> dict[str, list[EpisodeState]]: + if not lineage_keys: + return {} + + cur = conn.cursor() + try: + cur.execute( + f""" + SELECT episode_id, lineage_key, group_key, normalized_parent_name, + current_sub_cluster_id, current_member_mmsis, current_member_count, + ST_Y(current_center_point) AS center_lat, + ST_X(current_center_point) AS center_lon, + last_snapshot_time, status + FROM {GEAR_GROUP_EPISODES} + WHERE lineage_key = ANY(%s) + AND status = 'ACTIVE' + AND last_snapshot_time >= NOW() - (%s * INTERVAL '1 hour') + ORDER BY lineage_key, last_snapshot_time DESC, episode_id ASC + """, + (lineage_keys, _ACTIVE_EPISODE_WINDOW_HOURS), + ) + result: dict[str, list[EpisodeState]] = {} + for row in cur.fetchall(): + state = EpisodeState( + episode_id=row[0], + lineage_key=row[1], + 
def build_episode_plan(
    groups: list[GroupEpisodeInput],
    previous_by_lineage: dict[str, list[EpisodeState]],
) -> EpisodePlan:
    """Decide, per current gear group, which episode it belongs to.

    Groups are processed per lineage (``normalized_parent_name``). Within a
    lineage, every (current group, previous episode) pair is scored with
    ``continuity_score`` and three passes assign episodes:

    1. merge: a current group with two or more matches at or above
       ``_MERGE_SCORE_THRESHOLD`` starts a new episode (``MERGE_NEW``) that
       absorbs those previous episodes;
    2. continue: each remaining previous episode (most recent first) is
       handed to its best still-unassigned current group (``CONTINUED`` or
       ``SPLIT_CONTINUE``);
    3. new: every current group still unassigned gets a fresh episode id
       (``SPLIT_NEW`` when it matched some previous episode, else ``NEW``).

    Previous episodes of a processed lineage that are neither continued nor
    merged are reported as expired. Lineages that appear only in
    ``previous_by_lineage`` are not visited, so their episodes are never
    expired by this function.

    Args:
        groups: Current-cycle groups.
        previous_by_lineage: Previous episode states keyed by lineage key.

    Returns:
        EpisodePlan with the assignments keyed by ``(group_key,
        sub_cluster_id)``, the expired episode ids and a mapping of merged
        previous episode id to the new episode id.
    """
    assignments: dict[tuple[str, int], EpisodeAssignment] = {}
    expired_episode_ids: set[str] = set()
    merged_episode_targets: dict[str, str] = {}

    # Bucket the current groups by lineage key.
    groups_by_lineage: dict[str, list[GroupEpisodeInput]] = {}
    for group in groups:
        groups_by_lineage.setdefault(group.normalized_parent_name, []).append(group)

    for lineage_key, current_groups in groups_by_lineage.items():
        previous_groups = previous_by_lineage.get(lineage_key, [])
        # current.key -> [(previous, score, overlap_count, distance_nm)]
        qualified_matches: dict[tuple[str, int], list[tuple[EpisodeState, float, int, float]]] = {}
        # previous.episode_id -> [(current, score, overlap_count, distance_nm)]
        prior_to_currents: dict[str, list[tuple[GroupEpisodeInput, float, int, float]]] = {}

        # A pair qualifies on score alone, or on any shared member combined
        # with centres no farther apart than the distance threshold.
        for current in current_groups:
            for previous in previous_groups:
                score, overlap_count, distance_nm = continuity_score(current, previous)
                if score >= _CONTINUITY_SCORE_THRESHOLD or (
                    overlap_count > 0 and distance_nm <= _CENTER_DISTANCE_THRESHOLD_NM
                ):
                    qualified_matches.setdefault(current.key, []).append((previous, score, overlap_count, distance_nm))
                    prior_to_currents.setdefault(previous.episode_id, []).append((current, score, overlap_count, distance_nm))

        consumed_previous_ids: set[str] = set()
        assigned_current_keys: set[tuple[str, int]] = set()

        # Pass 1 — merges.
        for current in current_groups:
            # Best match first: higher score, more overlap, shorter distance,
            # then the more recent snapshot.
            matches = sorted(
                qualified_matches.get(current.key, []),
                key=lambda item: (item[1], item[2], -item[3], item[0].last_snapshot_time),
                reverse=True,
            )
            merge_candidates = [
                item for item in matches
                if item[1] >= _MERGE_SCORE_THRESHOLD
            ]
            if len(merge_candidates) >= 2:
                episode_id = f"ep-{uuid4().hex[:12]}"
                merged_ids = [item[0].episode_id for item in merge_candidates]
                assignments[current.key] = EpisodeAssignment(
                    group_key=current.group_key,
                    sub_cluster_id=current.sub_cluster_id,
                    normalized_parent_name=current.normalized_parent_name,
                    episode_id=episode_id,
                    continuity_source='MERGE_NEW',
                    continuity_score=round(max(item[1] for item in merge_candidates), 6),
                    split_from_episode_id=None,
                    merged_from_episode_ids=merged_ids,
                    member_mmsis=current.member_mmsis,
                    member_count=current.member_count,
                    center_lat=current.center_lat,
                    center_lon=current.center_lon,
                )
                assigned_current_keys.add(current.key)
                # NOTE(review): consumed_previous_ids is not consulted in this
                # pass, so one previous episode can be a merge candidate for
                # several current groups; the last one wins in
                # merged_episode_targets — confirm this is intended.
                for merged_id in merged_ids:
                    consumed_previous_ids.add(merged_id)
                    merged_episode_targets[merged_id] = episode_id

        # Pass 2 — continuations, most recently seen previous episode first.
        previous_ranked = sorted(
            previous_groups,
            key=lambda item: item.last_snapshot_time,
            reverse=True,
        )
        for previous in previous_ranked:
            if previous.episode_id in consumed_previous_ids:
                continue
            matches = [
                item for item in prior_to_currents.get(previous.episode_id, [])
                if item[0].key not in assigned_current_keys
            ]
            if not matches:
                continue
            matches.sort(key=lambda item: (item[1], item[2], -item[3]), reverse=True)
            current, score, _, _ = matches[0]
            # Counts ALL qualifying current groups (including already
            # assigned ones): more than one means the episode has split.
            split_candidate_count = len(prior_to_currents.get(previous.episode_id, []))
            assignments[current.key] = EpisodeAssignment(
                group_key=current.group_key,
                sub_cluster_id=current.sub_cluster_id,
                normalized_parent_name=current.normalized_parent_name,
                episode_id=previous.episode_id,
                continuity_source='SPLIT_CONTINUE' if split_candidate_count > 1 else 'CONTINUED',
                continuity_score=score,
                split_from_episode_id=None,
                merged_from_episode_ids=[],
                member_mmsis=current.member_mmsis,
                member_count=current.member_count,
                center_lat=current.center_lat,
                center_lon=current.center_lon,
            )
            assigned_current_keys.add(current.key)
            consumed_previous_ids.add(previous.episode_id)

        # Pass 3 — everything left gets a brand-new episode id.
        for current in current_groups:
            if current.key in assigned_current_keys:
                continue

            matches = sorted(
                qualified_matches.get(current.key, []),
                key=lambda item: (item[1], item[2], -item[3], item[0].last_snapshot_time),
                reverse=True,
            )
            split_from_episode_id = None
            continuity_source = 'NEW'
            continuity_score_value = 0.0
            if matches:
                # It matched a previous episode that was given to another
                # group: record that episode as the split origin.
                best_previous, score, _, _ = matches[0]
                split_from_episode_id = best_previous.episode_id
                continuity_source = 'SPLIT_NEW'
                continuity_score_value = score

            assignments[current.key] = EpisodeAssignment(
                group_key=current.group_key,
                sub_cluster_id=current.sub_cluster_id,
                normalized_parent_name=current.normalized_parent_name,
                episode_id=f"ep-{uuid4().hex[:12]}",
                continuity_source=continuity_source,
                continuity_score=continuity_score_value,
                split_from_episode_id=split_from_episode_id,
                merged_from_episode_ids=[],
                member_mmsis=current.member_mmsis,
                member_count=current.member_count,
                center_lat=current.center_lat,
                center_lon=current.center_lon,
            )
            assigned_current_keys.add(current.key)

        # Expire previous episodes of this lineage that are neither still in
        # use by an assignment nor recorded as merged.
        current_previous_ids = {assignment.episode_id for assignment in assignments.values() if assignment.normalized_parent_name == lineage_key}
        for previous in previous_groups:
            if previous.episode_id in merged_episode_targets:
                continue
            if previous.episode_id not in current_previous_ids:
                expired_episode_ids.add(previous.episode_id)

    return EpisodePlan(
        assignments=assignments,
        expired_episode_ids=expired_episode_ids,
        merged_episode_targets=merged_episode_targets,
    )
_LINEAGE_PRIOR_WINDOW_DAYS), + ) + result: dict[tuple[str, str], dict[str, Any]] = {} + for lineage_key, candidate_mmsi, seen_count, top1_count, top3_count, avg_score, last_seen_at in cur.fetchall(): + result[(lineage_key, candidate_mmsi)] = { + 'seen_count': int(seen_count or 0), + 'top1_count': int(top1_count or 0), + 'top3_count': int(top3_count or 0), + 'avg_score': float(avg_score or 0.0), + 'last_seen_at': last_seen_at, + } + return result + finally: + cur.close() + + +def load_label_prior_stats(conn, lineage_keys: list[str]) -> dict[tuple[str, str], dict[str, Any]]: + if not lineage_keys: + return {} + cur = conn.cursor() + try: + cur.execute( + f""" + SELECT normalized_parent_name, label_parent_mmsi, + COUNT(*) AS session_count, + MAX(active_from) AS last_labeled_at + FROM {GEAR_PARENT_LABEL_SESSIONS} + WHERE normalized_parent_name = ANY(%s) + AND active_from >= NOW() - (%s * INTERVAL '1 day') + GROUP BY normalized_parent_name, label_parent_mmsi + """, + (lineage_keys, _LABEL_PRIOR_WINDOW_DAYS), + ) + result: dict[tuple[str, str], dict[str, Any]] = {} + for lineage_key, candidate_mmsi, session_count, last_labeled_at in cur.fetchall(): + result[(lineage_key, candidate_mmsi)] = { + 'session_count': int(session_count or 0), + 'last_labeled_at': last_labeled_at, + } + return result + finally: + cur.close() + + +def _recency_support(observed_at: Optional[datetime], now: datetime, hours: float) -> float: + if observed_at is None: + return 0.0 + if observed_at.tzinfo is None: + observed_at = observed_at.replace(tzinfo=timezone.utc) + delta_hours = max(0.0, (now - observed_at.astimezone(timezone.utc)).total_seconds() / 3600.0) + return _clamp(1.0 - (delta_hours / hours)) + + +def compute_prior_bonus_components( + observed_at: datetime, + normalized_parent_name: str, + episode_id: str, + candidate_mmsi: str, + episode_prior_stats: dict[tuple[str, str], dict[str, Any]], + lineage_prior_stats: dict[tuple[str, str], dict[str, Any]], + label_prior_stats: dict[tuple[str, 
str], dict[str, Any]], +) -> dict[str, float]: + episode_stats = episode_prior_stats.get((episode_id, candidate_mmsi), {}) + lineage_stats = lineage_prior_stats.get((normalized_parent_name, candidate_mmsi), {}) + label_stats = label_prior_stats.get((normalized_parent_name, candidate_mmsi), {}) + + episode_bonus = 0.0 + if episode_stats: + episode_bonus = _EPISODE_PRIOR_MAX * ( + 0.35 * min(1.0, episode_stats.get('seen_count', 0) / 6.0) + + 0.35 * min(1.0, episode_stats.get('top1_count', 0) / 3.0) + + 0.15 * _clamp(float(episode_stats.get('avg_score', 0.0))) + + 0.15 * _recency_support(episode_stats.get('last_seen_at'), observed_at, _EPISODE_PRIOR_WINDOW_HOURS) + ) + + lineage_bonus = 0.0 + if lineage_stats: + lineage_bonus = _LINEAGE_PRIOR_MAX * ( + 0.30 * min(1.0, lineage_stats.get('seen_count', 0) / 12.0) + + 0.25 * min(1.0, lineage_stats.get('top3_count', 0) / 6.0) + + 0.20 * min(1.0, lineage_stats.get('top1_count', 0) / 3.0) + + 0.15 * _clamp(float(lineage_stats.get('avg_score', 0.0))) + + 0.10 * _recency_support(lineage_stats.get('last_seen_at'), observed_at, _LINEAGE_PRIOR_WINDOW_DAYS * 24.0) + ) + + label_bonus = 0.0 + if label_stats: + label_bonus = _LABEL_PRIOR_MAX * ( + 0.70 * min(1.0, label_stats.get('session_count', 0) / 3.0) + + 0.30 * _recency_support(label_stats.get('last_labeled_at'), observed_at, _LABEL_PRIOR_WINDOW_DAYS * 24.0) + ) + + total = min(_TOTAL_PRIOR_CAP, episode_bonus + lineage_bonus + label_bonus) + return { + 'episodePriorBonus': round(episode_bonus, 6), + 'lineagePriorBonus': round(lineage_bonus, 6), + 'labelPriorBonus': round(label_bonus, 6), + 'priorBonusTotal': round(total, 6), + } + + +def sync_episode_states(conn, observed_at: datetime, plan: EpisodePlan) -> None: + cur = conn.cursor() + try: + if plan.expired_episode_ids: + cur.execute( + f""" + UPDATE {GEAR_GROUP_EPISODES} + SET status = 'EXPIRED', + updated_at = %s + WHERE episode_id = ANY(%s) + """, + (observed_at, list(plan.expired_episode_ids)), + ) + + for 
previous_episode_id, merged_into_episode_id in plan.merged_episode_targets.items(): + cur.execute( + f""" + UPDATE {GEAR_GROUP_EPISODES} + SET status = 'MERGED', + merged_into_episode_id = %s, + updated_at = %s + WHERE episode_id = %s + """, + (merged_into_episode_id, observed_at, previous_episode_id), + ) + + for assignment in plan.assignments.values(): + cur.execute( + f""" + INSERT INTO {GEAR_GROUP_EPISODES} ( + episode_id, lineage_key, group_key, normalized_parent_name, + current_sub_cluster_id, status, continuity_source, continuity_score, + first_seen_at, last_seen_at, last_snapshot_time, + current_member_count, current_member_mmsis, current_center_point, + split_from_episode_id, merged_from_episode_ids, metadata, updated_at + ) VALUES ( + %s, %s, %s, %s, + %s, 'ACTIVE', %s, %s, + %s, %s, %s, + %s, %s::jsonb, ST_SetSRID(ST_MakePoint(%s, %s), 4326), + %s, %s::jsonb, '{{}}'::jsonb, %s + ) + ON CONFLICT (episode_id) + DO UPDATE SET + group_key = EXCLUDED.group_key, + normalized_parent_name = EXCLUDED.normalized_parent_name, + current_sub_cluster_id = EXCLUDED.current_sub_cluster_id, + status = 'ACTIVE', + continuity_source = EXCLUDED.continuity_source, + continuity_score = EXCLUDED.continuity_score, + last_seen_at = EXCLUDED.last_seen_at, + last_snapshot_time = EXCLUDED.last_snapshot_time, + current_member_count = EXCLUDED.current_member_count, + current_member_mmsis = EXCLUDED.current_member_mmsis, + current_center_point = EXCLUDED.current_center_point, + split_from_episode_id = COALESCE(EXCLUDED.split_from_episode_id, {GEAR_GROUP_EPISODES}.split_from_episode_id), + merged_from_episode_ids = EXCLUDED.merged_from_episode_ids, + updated_at = EXCLUDED.updated_at + """, + ( + assignment.episode_id, + assignment.normalized_parent_name, + assignment.group_key, + assignment.normalized_parent_name, + assignment.sub_cluster_id, + assignment.continuity_source, + assignment.continuity_score, + observed_at, + observed_at, + observed_at, + assignment.member_count, + 
json.dumps(assignment.member_mmsis, ensure_ascii=False), + assignment.center_lon, + assignment.center_lat, + assignment.split_from_episode_id, + json.dumps(assignment.merged_from_episode_ids, ensure_ascii=False), + observed_at, + ), + ) + finally: + cur.close() + + +def insert_episode_snapshots( + conn, + observed_at: datetime, + plan: EpisodePlan, + snapshot_payloads: dict[tuple[str, int], dict[str, Any]], +) -> int: + if not snapshot_payloads: + return 0 + rows: list[tuple[Any, ...]] = [] + for key, payload in snapshot_payloads.items(): + assignment = plan.assignments.get(key) + if assignment is None: + continue + rows.append(( + assignment.episode_id, + assignment.normalized_parent_name, + assignment.group_key, + assignment.normalized_parent_name, + assignment.sub_cluster_id, + observed_at, + assignment.member_count, + json.dumps(assignment.member_mmsis, ensure_ascii=False), + assignment.center_lon, + assignment.center_lat, + assignment.continuity_source, + assignment.continuity_score, + json.dumps(payload.get('parentEpisodeIds') or assignment.merged_from_episode_ids, ensure_ascii=False), + payload.get('topCandidateMmsi'), + payload.get('topCandidateScore'), + payload.get('resolutionStatus'), + json.dumps(payload.get('metadata') or {}, ensure_ascii=False), + )) + + if not rows: + return 0 + + cur = conn.cursor() + try: + from psycopg2.extras import execute_values + execute_values( + cur, + f""" + INSERT INTO {GEAR_GROUP_EPISODE_SNAPSHOTS} ( + episode_id, lineage_key, group_key, normalized_parent_name, sub_cluster_id, + observed_at, member_count, member_mmsis, center_point, + continuity_source, continuity_score, parent_episode_ids, + top_candidate_mmsi, top_candidate_score, resolution_status, metadata + ) VALUES %s + ON CONFLICT (episode_id, observed_at) DO NOTHING + """, + rows, + template="(%s, %s, %s, %s, %s, %s, %s, %s::jsonb, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s, %s, %s::jsonb, %s, %s, %s, %s::jsonb)", + page_size=200, + ) + return len(rows) + finally: 
# Korea Standard Time (UTC+9); tz-naive timestamps are interpreted in this zone.
_KST = timezone(timedelta(hours=9))


def _to_epoch_ms(ts) -> int:
    """Convert a timestamp-like value to integer epoch milliseconds.

    Objects exposing ``timestamp()`` (``datetime``, ``pandas.Timestamp``) are
    converted through it; tz-naive values are treated as KST wall-clock time.
    Anything else is assumed to already be numeric and is truncated with
    ``int()``.
    """
    if not hasattr(ts, 'timestamp'):
        return int(ts)

    if getattr(ts, 'tzinfo', None) is None:
        # pandas is imported lazily, exactly as before, and only for naive input.
        import pandas as pd

        if isinstance(ts, pd.Timestamp):
            ts = ts.tz_localize(_KST)
        else:
            ts = ts.replace(tzinfo=_KST)

    return int(ts.timestamp() * 1000)
def _to_aware_utc(value: Any) -> Optional[datetime]:
    """Normalise a timestamp-like value to a timezone-aware UTC ``datetime``.

    Args:
        value: ``None``, a ``datetime`` (including subclasses), or any object
            whose ``str()`` form is an ISO-8601 timestamp.

    Returns:
        The value expressed in UTC, or ``None`` when ``value`` is ``None`` or
        cannot be parsed. tz-naive inputs are assumed to already be in UTC.

    NOTE(review): ``_to_epoch_ms`` in this module treats tz-naive values as
    KST, whereas this helper treats them as UTC - confirm that callers never
    feed the same naive timestamps to both.
    """
    if value is None:
        return None

    if isinstance(value, datetime):
        parsed = value
    else:
        try:
            text = str(value).strip()
            # datetime.fromisoformat only accepts a trailing "Z" from Python
            # 3.11 onward; rewrite it so older interpreters parse it too.
            if text.endswith('Z'):
                text = f'{text[:-1]}+00:00'
            parsed = datetime.fromisoformat(text)
        except (TypeError, ValueError):
            return None

    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)
def _haversine_nm(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two points in nautical miles.

    Args:
        lat1, lon1: First point in decimal degrees.
        lat2, lon2: Second point in decimal degrees.

    Returns:
        Haversine distance on a sphere of radius 3440.065 NM.
    """
    earth_radius_nm = 3440.065
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))
    return earth_radius_nm * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
def _candidate_sources(candidate: Optional[CandidateScore]) -> set[str]:
    """Return the set of source tags recorded in a candidate's evidence.

    Yields an empty set when there is no candidate or when the ``'sources'``
    evidence entry is not a list; falsy list items are dropped and the rest
    are stringified.
    """
    sources = candidate.evidence.get('sources') if candidate is not None else None
    if not isinstance(sources, list):
        return set()
    return set(map(str, filter(None, sources)))
def _json_to_dict(value: Any) -> dict[str, Any]:
    """Coerce a JSON/JSONB column value into a dictionary.

    Args:
        value: ``None``, an already-decoded ``dict``, or a JSON document as
            ``str``/``bytes``.

    Returns:
        The decoded mapping. An empty dict is returned for ``None``, for
        undecodable input, and for JSON documents whose top-level value is
        not an object (list, string, number, ``null``), so callers can always
        rely on ``.get``.
    """
    if value is None:
        return {}
    if isinstance(value, dict):
        return value
    try:
        decoded = json.loads(value)
    except (TypeError, ValueError):
        # TypeError: unsupported input type; ValueError covers JSONDecodeError.
        return {}
    return decoded if isinstance(decoded, dict) else {}
def _load_active_candidate_exclusions(conn, group_keys: list[str]) -> dict[str, Any]:
    """Load currently active parent-candidate exclusions.

    Returns a dict with two entries: ``'global'`` is a set of candidate MMSIs
    excluded everywhere, and ``'group'`` maps ``(group_key, sub_cluster_id)``
    to the set of MMSIs excluded for that group only. Group-scoped rows are
    limited to ``group_keys``; GLOBAL rows are always loaded.
    """
    global_exclusions: set = set()
    group_exclusions: dict = {}
    exclusions: dict[str, Any] = {
        'global': global_exclusions,
        'group': group_exclusions,
    }
    cursor = conn.cursor()
    try:
        cursor.execute(
            f"""
            SELECT scope_type, group_key, sub_cluster_id, candidate_mmsi
            FROM {GEAR_PARENT_CANDIDATE_EXCLUSIONS}
            WHERE released_at IS NULL
              AND active_from <= NOW()
              AND (active_until IS NULL OR active_until > NOW())
              AND (scope_type = 'GLOBAL' OR group_key = ANY(%s))
            ORDER BY active_from DESC, id DESC
            """,
            # A single empty key keeps ANY(%s) valid when no groups are given.
            (group_keys or [''],),
        )
        for scope_type, group_key, sub_cluster_id, candidate_mmsi in cursor.fetchall():
            if scope_type == 'GLOBAL':
                global_exclusions.add(candidate_mmsi)
            else:
                scoped = group_exclusions.setdefault((group_key, int(sub_cluster_id)), set())
                scoped.add(candidate_mmsi)
        return exclusions
    finally:
        cursor.close()
def _load_raw_metric_averages(conn, group_keys: list[str]) -> dict[tuple[str, int, str], dict[str, float]]:
    """Average the recent raw correlation metrics per (group, sub-cluster, target).

    Args:
        conn: DB-API connection; only ``cursor()`` is used.
        group_keys: Group keys to load. An empty list short-circuits to ``{}``.

    Returns:
        Mapping of ``(group_key, sub_cluster_id, target_mmsi)`` to the averaged
        ``visit_score_6h`` / ``proximity_score_6h`` / ``activity_sync_score_6h``
        values (NULL metrics count as 0).

    The look-back window is taken from ``_RAW_SCORE_WINDOW_HOURS`` so the query
    stays in step with the ``scoreWindowHours`` value reported in candidate
    evidence instead of duplicating the literal in SQL.
    """
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT group_key,
                   sub_cluster_id,
                   target_mmsi,
                   AVG(COALESCE(visit_score, 0)) AS avg_visit,
                   AVG(COALESCE(proximity_ratio, 0)) AS avg_proximity,
                   AVG(COALESCE(activity_sync, 0)) AS avg_activity
            FROM {GEAR_CORRELATION_RAW_METRICS}
            WHERE group_key = ANY(%s)
              AND observed_at > NOW() - (%s * INTERVAL '1 hour')
            GROUP BY group_key, sub_cluster_id, target_mmsi
            """,
            (group_keys, _RAW_SCORE_WINDOW_HOURS),
        )
        return {
            (group_key, int(sub_cluster_id), target_mmsi): {
                'visit_score_6h': float(avg_visit or 0.0),
                'proximity_score_6h': float(avg_proximity or 0.0),
                'activity_sync_score_6h': float(avg_activity or 0.0),
            }
            for group_key, sub_cluster_id, target_mmsi, avg_visit, avg_proximity, avg_activity
            in cur.fetchall()
        }
    finally:
        cur.close()
def _candidate_name(candidate_mmsi: str, all_positions: dict[str, dict], registry: Optional[RegistryVessel]) -> str:
    """Pick a display name for a candidate vessel.

    Preference order: the ``'name'`` of the candidate's entry in
    ``all_positions``, then the registry's ``name_cn``, then ``name_en``.
    The MMSI itself is returned when none of those is non-empty.
    """
    position = all_positions.get(candidate_mmsi) or {}
    registry_names = () if registry is None else (registry.name_cn, registry.name_en)
    for name in (position.get('name', ''), *registry_names):
        if name:
            return name
    return candidate_mmsi
def _build_candidate_scores(
    vessel_store,
    observed_at: datetime,
    group: dict[str, Any],
    episode_assignment,
    default_model_id: int,
    default_model_name: str,
    score_rows: list[dict[str, Any]],
    raw_metrics: dict[tuple[str, int, str], dict[str, float]],
    center_track: list[dict[str, Any]],
    all_positions: dict[str, dict],
    registry_by_mmsi: dict[str, RegistryVessel],
    registry_by_name: dict[str, list[RegistryVessel]],
    existing: Optional[dict[str, Any]],
    excluded_candidate_mmsis: set[str],
    episode_prior_stats: dict[tuple[str, str], dict[str, Any]],
    lineage_prior_stats: dict[tuple[str, str], dict[str, Any]],
    label_prior_stats: dict[tuple[str, str], dict[str, Any]],
) -> list[CandidateScore]:
    """Collect and score parent-vessel candidates for one gear group.

    Candidates come from three sources: the top ``_MAX_CORRELATION_CANDIDATES``
    correlation rows (``CORRELATION``), registry vessels whose normalised name
    equals the group's (``REGISTRY_NAME``), and the previously selected/top
    candidate when the stored resolution belongs to the same episode
    (``PREVIOUS_SELECTION``). A candidate rejected within the last
    ``_REJECT_COOLDOWN_HOURS`` and every MMSI in ``excluded_candidate_mmsis``
    are dropped before scoring.

    Each remaining candidate gets a weighted score
    (0.35 correlation + 0.15 name match + 0.15 track similarity + 0.10 visit
    + 0.10 proximity + 0.05 activity sync + 0.10 stability + registry bonus),
    then the China-MMSI prefix bonus and the episode/lineage/label prior
    bonuses, each step clamped to [0, 1]. Track-derived components are scaled
    by the coverage factors from ``_build_track_coverage_metrics``.

    Returns:
        ``CandidateScore`` objects sorted best-first by final score, then
        correlation score, stability, name match and MMSI.
    """
    nm_to_m = 1852.0  # metres per nautical mile

    group_key = group['parent_name']
    sub_cluster_id = int(group.get('sub_cluster_id', 0))
    normalized_parent_name = normalize_parent_name(group_key)
    members = group.get('members') or []
    if members:
        gear_center_lat = sum(float(member['lat']) for member in members) / len(members)
        gear_center_lon = sum(float(member['lon']) for member in members) / len(members)
    else:
        gear_center_lat = 0.0
        gear_center_lon = 0.0

    candidates: dict[str, dict[str, Any]] = {}
    score_lookup = {row['target_mmsi']: row for row in score_rows}
    # v2: time-aligned comparison input (ts = epoch milliseconds).
    center_track_temporal = [
        {'lat': float(point['lat']), 'lon': float(point['lon']),
         'ts': _to_epoch_ms(point['timestamp'])}
        for point in center_track
        if point.get('lat') is not None and point.get('lon') is not None and point.get('timestamp') is not None
    ]

    for row in score_rows[:_MAX_CORRELATION_CANDIDATES]:
        candidates.setdefault(row['target_mmsi'], {'sources': set()})['sources'].add('CORRELATION')

    for vessel in registry_by_name.get(normalized_parent_name, []):
        if vessel.mmsi:
            candidates.setdefault(vessel.mmsi, {'sources': set()})['sources'].add('REGISTRY_NAME')

    if existing is not None and existing.get('episode_id') == episode_assignment.episode_id:
        # ``or {}`` also covers an explicit None stored under 'evidence_summary'.
        previous_summary = existing.get('evidence_summary') or {}
        current_candidate = existing.get('selected_parent_mmsi') or previous_summary.get('topCandidateMmsi')
        if current_candidate:
            candidates.setdefault(current_candidate, {'sources': set()})['sources'].add('PREVIOUS_SELECTION')

    if existing is not None:
        rejected_mmsi = existing.get('rejected_candidate_mmsi')
        # Normalise first: comparing a tz-naive (or string) value with the
        # aware cutoff below would raise TypeError.
        rejected_at = _to_aware_utc(existing.get('rejected_at'))
        if rejected_mmsi and rejected_at is not None:
            cutoff = datetime.now(timezone.utc) - timedelta(hours=_REJECT_COOLDOWN_HOURS)
            if rejected_at >= cutoff:
                candidates.pop(rejected_mmsi, None)

    for excluded_mmsi in excluded_candidate_mmsis:
        candidates.pop(excluded_mmsi, None)

    scored: list[CandidateScore] = []
    for candidate_mmsi, meta in candidates.items():
        registry = registry_by_mmsi.get(candidate_mmsi)
        score_row = score_lookup.get(candidate_mmsi, {})
        raw = raw_metrics.get((group_key, sub_cluster_id, candidate_mmsi), {})
        vessel_track = _get_vessel_track(vessel_store, candidate_mmsi, hours=6)
        raw_track_similarity = 0.0
        vessel_track_temporal: list[dict] = []
        if center_track_temporal and vessel_track:
            vessel_track_temporal = [
                {'lat': p['lat'], 'lon': p['lon'], 'cog': p.get('cog'),
                 'ts': _to_epoch_ms(p['timestamp'])}
                for p in vessel_track if p.get('lat') is not None and p.get('lon') is not None
            ]
            raw_track_similarity = compute_track_similarity_v2(
                center_track_temporal, vessel_track_temporal,
            )

        base_corr_score = float(score_row.get('current_score', 0.0) or 0.0)
        streak_count = int(score_row.get('streak_count', 0) or 0)
        stability_score = _clamp(streak_count / 18.0)
        candidate_name = _candidate_name(candidate_mmsi, all_positions, registry)
        name_match_score = _name_match_score(group_key, candidate_name, registry)
        registry_bonus = 0.05 if registry is not None else 0.0
        raw_visit_score = float(raw.get('visit_score_6h', 0.0) or 0.0)
        raw_proximity_score = float(raw.get('proximity_score_6h', 0.0) or 0.0)
        raw_activity_score = float(raw.get('activity_sync_score_6h', 0.0) or 0.0)
        coverage_metrics = _build_track_coverage_metrics(
            center_track=center_track,
            vessel_track=vessel_track,
            gear_center_lat=gear_center_lat,
            gear_center_lon=gear_center_lon,
        )
        track_coverage_factor = float(coverage_metrics['trackCoverageFactor'])
        visit_coverage_factor = float(coverage_metrics['visitCoverageFactor'])
        activity_coverage_factor = float(coverage_metrics['activityCoverageFactor'])
        track_similarity = _clamp(raw_track_similarity * track_coverage_factor)
        visit_score = _clamp(raw_visit_score * visit_coverage_factor)
        activity_score = _clamp(raw_activity_score * activity_coverage_factor)

        # Proximity: distance-banded score over time slots shared by the
        # resampled centre track and the resampled vessel track.
        # NOTE(review): the centre-track resample does not depend on the
        # candidate; it could be hoisted if _resample_temporal is pure.
        proximity_score = 0.0
        if center_track_temporal and vessel_track:
            slots_c = _resample_temporal(center_track_temporal)
            slots_v = _resample_temporal(vessel_track_temporal)
            map_c = {s['ts']: s for s in slots_c if s is not None}
            map_v = {s['ts']: s for s in slots_v if s is not None}
            common_ts = sorted(set(map_c.keys()) & set(map_v.keys()))
            if len(common_ts) >= 3:
                prox_sum = 0.0
                for ts in common_ts:
                    sc, sv = map_c[ts], map_v[ts]
                    d_m = haversine_m(sc['lat'], sc['lon'], sv['lat'], sv['lon'])
                    d_nm = d_m / nm_to_m
                    if d_nm < 2.5:
                        prox_sum += 1.0
                    elif d_nm < 5.0:
                        prox_sum += 0.5
                    elif d_nm < 10.0:
                        prox_sum += 0.15
                proximity_score = _clamp(prox_sum / len(common_ts) * track_coverage_factor)

        weighted_score = (
            0.35 * base_corr_score
            + 0.15 * name_match_score
            + 0.15 * track_similarity
            + 0.10 * visit_score
            + 0.10 * proximity_score
            + 0.05 * activity_score
            + 0.10 * stability_score
            + registry_bonus
        )
        pre_bonus_score, china_mmsi_bonus, final_score = _apply_final_score_bonus(
            candidate_mmsi,
            weighted_score,
        )
        prior_bonus = compute_prior_bonus_components(
            observed_at=observed_at,
            normalized_parent_name=normalized_parent_name,
            episode_id=episode_assignment.episode_id,
            candidate_mmsi=candidate_mmsi,
            episode_prior_stats=episode_prior_stats,
            lineage_prior_stats=lineage_prior_stats,
            label_prior_stats=label_prior_stats,
        )
        final_score = _clamp(final_score + prior_bonus['priorBonusTotal'])

        evidence = {
            'normalizedParentName': normalized_parent_name,
            'episodeId': episode_assignment.episode_id,
            'continuitySource': episode_assignment.continuity_source,
            'continuityScore': round(float(episode_assignment.continuity_score or 0.0), 6),
            'sources': sorted(meta['sources']),
            'trackAvailable': bool(vessel_track),
            'registryMatched': registry is not None,
            'coverage': coverage_metrics,
            'evidenceConfidence': coverage_metrics['coverageFactor'],
            'scoreBreakdown': {
                'baseCorrScore': round(base_corr_score, 4),
                'nameMatchScore': round(name_match_score, 4),
                'trackSimilarityScore': round(track_similarity, 4),
                'visitScore6h': round(visit_score, 4),
                'proximityScore6h': round(proximity_score, 4),
                'activitySyncScore6h': round(activity_score, 4),
                'stabilityScore': round(stability_score, 4),
                'registryBonus': round(registry_bonus, 4),
                'preBonusScore': round(pre_bonus_score, 4),
                'chinaMmsiBonus': round(china_mmsi_bonus, 4),
                'episodePriorBonus': round(prior_bonus['episodePriorBonus'], 4),
                'lineagePriorBonus': round(prior_bonus['lineagePriorBonus'], 4),
                'labelPriorBonus': round(prior_bonus['labelPriorBonus'], 4),
                'priorBonusTotal': round(prior_bonus['priorBonusTotal'], 4),
            },
            'scoreBreakdownRaw': {
                'trackSimilarityScore': round(raw_track_similarity, 4),
                'visitScore6h': round(raw_visit_score, 4),
                'proximityScore6h': round(raw_proximity_score, 4),
                'activitySyncScore6h': round(raw_activity_score, 4),
            },
            'chinaMmsiBonusApplied': china_mmsi_bonus > 0.0,
        }
        scored.append(CandidateScore(
            mmsi=candidate_mmsi,
            name=candidate_name,
            vessel_id=registry.vessel_id if registry is not None else None,
            target_type='VESSEL',
            candidate_source=','.join(sorted(meta['sources'])),
            base_corr_score=round(base_corr_score, 6),
            name_match_score=round(name_match_score, 6),
            track_similarity_score=round(track_similarity, 6),
            visit_score_6h=round(visit_score, 6),
            proximity_score_6h=round(proximity_score, 6),
            activity_sync_score_6h=round(activity_score, 6),
            stability_score=round(stability_score, 6),
            registry_bonus=round(registry_bonus, 6),
            episode_prior_bonus=round(prior_bonus['episodePriorBonus'], 6),
            lineage_prior_bonus=round(prior_bonus['lineagePriorBonus'], 6),
            label_prior_bonus=round(prior_bonus['labelPriorBonus'], 6),
            final_score=round(final_score, 6),
            streak_count=streak_count,
            model_id=default_model_id,
            model_name=default_model_name,
            evidence=evidence,
        ))

    scored.sort(
        key=lambda item: (
            item.final_score,
            item.base_corr_score,
            item.stability_score,
            item.name_match_score,
            item.mmsi,
        ),
        reverse=True,
    )
    return scored
def _upsert_resolution(conn, row: tuple) -> None:
    """Insert or update one gear-group parent resolution row.

    The row is keyed by ``(group_key, sub_cluster_id)``; on conflict every
    non-key column listed below is overwritten with the incoming value,
    including the manual-review fields (``approved_by``, ``approved_at``,
    ``manual_comment``, ``rejected_candidate_mmsi``, ``rejected_at``).

    Args:
        conn: DB-API connection; only ``cursor()`` is used. This function does
            not call ``commit`` itself.
        row: 27 positional values in exactly the column order of the INSERT
            list. The 26th value (``evidence_summary``) is cast to ``jsonb``
            by the statement, so it is presumably passed as JSON text -
            TODO confirm against the caller.
    """
    cur = conn.cursor()
    try:
        # The placeholder rows mirror the column rows one-to-one
        # (4 + 4 + 4 + 5 + 4 + 4 + 2 = 27); keep them aligned when editing.
        cur.execute(
            f"""
            INSERT INTO {GEAR_GROUP_PARENT_RESOLUTION} (
                group_key, sub_cluster_id, parent_name, normalized_parent_name,
                episode_id, continuity_source, continuity_score, prior_bonus_total,
                status, selected_parent_mmsi, selected_parent_name, selected_vessel_id,
                confidence, decision_source, top_score, second_score, score_margin,
                stable_cycles, last_evaluated_at, last_promoted_at, approved_by,
                approved_at, manual_comment, rejected_candidate_mmsi, rejected_at,
                evidence_summary, updated_at
            ) VALUES (
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s::jsonb, %s
            )
            ON CONFLICT (group_key, sub_cluster_id)
            DO UPDATE SET
                parent_name = EXCLUDED.parent_name,
                normalized_parent_name = EXCLUDED.normalized_parent_name,
                episode_id = EXCLUDED.episode_id,
                continuity_source = EXCLUDED.continuity_source,
                continuity_score = EXCLUDED.continuity_score,
                prior_bonus_total = EXCLUDED.prior_bonus_total,
                status = EXCLUDED.status,
                selected_parent_mmsi = EXCLUDED.selected_parent_mmsi,
                selected_parent_name = EXCLUDED.selected_parent_name,
                selected_vessel_id = EXCLUDED.selected_vessel_id,
                confidence = EXCLUDED.confidence,
                decision_source = EXCLUDED.decision_source,
                top_score = EXCLUDED.top_score,
                second_score = EXCLUDED.second_score,
                score_margin = EXCLUDED.score_margin,
                stable_cycles = EXCLUDED.stable_cycles,
                last_evaluated_at = EXCLUDED.last_evaluated_at,
                last_promoted_at = EXCLUDED.last_promoted_at,
                approved_by = EXCLUDED.approved_by,
                approved_at = EXCLUDED.approved_at,
                manual_comment = EXCLUDED.manual_comment,
                rejected_candidate_mmsi = EXCLUDED.rejected_candidate_mmsi,
                rejected_at = EXCLUDED.rejected_at,
                evidence_summary = EXCLUDED.evidence_summary,
                updated_at = EXCLUDED.updated_at
            """,
            row,
        )
    finally:
        # Always release the cursor, even when execute() raises.
        cur.close()
def run_gear_parent_inference(vessel_store, gear_groups: list[dict], conn) -> dict[str, int]:
    """Run representative parent-vessel inference for gear groups.

    For every group that carries a ``parent_name`` the function:

    1. builds the episode plan and loads priors, registry, existing
       resolutions and per-group scoring inputs through ``conn``;
    2. resolves groups whose parent vessel is itself a member
       (``_direct_parent_member``) as a direct match;
    3. scores candidates for the remaining groups, derives a status via
       ``_select_status`` and upserts one resolution row per group;
    4. writes candidate snapshots, episode snapshots and label-tracking rows,
       then commits ``conn``.

    A resolution already in the manual-confirmed status is never downgraded:
    its stored selection is written back instead of the automatic result.

    Args:
        vessel_store: Store exposing ``get_all_latest_positions()``; it is
            also forwarded to ``_build_candidate_scores``.
        gear_groups: Group dicts; ``parent_name`` is required for a group to
            be processed, ``sub_cluster_id`` defaults to 0.
        conn: Database connection used for all loads/writes and committed once
            at the end.

    Returns:
        Counters keyed by ``groups``, ``candidates``, ``promoted``,
        ``review_required``, ``skipped``, ``no_candidate``, ``direct_matched``,
        ``episode_snapshots`` and ``label_tracking``. The same keys are
        returned when there is nothing to process.
    """
    observed_at = datetime.now(timezone.utc)
    active_groups = [group for group in gear_groups if group.get('parent_name')]
    if not active_groups:
        # Keep the key set identical to the normal return so callers can read
        # every counter (including 'label_tracking') unconditionally.
        return {
            'groups': 0,
            'candidates': 0,
            'promoted': 0,
            'review_required': 0,
            'skipped': 0,
            'no_candidate': 0,
            'direct_matched': 0,
            'episode_snapshots': 0,
            'label_tracking': 0,
        }

    group_keys = sorted({group['parent_name'] for group in active_groups})
    episode_inputs = [
        group_to_episode_input(group, normalize_parent_name(group['parent_name']))
        for group in active_groups
    ]
    lineage_keys = sorted({item.normalized_parent_name for item in episode_inputs if item.normalized_parent_name})
    previous_episodes = load_active_episode_states(conn, lineage_keys)
    episode_plan = build_episode_plan(episode_inputs, previous_episodes)
    episode_prior_stats = load_episode_prior_stats(conn, [assignment.episode_id for assignment in episode_plan.assignments.values()])
    lineage_prior_stats = load_lineage_prior_stats(conn, lineage_keys)
    label_prior_stats = load_label_prior_stats(conn, lineage_keys)
    registry_by_mmsi, registry_by_name = _load_registry(conn)
    _expire_label_sessions(conn)
    existing_resolution = _load_existing_resolution(conn, group_keys)
    all_positions = vessel_store.get_all_latest_positions()

    # Look up the direct parent member once per group and keep it next to the
    # group, instead of re-running the lookup for each partition and again
    # inside the loop.
    direct_parent_groups: list[tuple[dict, Any]] = []
    unresolved_groups: list[dict] = []
    for group in active_groups:
        direct_parent = _direct_parent_member(group, all_positions)
        if direct_parent is None:
            unresolved_groups.append(group)
        else:
            direct_parent_groups.append((group, direct_parent))

    default_model_id, default_model_name = _load_default_model(conn)
    correlation_scores = _load_correlation_scores(conn, default_model_id, group_keys)
    raw_metric_averages = _load_raw_metric_averages(conn, group_keys)
    center_tracks = _load_group_center_tracks(conn, group_keys)
    active_exclusions = _load_active_candidate_exclusions(conn, group_keys)
    active_label_sessions = _load_active_label_sessions(conn, group_keys)

    snapshot_rows: list[tuple] = []
    label_tracking_rows: list[tuple] = []
    episode_snapshot_payloads: dict[tuple[str, int], dict[str, Any]] = {}
    promoted = 0
    review_required = 0
    skipped = 0
    no_candidate = 0
    direct_matched = 0

    # ── Groups whose parent vessel is itself a member: direct match ──
    for group, direct_parent in direct_parent_groups:
        group_key = group['parent_name']
        sub_cluster_id = int(group.get('sub_cluster_id', 0))
        key = (group_key, sub_cluster_id)
        episode_assignment = episode_plan.assignments.get(key)
        if episode_assignment is None:
            continue
        existing = existing_resolution.get(key)
        normalized_parent_name = normalize_parent_name(group_key)
        direct_parent_mmsi = str(direct_parent.get('mmsi') or '')
        direct_parent_name = str(direct_parent.get('name') or group_key or direct_parent_mmsi)
        stable_cycles = _direct_parent_stable_cycles(existing, direct_parent_mmsi)
        status_reason = _status_reason(_DIRECT_PARENT_MATCH_STATUS)
        evidence_summary = {
            'episodeId': episode_assignment.episode_id,
            'continuitySource': episode_assignment.continuity_source,
            'continuityScore': episode_assignment.continuity_score,
            'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids,
            'splitFromEpisodeId': episode_assignment.split_from_episode_id,
            'normalizedParentName': normalized_parent_name,
            'candidateCount': 0,
            'directParentMmsi': direct_parent_mmsi,
            'directParentName': direct_parent_name,
            'statusReason': status_reason,
            'trackable': is_trackable_parent_name(group_key),
        }

        status = _DIRECT_PARENT_MATCH_STATUS
        decision_source = 'DIRECT_PARENT_MATCH'
        selected_parent_mmsi = direct_parent_mmsi
        selected_parent_name = direct_parent_name
        registry_entry = registry_by_mmsi.get(direct_parent_mmsi)
        selected_vessel_id = registry_entry.vessel_id if registry_entry is not None else None
        confidence = 1.0
        last_promoted_at = observed_at

        if existing is not None and existing.get('status') == _MANUAL_CONFIRMED_STATUS:
            # A manual confirmation wins over the automatic direct match.
            status = _MANUAL_CONFIRMED_STATUS
            decision_source = existing.get('decision_source') or 'MANUAL'
            selected_parent_mmsi = existing.get('selected_parent_mmsi') or selected_parent_mmsi
            selected_parent_name = existing.get('selected_parent_name') or selected_parent_name
            selected_vessel_id = existing.get('selected_vessel_id') if existing.get('selected_vessel_id') is not None else selected_vessel_id
            confidence = existing.get('confidence') or confidence
            last_promoted_at = existing.get('approved_at') or last_promoted_at
            # The stored summary may be missing or not a mapping; fall back to
            # the freshly computed reason instead of raising.
            prior_evidence = existing.get('evidence_summary')
            prior_reason = prior_evidence.get('statusReason') if isinstance(prior_evidence, dict) else None
            evidence_summary['statusReason'] = prior_reason or status_reason

        _upsert_resolution(
            conn,
            (
                group_key,
                sub_cluster_id,
                group_key,
                normalized_parent_name,
                episode_assignment.episode_id,
                episode_assignment.continuity_source,
                episode_assignment.continuity_score,
                0.0,
                status,
                selected_parent_mmsi,
                selected_parent_name,
                selected_vessel_id,
                confidence,
                decision_source,
                confidence or 0.0,
                0.0,
                confidence or 0.0,
                stable_cycles,
                observed_at,
                last_promoted_at,
                (existing or {}).get('approved_by'),
                (existing or {}).get('approved_at'),
                (existing or {}).get('manual_comment'),
                (existing or {}).get('rejected_candidate_mmsi'),
                (existing or {}).get('rejected_at'),
                json.dumps(evidence_summary, ensure_ascii=False),
                observed_at,
            ),
        )
        episode_snapshot_payloads[key] = {
            'parentEpisodeIds': episode_assignment.merged_from_episode_ids,
            'topCandidateMmsi': selected_parent_mmsi,
            'topCandidateScore': confidence or 1.0,
            'resolutionStatus': status,
            'metadata': {
                'splitFromEpisodeId': episode_assignment.split_from_episode_id,
                'directParentMmsi': direct_parent_mmsi,
            },
        }
        direct_matched += 1

    # ── Remaining groups: score candidates and pick a status ──
    for group in unresolved_groups:
        group_key = group['parent_name']
        sub_cluster_id = int(group.get('sub_cluster_id', 0))
        key = (group_key, sub_cluster_id)
        episode_assignment = episode_plan.assignments.get(key)
        if episode_assignment is None:
            continue
        existing = existing_resolution.get(key)
        normalized_parent_name = normalize_parent_name(group_key)
        excluded_candidate_mmsis = set(active_exclusions['global'])
        excluded_candidate_mmsis.update(active_exclusions['group'].get(key, set()))
        active_label_session = active_label_sessions.get(key)

        # Untrackable names are skipped unless a human already confirmed them.
        if not is_trackable_parent_name(group_key) and (existing or {}).get('status') != _MANUAL_CONFIRMED_STATUS:
            skipped += 1
            status_reason = _status_reason(_SHORT_NAME_STATUS)
            evidence_summary = {
                'episodeId': episode_assignment.episode_id,
                'continuitySource': episode_assignment.continuity_source,
                'continuityScore': episode_assignment.continuity_score,
                'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids,
                'splitFromEpisodeId': episode_assignment.split_from_episode_id,
                'skipReason': status_reason,
                'statusReason': status_reason,
                'normalizedParentName': normalized_parent_name,
            }
            _upsert_resolution(
                conn,
                (
                    group_key,
                    sub_cluster_id,
                    group_key,
                    normalized_parent_name,
                    episode_assignment.episode_id,
                    episode_assignment.continuity_source,
                    episode_assignment.continuity_score,
                    0.0,
                    _SHORT_NAME_STATUS,
                    None,
                    None,
                    None,
                    None,
                    'AUTO_SKIP',
                    0.0,
                    0.0,
                    0.0,
                    0,
                    observed_at,
                    None,
                    None,
                    None,
                    (existing or {}).get('manual_comment'),
                    (existing or {}).get('rejected_candidate_mmsi'),
                    (existing or {}).get('rejected_at'),
                    json.dumps(evidence_summary, ensure_ascii=False),
                    observed_at,
                ),
            )
            episode_snapshot_payloads[key] = {
                'parentEpisodeIds': episode_assignment.merged_from_episode_ids,
                'topCandidateMmsi': None,
                'topCandidateScore': 0.0,
                'resolutionStatus': _SHORT_NAME_STATUS,
                'metadata': {'skipReason': status_reason},
            }
            continue

        candidates = _build_candidate_scores(
            vessel_store=vessel_store,
            observed_at=observed_at,
            group=group,
            episode_assignment=episode_assignment,
            default_model_id=default_model_id,
            default_model_name=default_model_name,
            score_rows=correlation_scores.get(key, []),
            raw_metrics=raw_metric_averages,
            center_track=center_tracks.get(key, []),
            all_positions=all_positions,
            registry_by_mmsi=registry_by_mmsi,
            registry_by_name=registry_by_name,
            existing=existing,
            excluded_candidate_mmsis=excluded_candidate_mmsis,
            episode_prior_stats=episode_prior_stats,
            lineage_prior_stats=lineage_prior_stats,
            label_prior_stats=label_prior_stats,
        )

        # The first element is used as the top candidate.
        top_candidate = candidates[0] if candidates else None
        second_score = candidates[1].final_score if len(candidates) > 1 else 0.0
        margin = round((top_candidate.final_score - second_score), 6) if top_candidate else 0.0
        stable_cycles = _top_candidate_stable_cycles(existing, top_candidate)
        for rank, candidate in enumerate(candidates, start=1):
            snapshot_rows.append((
                observed_at,
                group_key,
                sub_cluster_id,
                group_key,
                normalized_parent_name,
                episode_assignment.episode_id,
                candidate.mmsi,
                candidate.name,
                candidate.vessel_id,
                rank,
                candidate.candidate_source,
                candidate.model_id,
                candidate.model_name,
                candidate.base_corr_score,
                candidate.name_match_score,
                candidate.track_similarity_score,
                candidate.visit_score_6h,
                candidate.proximity_score_6h,
                candidate.activity_sync_score_6h,
                candidate.stability_score,
                candidate.registry_bonus,
                candidate.episode_prior_bonus,
                candidate.lineage_prior_bonus,
                candidate.label_prior_bonus,
                candidate.final_score,
                round(top_candidate.final_score - candidate.final_score, 6) if top_candidate else 0.0,
                json.dumps(candidate.evidence, ensure_ascii=False),
            ))

        status, decision_source = _select_status(top_candidate, margin, stable_cycles)
        # Remember the automatic verdict before a manual override replaces it;
        # label tracking records the automatic one.
        auto_status = status
        selected_parent_mmsi: Optional[str] = None
        selected_parent_name: Optional[str] = None
        selected_vessel_id: Optional[int] = None
        confidence: Optional[float] = None
        last_promoted_at: Optional[datetime] = None

        # NOTE(review): the nesting of the NO_CANDIDATE branch could not be
        # recovered from the whitespace-collapsed patch. It is paired with the
        # outer `if` here (counted when there is no top candidate) -- confirm
        # against the original file.
        if top_candidate is not None:
            if status == _AUTO_PROMOTED_STATUS:
                selected_parent_mmsi = top_candidate.mmsi
                selected_parent_name = top_candidate.name
                selected_vessel_id = top_candidate.vessel_id
                confidence = top_candidate.final_score
                last_promoted_at = observed_at
                promoted += 1
            elif status == _REVIEW_REQUIRED_STATUS:
                selected_parent_mmsi = top_candidate.mmsi
                selected_parent_name = top_candidate.name
                selected_vessel_id = top_candidate.vessel_id
                confidence = top_candidate.final_score
                review_required += 1
        elif status == _NO_CANDIDATE_STATUS:
            no_candidate += 1

        status_reason = _status_reason(status)
        evidence_summary = {
            'episodeId': episode_assignment.episode_id,
            'continuitySource': episode_assignment.continuity_source,
            'continuityScore': episode_assignment.continuity_score,
            'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids,
            'splitFromEpisodeId': episode_assignment.split_from_episode_id,
            'normalizedParentName': normalized_parent_name,
            'candidateCount': len(candidates),
            'topCandidateMmsi': top_candidate.mmsi if top_candidate else None,
            'topCandidateName': top_candidate.name if top_candidate else None,
            'topCandidateSources': sorted(_candidate_sources(top_candidate)),
            'hasCorrelationCandidate': 'CORRELATION' in _candidate_sources(top_candidate),
            'recentTopCandidateStableCycles': stable_cycles,
            'skipReason': _status_reason(_SHORT_NAME_STATUS) if status == _SHORT_NAME_STATUS else None,
            'statusReason': status_reason,
            'trackable': is_trackable_parent_name(group_key),
            'priorBonusTotal': top_candidate.evidence.get('scoreBreakdown', {}).get('priorBonusTotal') if top_candidate else 0.0,
        }
        if excluded_candidate_mmsis:
            evidence_summary['excludedCandidateMmsis'] = sorted(excluded_candidate_mmsis)
        if active_label_session is not None:
            evidence_summary['activeLabelSessionId'] = active_label_session['id']
            evidence_summary['activeLabelParentMmsi'] = active_label_session['label_parent_mmsi']

        if existing is not None and existing.get('status') == _MANUAL_CONFIRMED_STATUS:
            # Keep the human decision; scores below still reflect this cycle.
            status = _MANUAL_CONFIRMED_STATUS
            decision_source = existing.get('decision_source') or 'MANUAL'
            selected_parent_mmsi = existing.get('selected_parent_mmsi')
            selected_parent_name = existing.get('selected_parent_name')
            selected_vessel_id = existing.get('selected_vessel_id')
            confidence = existing.get('confidence') or confidence
            last_promoted_at = existing.get('approved_at') or existing.get('rejected_at') or last_promoted_at

        _upsert_resolution(
            conn,
            (
                group_key,
                sub_cluster_id,
                group_key,
                normalized_parent_name,
                episode_assignment.episode_id,
                episode_assignment.continuity_source,
                episode_assignment.continuity_score,
                top_candidate.evidence.get('scoreBreakdown', {}).get('priorBonusTotal', 0.0) if top_candidate else 0.0,
                status,
                selected_parent_mmsi,
                selected_parent_name,
                selected_vessel_id,
                confidence,
                decision_source,
                top_candidate.final_score if top_candidate else 0.0,
                second_score,
                margin,
                stable_cycles,
                observed_at,
                last_promoted_at,
                (existing or {}).get('approved_by'),
                (existing or {}).get('approved_at'),
                (existing or {}).get('manual_comment'),
                (existing or {}).get('rejected_candidate_mmsi'),
                (existing or {}).get('rejected_at'),
                json.dumps(evidence_summary, ensure_ascii=False),
                observed_at,
            ),
        )
        episode_snapshot_payloads[key] = {
            'parentEpisodeIds': episode_assignment.merged_from_episode_ids,
            'topCandidateMmsi': top_candidate.mmsi if top_candidate else None,
            'topCandidateScore': top_candidate.final_score if top_candidate else 0.0,
            'resolutionStatus': status,
            'metadata': {
                'splitFromEpisodeId': episode_assignment.split_from_episode_id,
                'candidateCount': len(candidates),
                'topCandidateSources': sorted(_candidate_sources(top_candidate)),
            },
        }
        if active_label_session is not None:
            label_tracking_rows.append(
                _label_tracking_row(
                    observed_at=observed_at,
                    label_session=active_label_session,
                    auto_status=auto_status,
                    top_candidate=top_candidate,
                    margin=margin,
                    candidates=candidates,
                )
            )

    sync_episode_states(conn, observed_at, episode_plan)
    inserted = _insert_candidate_snapshots(conn, observed_at, snapshot_rows)
    episode_snapshots_inserted = insert_episode_snapshots(conn, observed_at, episode_plan, episode_snapshot_payloads)
    tracking_inserted = _insert_label_tracking_rows(conn, label_tracking_rows)
    conn.commit()
    logger.info(
        'gear parent inference: %d groups, %d direct-match, %d candidates, %d promoted, %d review, %d skipped, %d no-candidate, %d episode-snapshots, %d label-tracking',
        len(active_groups),
        direct_matched,
        inserted,
        promoted,
        review_required,
        skipped,
        no_candidate,
        episode_snapshots_inserted,
        tracking_inserted,
    )
    return {
        'groups': len(active_groups),
        'candidates': inserted,
        'promoted': promoted,
        'review_required': review_required,
        'skipped': skipped,
        'no_candidate': no_candidate,
        'direct_matched': direct_matched,
        'episode_snapshots': episode_snapshots_inserted,
        'label_tracking': tracking_inserted,
    }
from pathlib import Path
from typing import List, Optional, Tuple

# Earth radius in nautical miles, used by haversine_nm.
EARTH_RADIUS_NM = 3440.065
# Distance-from-baseline thresholds (NM) applied by classify_zone.
TERRITORIAL_SEA_NM = 12.0
CONTIGUOUS_ZONE_NM = 24.0

# Sphere radius (metres) used when inverting the EPSG:3857 projection.
_WEB_MERCATOR_RADIUS_M = 6378137
# Angular factor of the BD-09 perturbation terms.
# NOTE(review): numerically this is pi * 3000 / 180 -- kept as the original literal.
_BD09_X_PI = 52.35987756

# Lazily populated module-level caches (filled on first use).
_baseline_points: Optional[List[Tuple[float, float]]] = None
_zone_polygons: Optional[list] = None


def _load_baseline() -> List[Tuple[float, float]]:
    """Return the baseline points as ``(lat, lon)`` tuples, loading them once.

    Reads ``data/korea_baseline.json`` two directories above this module and
    caches the result in ``_baseline_points``.
    """
    global _baseline_points
    if _baseline_points is not None:
        return _baseline_points
    path = Path(__file__).parent.parent / 'data' / 'korea_baseline.json'
    # JSON text is UTF-8; do not depend on the process locale.
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    _baseline_points = [(p['lat'], p['lon']) for p in data['points']]
    return _baseline_points


def haversine_nm(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two coordinates in nautical miles."""
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt(1 - a) raise a math domain error.
    a = min(1.0, max(0.0, a))
    return EARTH_RADIUS_NM * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


def dist_to_baseline(vessel_lat: float, vessel_lon: float,
                     baseline_points: Optional[List[Tuple[float, float]]] = None) -> float:
    """Return the minimum distance (NM) from a vessel to any baseline point.

    Args:
        vessel_lat: Vessel latitude in degrees.
        vessel_lon: Vessel longitude in degrees.
        baseline_points: ``(lat, lon)`` points to measure against; when
            ``None`` the cached baseline file is used.

    Returns:
        The smallest haversine distance, or ``inf`` when the point list is
        empty.
    """
    if baseline_points is None:
        baseline_points = _load_baseline()
    return min(
        (haversine_nm(vessel_lat, vessel_lon, bp_lat, bp_lon) for bp_lat, bp_lon in baseline_points),
        default=float('inf'),
    )


def _epsg3857_to_wgs84(x: float, y: float) -> Tuple[float, float]:
    """Convert EPSG:3857 (Web Mercator) metres to WGS84 ``(lat, lon)`` degrees."""
    lon = x / (math.pi * _WEB_MERCATOR_RADIUS_M) * 180
    lat = math.atan(math.exp(y / _WEB_MERCATOR_RADIUS_M)) * 360 / math.pi - 90
    return lat, lon


def _load_zone_polygons() -> list:
    """Load the special fishing zone I-IV GeoJSON files, converted to WGS84.

    Each existing file contributes one entry
    ``{'id', 'name', 'allowed', 'polygons'}`` where ``polygons`` is a list of
    polygons, each a list of rings of ``(lat, lon)`` tuples. Missing files
    are skipped. Only the first feature of each file is read. The result is
    cached in ``_zone_polygons``.
    """
    global _zone_polygons
    if _zone_polygons is not None:
        return _zone_polygons

    zone_dir = Path(__file__).parent.parent / 'data' / 'zones'
    zones_meta = [
        ('ZONE_I', '수역Ⅰ(동해)', ['PS', 'FC'], '특정어업수역Ⅰ.json'),
        ('ZONE_II', '수역Ⅱ(남해)', ['PT', 'OT', 'GN', 'PS', 'FC'], '특정어업수역Ⅱ.json'),
        ('ZONE_III', '수역Ⅲ(서남해)', ['PT', 'OT', 'GN', 'PS', 'FC'], '특정어업수역Ⅲ.json'),
        ('ZONE_IV', '수역Ⅳ(서해)', ['GN', 'PS', 'FC'], '특정어업수역Ⅳ.json'),
    ]
    result = []
    for zone_id, name, allowed, filename in zones_meta:
        filepath = zone_dir / filename
        if not filepath.exists():
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
        geometry = data['features'][0]['geometry']
        multi_coords = geometry['coordinates']
        if geometry.get('type') == 'Polygon':
            # A Polygon is a list of rings; wrap it so it is handled like a
            # one-element MultiPolygon.
            multi_coords = [multi_coords]
        wgs84_polys = []
        for poly in multi_coords:
            wgs84_rings = []
            for ring in poly:
                # Index instead of unpacking so positions carrying an optional
                # third (altitude) element are accepted too.
                wgs84_rings.append([_epsg3857_to_wgs84(pt[0], pt[1]) for pt in ring])
            wgs84_polys.append(wgs84_rings)
        result.append({
            'id': zone_id, 'name': name, 'allowed': allowed,
            'polygons': wgs84_polys,
        })
    _zone_polygons = result
    return result


def _point_in_polygon(lat: float, lon: float, ring: list) -> bool:
    """Ray-casting point-in-polygon test against one ring of ``(lat, lon)`` points."""
    n = len(ring)
    inside = False
    j = n - 1
    for i in range(n):
        yi, xi = ring[i]
        yj, xj = ring[j]
        # The first condition guarantees yi != yj, so the division is safe.
        if ((yi > lat) != (yj > lat)) and (lon < (xj - xi) * (lat - yi) / (yj - yi) + xi):
            inside = not inside
        j = i
    return inside


def _point_in_multipolygon(lat: float, lon: float, polygons: list) -> bool:
    """Return True when the point lies in any polygon's outer ring and in none of its holes.

    The first polygon whose outer ring contains the point decides the result;
    later polygons are not consulted.
    """
    for poly in polygons:
        outer = poly[0]
        if _point_in_polygon(lat, lon, outer):
            for hole in poly[1:]:
                if _point_in_polygon(lat, lon, hole):
                    return False
            return True
    return False


def classify_zone(vessel_lat: float, vessel_lon: float) -> dict:
    """Classify a vessel position by special fishing zone or baseline distance.

    The zone polygons are checked first; when none contains the point the
    result depends on the distance to the baseline (territorial sea,
    contiguous zone, or beyond).

    Returns:
        A dict with ``zone``, ``dist_from_baseline_nm`` (rounded to 2
        decimals), ``violation`` and ``alert_level``. Polygon matches also
        carry ``zone_name`` and ``allowed_gears``.
    """
    matched = next(
        (z for z in _load_zone_polygons()
         if _point_in_multipolygon(vessel_lat, vessel_lon, z['polygons'])),
        None,
    )
    dist = round(dist_to_baseline(vessel_lat, vessel_lon), 2)

    if matched is not None:
        return {
            'zone': matched['id'],
            'zone_name': matched['name'],
            # Copy so callers cannot mutate the cached zone definition.
            'allowed_gears': list(matched['allowed']),
            'dist_from_baseline_nm': dist,
            'violation': False,
            'alert_level': 'WATCH',
        }

    if dist <= TERRITORIAL_SEA_NM:
        return {
            'zone': 'TERRITORIAL_SEA',
            'dist_from_baseline_nm': dist,
            'violation': True,
            'alert_level': 'CRITICAL',
        }
    if dist <= CONTIGUOUS_ZONE_NM:
        return {
            'zone': 'CONTIGUOUS_ZONE',
            'dist_from_baseline_nm': dist,
            'violation': False,
            'alert_level': 'WATCH',
        }
    return {
        'zone': 'EEZ_OR_BEYOND',
        'dist_from_baseline_nm': dist,
        'violation': False,
        'alert_level': 'NORMAL',
    }


def bd09_to_wgs84(bd_lat: float, bd_lon: float) -> Tuple[float, float]:
    """Convert BD-09 coordinates to approximate WGS84 ``(lat, lon)``.

    NOTE(review): the last step subtracts fixed offsets (0.0023 deg lat,
    0.0059 deg lon) from the intermediate ``gcj_*`` values rather than using
    a position-dependent correction, so the result is an approximation --
    confirm that this accuracy is acceptable for callers.
    """
    x = bd_lon - 0.0065
    y = bd_lat - 0.006
    z = math.sqrt(x ** 2 + y ** 2) - 0.00002 * math.sin(y * _BD09_X_PI)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * _BD09_X_PI)
    gcj_lon = z * math.cos(theta)
    gcj_lat = z * math.sin(theta)
    wgs_lat = gcj_lat - 0.0023
    wgs_lon = gcj_lon - 0.0059
    return wgs_lat, wgs_lon


def compute_bd09_offset(lat: float, lon: float) -> float:
    """Return the distance in metres between a point and its BD-09->WGS84 conversion.

    The input is interpreted as BD-09, converted with ``bd09_to_wgs84``, and
    the haversine distance between both positions is returned rounded to
    0.1 m.
    """
    wgs_lat, wgs_lon = bd09_to_wgs84(lat, lon)
    dist_nm = haversine_nm(lat, lon, wgs_lat, wgs_lon)
    return round(dist_nm * 1852.0, 1)  # NM to metres
def _get_time_bucket_age(mmsi: str, all_positions: dict, now: datetime) -> float:
    """Return the age in seconds of a vessel's ``time_bucket``, or ``inf``.

    Args:
        mmsi: Key into ``all_positions``.
        all_positions: Mapping of MMSI to a position dict that may carry a
            ``time_bucket`` value.
        now: Reference time; it must be timezone-aware for a finite result,
            because the bucket is always compared as an aware datetime.

    Returns:
        ``(now - time_bucket)`` in seconds. ``inf`` is returned when the
        vessel or its ``time_bucket`` is missing, when the value is a
        null-like timestamp (``NaT``/NaN), or when it cannot be parsed or
        compared.
    """
    pos = all_positions.get(mmsi)
    tb = pos.get('time_bucket') if pos else None
    if tb is None:
        return float('inf')
    try:
        tb_dt = pd.Timestamp(tb)
        if pd.isna(tb_dt):
            # NaT would otherwise flow through the arithmetic below and come
            # out as NaN, which breaks min()/sorted()/<= comparisons in the
            # callers; treat it like a missing bucket.
            return float('inf')
        if tb_dt.tzinfo is None:
            # Naive buckets are interpreted as KST wall-clock time.
            tb_dt = tb_dt.tz_localize(_KST).tz_convert(timezone.utc)
        return (now - tb_dt.to_pydatetime()).total_seconds()
    except Exception:
        # Deliberate best-effort: any parse/compare failure means "unknown age".
        return float('inf')
def detect_gear_groups(
    vessel_store,
    now: Optional[datetime] = None,
) -> list[dict]:
    """Detect gear groups from AIS names that match the gear naming pattern.

    Ported from the frontend FleetClusterLayer.tsx ``gearGroupMap`` useMemo
    logic. Works on the latest position of every vessel returned by
    ``vessel_store.get_all_latest_positions()``.

    The detection runs in three stages:

    1. collect gear entries per whitespace-normalised parent name;
    2. split each parent's gear into connectivity-based sub-clusters;
    3. merge sub-groups of the same parent that lie close to each other.

    Args:
        vessel_store: Object exposing ``get_all_latest_positions()``.
        now: Reference time used for the staleness check (UTC now if None).

    Returns:
        A list of dicts with ``parent_name``, ``parent_mmsi``,
        ``sub_cluster_id`` and ``members``
        (``[{mmsi, name, lat, lon, sog, cog}]``).
    """
    if now is None:
        now = datetime.now(timezone.utc)

    # Latest position of every vessel.
    all_positions = vessel_store.get_all_latest_positions()

    # Vessel name -> mmsi map used to find parent vessels (only names that do
    # NOT match the gear pattern). Both the original name and its
    # space-stripped form are registered.
    name_to_mmsi: dict[str, str] = {}
    for mmsi, pos in all_positions.items():
        name = (pos.get('name') or '').strip()
        if name and not GEAR_PATTERN.match(name):
            name_to_mmsi[name] = mmsi
            name_to_mmsi[name.replace(' ', '')] = mmsi

    # Parent-name normalisation: strip spaces so spelling variants of the same
    # parent collapse into one key.
    def _normalize_parent(raw: str) -> str:
        return raw.replace(' ', '')

    # Stage 1: collect gear sharing a parent name (whitespace-normalised).
    # Entries whose timestamp is older than STALE_SEC are dropped; entries
    # without a timestamp are kept.
    raw_groups: dict[str, list[dict]] = {}
    parent_display: dict[str, str] = {}  # normalized key -> representative original name
    for mmsi, pos in all_positions.items():
        name = (pos.get('name') or '').strip()
        if not name:
            continue

        # Staleness check.
        ts = pos.get('timestamp')
        if ts is not None:
            if isinstance(ts, datetime):
                # Naive datetimes are taken as UTC.
                last_dt = ts if ts.tzinfo is not None else ts.replace(tzinfo=timezone.utc)
            else:
                try:
                    last_dt = pd.Timestamp(ts).to_pydatetime()
                    if last_dt.tzinfo is None:
                        last_dt = last_dt.replace(tzinfo=timezone.utc)
                except Exception:
                    # Unparseable timestamp: skip this vessel entirely.
                    continue
            age_sec = (now - last_dt).total_seconds()
            if age_sec > STALE_SEC:
                continue

        m = GEAR_PATTERN.match(name)
        if not m:
            continue

        # Korean-flagged vessels (MMSI 440/441) do not use gear AIS -> excluded.
        if mmsi.startswith('440') or mmsi.startswith('441'):
            continue

        # group(1) is empty for the all-digits alternative of GEAR_PATTERN, in
        # which case the full name is used as the parent name.
        parent_raw = (m.group(1) or name).strip()
        if not is_trackable_parent_name(parent_raw):
            continue
        parent_key = _normalize_parent(parent_raw)
        # Representative name: prefer a variant without spaces (the more
        # normalised form).
        if parent_key not in parent_display or ' ' not in parent_raw:
            parent_display[parent_key] = parent_raw
        entry = {
            'mmsi': mmsi,
            'name': name,
            'lat': pos['lat'],
            'lon': pos['lon'],
            'sog': pos.get('sog', 0),
            'cog': pos.get('cog', 0),
            'timestamp': ts,
        }
        raw_groups.setdefault(parent_key, []).append(entry)

    # Stage 2: connectivity-based sub-clustering (each gear is within
    # MAX_DIST_DEG of at least one other gear in its cluster). Gear sharing a
    # parent name but lying far apart is split into separate sub-groups.
    results: list[dict] = []
    for parent_key, gears in raw_groups.items():
        parent_mmsi = name_to_mmsi.get(parent_key)
        display_name = parent_display.get(parent_key, parent_key)

        if not gears:
            continue

        # Parent vessel position (used as the seed point when available).
        seed_lat: Optional[float] = None
        seed_lon: Optional[float] = None
        if parent_mmsi and parent_mmsi in all_positions:
            p = all_positions[parent_mmsi]
            seed_lat, seed_lon = p['lat'], p['lon']

        # Connectivity clustering (union-find).
        n = len(gears)
        parent_uf = list(range(n))

        def find(x: int) -> int:
            # Root lookup with path halving.
            while parent_uf[x] != x:
                parent_uf[x] = parent_uf[parent_uf[x]]
                x = parent_uf[x]
            return x

        def union(a: int, b: int) -> None:
            ra, rb = find(a), find(b)
            if ra != rb:
                parent_uf[ra] = rb

        # Two gears are linked when both |dlat| and |dlon| are within
        # MAX_DIST_DEG (a per-axis box test, not a true distance).
        for i in range(n):
            for j in range(i + 1, n):
                if (abs(gears[i]['lat'] - gears[j]['lat']) <= MAX_DIST_DEG
                        and abs(gears[i]['lon'] - gears[j]['lon']) <= MAX_DIST_DEG):
                    union(i, j)

        # Group indices by cluster root.
        clusters: dict[int, list[int]] = {}
        for i in range(n):
            clusters.setdefault(find(i), []).append(i)

        # When a parent vessel exists, attach it to the nearest cluster.
        # NOTE(review): the seed distance is |dlat| + |dlon| compared against
        # MAX_DIST_DEG * 2, unlike the per-axis test used between gears.
        seed_cluster_root: Optional[int] = None
        if seed_lat is not None and seed_lon is not None:
            best_dist = float('inf')
            for root, idxs in clusters.items():
                for i in idxs:
                    d = abs(gears[i]['lat'] - seed_lat) + abs(gears[i]['lon'] - seed_lon)
                    if d < best_dist:
                        best_dist = d
                        seed_cluster_root = root
            # Parent is too far from every cluster -> do not attach it.
            if best_dist > MAX_DIST_DEG * 2:
                seed_cluster_root = None

        # One sub-group per cluster (needs at least 2 gears, or the parent).
        for ci, (root, idxs) in enumerate(clusters.items()):
            has_seed = (root == seed_cluster_root)
            if len(idxs) < 2 and not has_seed:
                continue

            members = [
                {'mmsi': gears[i]['mmsi'], 'name': gears[i]['name'],
                 'lat': gears[i]['lat'], 'lon': gears[i]['lon'],
                 'sog': gears[i]['sog'], 'cog': gears[i]['cog']}
                for i in idxs
            ]

            # The group key always keeps the original name; sub-clusters are
            # told apart by a separate id (0 when there is a single cluster).
            sub_cluster_id = 0 if len(clusters) == 1 else (ci + 1)
            sub_mmsi = parent_mmsi if has_seed else None

            results.append({
                'parent_name': display_name,
                'parent_key': parent_key,
                'parent_mmsi': sub_mmsi,
                'sub_cluster_id': sub_cluster_id,
                'members': members,
            })

    # Stage 3: merge sub-groups with the same parent_key that are close to
    # each other. Prefix-based merging caused over-grouping, so only identical
    # keys are merged.
    def _groups_nearby(a: dict, b: dict) -> bool:
        for ma in a['members']:
            for mb in b['members']:
                if abs(ma['lat'] - mb['lat']) <= MAX_DIST_DEG and abs(ma['lon'] - mb['lon']) <= MAX_DIST_DEG:
                    return True
        return False

    merged: list[dict] = []
    skip: set[int] = set()
    # Largest groups first, so smaller ones are absorbed into bigger ones.
    results.sort(key=lambda g: len(g['members']), reverse=True)
    for i, big in enumerate(results):
        if i in skip:
            continue
        for j, small in enumerate(results):
            if j <= i or j in skip:
                continue
            # Merge identical parent_key only (prefix matching removed to
            # prevent over-merging).
            if big['parent_key'] == small['parent_key'] and _groups_nearby(big, small):
                existing_mmsis = {m['mmsi'] for m in big['members']}
                for m in small['members']:
                    if m['mmsi'] not in existing_mmsis:
                        big['members'].append(m)
                        existing_mmsis.add(m['mmsi'])
                if not big['parent_mmsi'] and small['parent_mmsi']:
                    big['parent_mmsi'] = small['parent_mmsi']
                big['sub_cluster_id'] = 0  # merged -> single cluster
                skip.add(j)
        # parent_key is internal bookkeeping and is not part of the result.
        del big['parent_key']
        merged.append(big)

    return merged
+ """ + if not _SHAPELY_AVAILABLE: + logger.warning('shapely 미설치 — build_all_group_snapshots 빈 리스트 반환') + return [] + + now = datetime.now(timezone.utc) + snapshots: list[dict] = [] + all_positions = vessel_store.get_all_latest_positions() + + # ── FLEET 타입: company_vessels 순회 ────────────────────────── + for company_id, mmsi_list in company_vessels.items(): + company_info = companies.get(company_id, {}) + group_label = company_info.get('name_cn') or company_info.get('name_en') or str(company_id) + + # 각 선박의 최신 좌표 추출 + points: list[tuple[float, float]] = [] + members: list[dict] = [] + + for mmsi in mmsi_list: + pos = all_positions.get(mmsi) + if not pos: + continue + lat = pos['lat'] + lon = pos['lon'] + sog = pos.get('sog', 0) + cog = pos.get('cog', 0) + points.append((lon, lat)) + members.append({ + 'mmsi': mmsi, + 'name': pos.get('name', ''), + 'lat': lat, + 'lon': lon, + 'sog': sog, + 'cog': cog, + 'role': 'LEADER' if mmsi == mmsi_list[0] else 'MEMBER', + 'isParent': False, + }) + + newest_age = min( + (_get_time_bucket_age(m['mmsi'], all_positions, now) for m in members), + default=float('inf'), + ) + # 2척 미만 또는 최근 적재가 DISPLAY_STALE_SEC 초과 → 폴리곤 미생성 + if len(points) < 2 or newest_age > DISPLAY_STALE_SEC: + continue + + polygon_wkt, center_wkt, area_sq_nm, center_lat, center_lon = build_group_polygon( + points, FLEET_BUFFER_DEG + ) + + snapshots.append({ + 'group_type': 'FLEET', + 'group_key': str(company_id), + 'group_label': group_label, + 'resolution': '1h', + 'snapshot_time': now, + 'polygon_wkt': polygon_wkt, + 'center_wkt': center_wkt, + 'area_sq_nm': area_sq_nm, + 'member_count': len(members), + 'zone_id': None, + 'zone_name': None, + 'members': members, + 'color': _cluster_color(company_id), + }) + + # ── GEAR 타입: detect_gear_groups 결과 → 1h/6h 듀얼 스냅샷 ──── + gear_groups = detect_gear_groups(vessel_store, now=now) + + # parent_name 기준 전체 1h 활성 멤버 합산 (서브클러스터 분리 전) + parent_active_1h: dict[str, int] = {} + for group in gear_groups: + pn = 
group['parent_name'] + cnt = sum( + 1 for gm in group['members'] + if _get_time_bucket_age(gm.get('mmsi'), all_positions, now) <= DISPLAY_STALE_SEC + ) + parent_active_1h[pn] = parent_active_1h.get(pn, 0) + cnt + + for group in gear_groups: + parent_name: str = group['parent_name'] + parent_mmsi: Optional[str] = group['parent_mmsi'] + gear_members: list[dict] = group['members'] # 6h STALE 기반 전체 멤버 + + if not gear_members: + continue + + # ── 1h 활성 멤버 필터 (이 서브클러스터 내) ── + active_members_1h = [ + gm for gm in gear_members + if _get_time_bucket_age(gm.get('mmsi'), all_positions, now) <= DISPLAY_STALE_SEC + ] + + # fallback: 서브클러스터 내 1h < 2이면 time_bucket 최신 2개 유지 + display_members_1h = active_members_1h + if len(active_members_1h) < 2 and len(gear_members) >= 2: + sorted_by_age = sorted( + gear_members, + key=lambda gm: _get_time_bucket_age(gm.get('mmsi'), all_positions, now), + ) + display_members_1h = sorted_by_age[:2] + + # ── 6h 전체 멤버 노출 조건: 최신 적재가 STALE_SEC 이내 ── + newest_age_6h = min( + (_get_time_bucket_age(gm.get('mmsi'), all_positions, now) for gm in gear_members), + default=float('inf'), + ) + display_members_6h = gear_members + + # ── resolution별 스냅샷 생성 ── + # 1h-fb: parent_name 전체 1h 활성 < 2 → 리플레이/일치율 추적용, 라이브 현황에서 제외 + # parent_name 전체 기준으로 판단 (서브클러스터 분리로 개별 멤버가 적어져도 그룹 전체가 활성이면 1h) + res_1h = '1h' if parent_active_1h.get(parent_name, 0) >= 2 else '1h-fb' + for resolution, members_for_snap in [(res_1h, display_members_1h), ('6h', display_members_6h)]: + if len(members_for_snap) < 2: + continue + # 6h: 최신 적재가 STALE_SEC(6h) 초과 시 스킵 + if resolution == '6h' and newest_age_6h > STALE_SEC: + continue + + # 수역 분류: anchor(모선 or 첫 멤버) 위치 기준 + anchor_lat: Optional[float] = None + anchor_lon: Optional[float] = None + + if parent_mmsi and parent_mmsi in all_positions: + parent_pos = all_positions[parent_mmsi] + anchor_lat = parent_pos['lat'] + anchor_lon = parent_pos['lon'] + + if anchor_lat is None and members_for_snap: + anchor_lat = members_for_snap[0]['lat'] + 
anchor_lon = members_for_snap[0]['lon'] + + if anchor_lat is None: + continue + + zone_info = classify_zone(float(anchor_lat), float(anchor_lon)) + in_zone = _is_in_zone(zone_info) + zone_id = zone_info.get('zone') if in_zone else None + zone_name = zone_info.get('zone_name') if in_zone else None + + # 비허가(수역 외) 어구: MIN_GEAR_GROUP_SIZE 미만 제외 + if not in_zone and len(members_for_snap) < MIN_GEAR_GROUP_SIZE: + continue + + # 폴리곤 points: 멤버 좌표 + 모선 좌표 (근접 시에만) + points = [(g['lon'], g['lat']) for g in members_for_snap] + parent_nearby = False + if parent_mmsi and parent_mmsi in all_positions: + parent_pos = all_positions[parent_mmsi] + p_lon, p_lat = parent_pos['lon'], parent_pos['lat'] + if any(abs(g['lat'] - p_lat) <= MAX_DIST_DEG * 2 + and abs(g['lon'] - p_lon) <= MAX_DIST_DEG * 2 for g in members_for_snap): + if (p_lon, p_lat) not in points: + points.append((p_lon, p_lat)) + parent_nearby = True + + polygon_wkt, center_wkt, area_sq_nm, _clat, _clon = build_group_polygon( + points, GEAR_BUFFER_DEG + ) + + # members JSONB 구성 + members_out: list[dict] = [] + if parent_nearby and parent_mmsi and parent_mmsi in all_positions: + parent_pos = all_positions[parent_mmsi] + members_out.append({ + 'mmsi': parent_mmsi, + 'name': parent_name, + 'lat': parent_pos['lat'], + 'lon': parent_pos['lon'], + 'sog': parent_pos.get('sog', 0), + 'cog': parent_pos.get('cog', 0), + 'role': 'PARENT', + 'isParent': True, + }) + for g in members_for_snap: + members_out.append({ + 'mmsi': g['mmsi'], + 'name': g['name'], + 'lat': g['lat'], + 'lon': g['lon'], + 'sog': g['sog'], + 'cog': g['cog'], + 'role': 'GEAR', + 'isParent': False, + }) + + color = _COLOR_GEAR_IN_ZONE if in_zone else _COLOR_GEAR_OUT_ZONE + + snapshots.append({ + 'group_type': 'GEAR_IN_ZONE' if in_zone else 'GEAR_OUT_ZONE', + 'group_key': parent_name, + 'group_label': parent_name, + 'sub_cluster_id': group.get('sub_cluster_id', 0), + 'resolution': resolution, + 'snapshot_time': now, + 'polygon_wkt': polygon_wkt, + 'center_wkt': 
def compute_lightweight_risk_score(
    zone_info: dict,
    sog: float,
    is_permitted: Optional[bool] = None,
) -> Tuple[int, str]:
    """Return a lightweight ``(risk_score, risk_level)`` from zone and permit status.

    Intended for vessels that did not go through the full pipeline. Only the
    location term and the permit term are scored here.

    Args:
        zone_info: Zone classification dict; only its ``'zone'`` entry is read.
            A missing or null zone is treated as "no zone".
        sog: Speed over ground. Accepted for signature compatibility; it is
            not used by the current scoring rules.
        is_permitted: Tri-state permit flag. ``None`` means "unknown" and
            contributes nothing; a falsy non-None value counts as unpermitted.

    Returns:
        ``(score, level)`` where ``score`` is capped at 100 and ``level`` is one
        of ``'CRITICAL'`` (>= 70), ``'HIGH'`` (>= 50), ``'MEDIUM'`` (>= 30) or
        ``'LOW'``.
    """
    unpermitted = is_permitted is not None and not is_permitted

    # ``.get('zone', '')`` would still return None when the key exists with a
    # null value, and ``None.startswith`` raises; normalise to '' instead.
    zone = zone_info.get('zone') or ''

    score = 0

    # 1. Location term
    if zone == 'TERRITORIAL_SEA':
        score += 40
    elif zone == 'CONTIGUOUS_ZONE':
        score += 10
    elif zone.startswith('ZONE_') and unpermitted:
        # Inside a designated fishing zone: only penalised without a permit.
        score += 25

    # 4. Permit term (numbering follows compute_vessel_risk_score)
    if unpermitted:
        score += 20

    score = min(score, 100)

    if score >= 70:
        level = 'CRITICAL'
    elif score >= 50:
        level = 'HIGH'
    elif score >= 30:
        level = 'MEDIUM'
    else:
        level = 'LOW'

    return score, level
위치 기반 (최대 40점) + if zone_info is None: + last = df_vessel.iloc[-1] + zone_info = classify_zone(last['lat'], last['lon']) + + zone = zone_info.get('zone', '') + if zone == 'TERRITORIAL_SEA': + score += 40 + elif zone == 'CONTIGUOUS_ZONE': + score += 10 + elif zone.startswith('ZONE_'): + # 특정어업수역 내 — 무허가면 가산 + if is_permitted is not None and not is_permitted: + score += 25 + + # 2. 조업 행위 (최대 30점) + segs = detect_fishing_segments(df_vessel) + ts_fishing = [s for s in segs if s.get('in_territorial_sea')] + if ts_fishing: + score += 20 + elif segs: + score += 5 + + uturn = detect_trawl_uturn(df_vessel) + if uturn.get('trawl_suspected'): + score += 10 + + # 3. AIS 조작 (최대 35점) + teleports = detect_teleportation(df_vessel) + if teleports: + score += 20 + + from algorithms.spoofing import count_speed_jumps + jumps = count_speed_jumps(df_vessel) + if jumps >= 3: + score += 10 + elif jumps >= 1: + score += 5 + + gaps = detect_ais_gaps(df_vessel) + critical_gaps = [g for g in gaps if g['gap_min'] >= 60] + if critical_gaps: + score += 15 + elif gaps: + score += 5 + + # 4. 
MAX_FISHING_SPEED_KNOTS = 25.0


def detect_teleportation(df_vessel: pd.DataFrame,
                         max_speed_knots: float = MAX_FISHING_SPEED_KNOTS) -> list[dict]:
    """Flag physically implausible jumps between consecutive AIS points.

    Rows are ordered by ``'timestamp'``; for each consecutive pair the implied
    speed (distance / elapsed time) is compared with ``max_speed_knots``.

    Args:
        df_vessel: Track of a single vessel with ``timestamp``, ``lat`` and
            ``lon`` columns.
        max_speed_knots: Implied speed above which a hop is reported.

    Returns:
        One dict per anomalous hop with ``idx`` (position of the later point
        in the time-sorted track), ``dist_nm``, ``implied_kn``, ``type`` and
        ``confidence`` (``'HIGH'`` above 50 kn, otherwise ``'MED'``).
    """
    if len(df_vessel) < 2:
        return []

    ordered = df_vessel.sort_values('timestamp').to_dict('records')
    found: list[dict] = []

    for position, (before, after) in enumerate(zip(ordered, ordered[1:]), start=1):
        dist_nm = haversine_nm(before['lat'], before['lon'], after['lat'], after['lon'])
        elapsed = pd.Timestamp(after['timestamp']) - pd.Timestamp(before['timestamp'])
        dt_hours = elapsed.total_seconds() / 3600

        # Duplicate or out-of-order timestamps give no usable speed.
        if dt_hours <= 0:
            continue

        implied_speed = dist_nm / dt_hours
        if implied_speed > max_speed_knots:
            found.append({
                'idx': position,
                'dist_nm': round(dist_nm, 2),
                'implied_kn': round(implied_speed, 1),
                'type': 'TELEPORTATION',
                'confidence': 'HIGH' if implied_speed > 50 else 'MED',
            })

    return found


def count_speed_jumps(df_vessel: pd.DataFrame, threshold_knots: float = 10.0) -> int:
    """Count consecutive ``sog`` samples whose absolute change exceeds the threshold.

    Rows are compared in the order they appear in ``df_vessel``; fewer than
    two rows yields 0.
    """
    if len(df_vessel) < 2:
        return 0

    speeds = df_vessel['sog'].values
    return sum(
        1
        for earlier, later in zip(speeds[:-1], speeds[1:])
        if abs(later - earlier) > threshold_knots
    )
def haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two points, in metres.

    Inputs are decimal degrees; a spherical Earth of radius 6,371,000 m is used.
    """
    earth_radius_m = 6371000
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    # Rounding can leave ``a`` a hair above 1 for near-antipodal points, which
    # would make sqrt(1 - a) raise ValueError. (NaN fails the test and still
    # propagates unchanged.)
    if a > 1.0:
        a = 1.0
    return earth_radius_m * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


def _resample(track: list[tuple[float, float]], n: int) -> list[tuple[float, float]]:
    """Down-sample a track to ``n`` points spaced evenly along its path length.

    Points are linearly interpolated between the original vertices. Tracks
    that already have ``n`` points or fewer are returned as a copy, so the
    result may be shorter than ``n``.

    Args:
        track: ``[(lat, lon), ...]`` in travel order.
        n: Target number of points.

    Returns:
        The resampled ``[(lat, lon), ...]`` list; empty for an empty track or
        ``n <= 0``.
    """
    if len(track) == 0:
        return []
    if n <= 1:
        # step = total / (n - 1) below would divide by zero for n == 1.
        return [track[0]] * max(n, 0)
    if len(track) == 1:
        return [track[0]] * n
    if len(track) <= n:
        return list(track)

    # Cumulative path length at every vertex.
    cumulative = [0.0]
    for i in range(1, len(track)):
        d = haversine_m(track[i - 1][0], track[i - 1][1], track[i][0], track[i][1])
        cumulative.append(cumulative[-1] + d)

    total_dist = cumulative[-1]
    if total_dist == 0.0:
        return [track[0]] * n

    step = total_dist / (n - 1)
    result: list[tuple[float, float]] = []

    seg = 0
    for k in range(n):
        target = step * k
        # Advance to the segment that contains the target distance.
        while seg < len(cumulative) - 2 and cumulative[seg + 1] < target:
            seg += 1
        seg_len = cumulative[seg + 1] - cumulative[seg]
        if seg_len == 0.0:
            result.append(track[seg])
        else:
            t = (target - cumulative[seg]) / seg_len
            lat = track[seg][0] + t * (track[seg + 1][0] - track[seg][0])
            lon = track[seg][1] + t * (track[seg + 1][1] - track[seg][1])
            result.append((lat, lon))

    return result


def _dtw_distance(
    track_a: list[tuple[float, float]],
    track_b: list[tuple[float, float]],
) -> float:
    """Return the DTW alignment cost of two tracks, normalised by ``len(a) + len(b)``.

    The per-step cost is the haversine distance in metres. Only two DP rows
    are kept in memory. Returns ``inf`` when either track is empty.
    """
    n, m = len(track_a), len(track_b)
    if n == 0 or m == 0:
        return float('inf')

    INF = float('inf')

    # Row 0 of the DP table: only the origin is reachable.
    dp_prev = [INF] * (m + 1)
    dp_prev[0] = 0.0

    for i in range(1, n + 1):
        dp_curr = [INF] * (m + 1)  # column 0 stays unreachable for i >= 1
        for j in range(1, m + 1):
            cost = haversine_m(track_a[i - 1][0], track_a[i - 1][1],
                               track_b[j - 1][0], track_b[j - 1][1])
            dp_curr[j] = cost + min(dp_curr[j - 1], dp_prev[j], dp_prev[j - 1])
        dp_prev = dp_curr

    total = dp_prev[m]
    if total == INF:
        return INF
    return total / (n + m)
def _angular_diff(a: float, b: float) -> float:
    """Return the smallest separation between two bearings, in degrees (0 to 180)."""
    wrapped = abs(a - b) % 360
    complement = 360 - wrapped
    # Take whichever way around the circle is shorter.
    if complement < wrapped:
        return complement
    return wrapped
+ - 5분 간격으로 양쪽 리샘플 + - 동일 시각 슬롯만 쌍으로 비교 + - 거리: haversine + COG 페널티 + - 점수: exp(-avg_dist / 3000) + """ + if not track_a or not track_b: + return 0.0 + + slots_a = _resample_temporal(track_a, interval_ms, max_gap_ms) + slots_b = _resample_temporal(track_b, interval_ms, max_gap_ms) + + # 시간 범위 정렬: 공통 구간만 비교 + if not slots_a or not slots_b: + return 0.0 + first_a = next((s for s in slots_a if s is not None), None) + first_b = next((s for s in slots_b if s is not None), None) + if first_a is None or first_b is None: + return 0.0 + + # 양쪽의 시작/끝 시간 + t_start_a = first_a['ts'] + t_start_b = first_b['ts'] + t_start = max(t_start_a, t_start_b) + last_a = next((s for s in reversed(slots_a) if s is not None), None) + last_b = next((s for s in reversed(slots_b) if s is not None), None) + if last_a is None or last_b is None: + return 0.0 + t_end = min(last_a['ts'], last_b['ts']) + + if t_end <= t_start: + return 0.0 + + # 인덱스 매핑 (각 슬롯의 ts → 슬롯) + map_a: dict[int, dict] = {} + for s in slots_a: + if s is not None: + map_a[s['ts']] = s + map_b: dict[int, dict] = {} + for s in slots_b: + if s is not None: + map_b[s['ts']] = s + + total_dist = 0.0 + count = 0 + t = t_start + while t <= t_end: + # 가장 가까운 슬롯 찾기 (interval 반경 내) + sa = map_a.get(t) + sb = map_b.get(t) + if sa is not None and sb is not None: + dist = haversine_m(sa['lat'], sa['lon'], sb['lat'], sb['lon']) + # COG 페널티 + if sa.get('cog') is not None and sb.get('cog') is not None: + cog_diff = _angular_diff(sa['cog'], sb['cog']) + if cog_diff > _COG_PENALTY_THRESHOLD_DEG: + dist *= _COG_PENALTY_FACTOR + total_dist += dist + count += 1 + t += interval_ms + + if count < 3: + return 0.0 + + avg_dist = total_dist / count + return math.exp(-avg_dist / _DECAY_DIST_M) + + +def compute_track_similarity( + track_a: list[tuple[float, float]], + track_b: list[tuple[float, float]], + max_dist_m: float = 10000.0, +) -> float: + """두 궤적의 DTW 거리 기반 유사도 (0~1). + + track이 비어있으면 0.0 반환. + 유사할수록 1.0에 가까움. 
def match_gear_by_track(
    gear_tracks: dict[str, list[tuple[float, float]]],
    vessel_tracks: dict[str, list[tuple[float, float]]],
    threshold: float = 0.6,
) -> list[dict]:
    """Match each gear track to its most similar vessel track.

    Args:
        gear_tracks: mmsi -> ``[(lat, lon), ...]`` for gear.
        vessel_tracks: mmsi -> ``[(lat, lon), ...]`` for candidate vessels.
        threshold: Minimum similarity for a match to be reported.

    Returns:
        ``[{gear_mmsi, vessel_mmsi, similarity, match_method: 'TRACK_SIMILAR'}]``,
        at most one entry per gear. Empty tracks on either side are skipped;
        on equal similarity the vessel seen first wins.
    """
    usable_vessels = [(mmsi, trk) for mmsi, trk in vessel_tracks.items() if trk]
    matches: list[dict] = []

    for gear_mmsi, gear_track in gear_tracks.items():
        if not gear_track:
            continue

        top_mmsi: Optional[str] = None
        top_sim = -1.0
        for vessel_mmsi, vessel_track in usable_vessels:
            sim = compute_track_similarity(gear_track, vessel_track)
            if sim > top_sim:
                top_sim, top_mmsi = sim, vessel_mmsi

        if top_mmsi is None or not top_sim >= threshold:
            continue

        matches.append({
            'gear_mmsi': gear_mmsi,
            'vessel_mmsi': top_mmsi,
            'similarity': top_sim,
            'match_method': 'TRACK_SIMILAR',
        })

    return matches


def compute_sog_correlation(
    sog_a: list[float],
    sog_b: list[float],
) -> float:
    """Return the Pearson correlation of two SOG series, rescaled to 0..1.

    The longer series is truncated to the shorter one. Fewer than three
    paired samples, or a (near-)constant series, yields 0.0.
    """
    size = min(len(sog_a), len(sog_b))
    if size < 3:
        return 0.0

    xs = sog_a[:size]
    ys = sog_b[:size]
    mean_x = sum(xs) / size
    mean_y = sum(ys) / size

    covariance = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys))
    spread_x = sum((x - mean_x) ** 2 for x in xs)
    spread_y = sum((y - mean_y) ** 2 for y in ys)

    denom = (spread_x * spread_y) ** 0.5
    if denom < 1e-12:
        return 0.0

    corr = covariance / denom  # -1 .. 1
    return max(0.0, (corr + 1.0) / 2.0)  # rescale to 0 .. 1


def compute_heading_coherence(
    cog_a: list[float],
    cog_b: list[float],
    threshold_deg: float = 30.0,
) -> float:
    """Return the share of paired COG samples that differ by less than ``threshold_deg``.

    The longer series is truncated to the shorter one. Fewer than three
    paired samples yields 0.0.
    """
    size = min(len(cog_a), len(cog_b))
    if size < 3:
        return 0.0

    def _aligned(first: float, second: float) -> bool:
        gap = abs(first - second)
        if gap > 180.0:
            gap = 360.0 - gap  # measure the short way around the compass
        return gap < threshold_deg

    agreeing = sum(1 for first, second in zip(cog_a, cog_b) if _aligned(first, second))
    return agreeing / size


def compute_proximity_ratio(
    track_a: list[tuple[float, float]],
    track_b: list[tuple[float, float]],
    threshold_nm: float = 10.0,
) -> float:
    """Return the share of index-paired points closer than ``threshold_nm``.

    Points are paired by index, so both tracks are expected to be
    time-aligned. The longer track is truncated to the shorter one; fewer
    than two paired points yields 0.0.
    """
    size = min(len(track_a), len(track_b))
    if size < 2:
        return 0.0

    limit_m = threshold_nm * 1852.0
    near = sum(
        1
        for pa, pb in zip(track_a, track_b)
        if haversine_m(pa[0], pa[1], pb[0], pb[1]) < limit_m
    )
    return near / size
logger = logging.getLogger(__name__)

# ──────────────────────────────────────────────────────────────
# Constants
# ──────────────────────────────────────────────────────────────

SOG_THRESHOLD_KN = 2.0        # speed below which a vessel counts as anchored/drifting (knots)
PROXIMITY_DEG = 0.001         # proximity threshold (~110 m)
SUSPECT_DURATION_MIN = 60     # minimum continuous proximity to flag a pair (minutes)
PAIR_EXPIRY_MIN = 120         # age limit used by _evict_expired_pairs (minutes)

# Bounding rules used to exclude vessels near foreign coasts
_CN_LON_MAX = 123.5           # China side: lon < 123.5
_JP_LON_MIN = 130.5           # Japan side: lon > 130.5
_TSUSHIMA_LAT_MIN = 33.8      # Tsushima: lat > 33.8 AND lon > 129.0
_TSUSHIMA_LON_MIN = 129.0

# Ship types considered (compared after lower-casing)
_CANDIDATE_TYPES: frozenset[str] = frozenset({'tanker', 'cargo', 'fishing'})

# Grid cell size equals the proximity threshold
_GRID_CELL_DEG = PROXIMITY_DEG


# ──────────────────────────────────────────────────────────────
# Internal helpers
# ──────────────────────────────────────────────────────────────

def _is_near_foreign_coast(lat: float, lon: float) -> bool:
    """Return True when the position falls in one of the excluded coastal boxes."""
    if lon < _CN_LON_MAX:
        return True
    if lon > _JP_LON_MIN:
        return True
    if lat > _TSUSHIMA_LAT_MIN and lon > _TSUSHIMA_LON_MIN:
        return True
    return False


def _cell_key(lat: float, lon: float) -> tuple[int, int]:
    """Map a position to its ``(row, col)`` grid cell index."""
    return (int(math.floor(lat / _GRID_CELL_DEG)),
            int(math.floor(lon / _GRID_CELL_DEG)))


def _build_grid(records: list[dict]) -> dict[tuple[int, int], list[int]]:
    """Bucket record indices by grid cell.

    Returns:
        ``{(row, col): [record index, ...]}``
    """
    grid: dict[tuple[int, int], list[int]] = {}
    for idx, rec in enumerate(records):
        grid.setdefault(_cell_key(rec['lat'], rec['lon']), []).append(idx)
    return grid


def _within_proximity(a: dict, b: dict) -> bool:
    """Return True when two records are closer than PROXIMITY_DEG.

    Uses a flat approximation: latitude difference as-is, longitude
    difference scaled by the cosine of the mean latitude.
    """
    dlat = abs(a['lat'] - b['lat'])
    if dlat >= PROXIMITY_DEG:
        return False
    cos_lat = math.cos(math.radians((a['lat'] + b['lat']) / 2.0))
    dlon_scaled = abs(a['lon'] - b['lon']) * cos_lat
    return dlon_scaled < PROXIMITY_DEG


def _normalize_type(raw: Optional[str]) -> str:
    """Lower-case and strip a ship-type label; anything that is not a string becomes ''."""
    # Missing values in a DataFrame column arrive as None *or* float NaN;
    # NaN is truthy and has no .strip(), so test the type rather than truthiness.
    if not isinstance(raw, str):
        return ''
    return raw.strip().lower()


def _pair_key(mmsi_a: str, mmsi_b: str) -> tuple[str, str]:
    """Order two MMSIs so that each pair has exactly one key."""
    return (mmsi_a, mmsi_b) if mmsi_a < mmsi_b else (mmsi_b, mmsi_a)


def _evict_expired_pairs(
    pair_history: dict[tuple[str, str], datetime],
    now: datetime,
) -> None:
    """Drop entries whose first-seen time is more than PAIR_EXPIRY_MIN minutes old.

    The stored value is the time a pair was first seen, not the time it was
    last confirmed, so this must only be applied to pairs that are not
    currently active.
    """
    expired = [
        key for key, first_seen in pair_history.items()
        if (now - first_seen).total_seconds() / 60 > PAIR_EXPIRY_MIN
    ]
    for key in expired:
        del pair_history[key]


def _col_reach(row: int) -> int:
    """Return how many grid columns sideways an in-range neighbour can be.

    _within_proximity scales longitude by cos(lat), so the allowed raw
    longitude gap is PROXIMITY_DEG / cos(lat), which is wider than one cell
    away from the equator.
    """
    # Worst case is the edge of rows ``row`` and ``row + 1`` farthest from the equator.
    max_abs_lat = max(abs(row), abs(row + 2)) * _GRID_CELL_DEG
    cos_lat = math.cos(math.radians(min(max_abs_lat, 89.0)))
    return max(1, math.ceil(1.0 / cos_lat))


def _find_active_pairs(records: list[dict]) -> set[tuple[str, str]]:
    """Return the normalised MMSI pairs that are currently within proximity."""
    grid = _build_grid(records)
    active: set[tuple[str, str]] = set()

    def _check(ai: int, bi: int) -> None:
        a, b = records[ai], records[bi]
        # A duplicated row for one vessel must not pair the vessel with itself.
        if a['mmsi'] != b['mmsi'] and _within_proximity(a, b):
            active.add(_pair_key(a['mmsi'], b['mmsi']))

    for (row, col), indices in grid.items():
        # Pairs inside the same cell.
        for pos, ai in enumerate(indices):
            for bi in indices[pos + 1:]:
                _check(ai, bi)

        # Neighbouring cells, walked in one direction only (same row to the
        # right, next row on both sides) so each cell pair is visited once.
        reach = _col_reach(row)
        offsets = [(0, dc) for dc in range(1, reach + 1)]
        offsets += [(1, dc) for dc in range(-reach, reach + 1)]
        for dr, dc in offsets:
            neighbours = grid.get((row + dr, col + dc))
            if not neighbours:
                continue
            for ai in indices:
                for bi in neighbours:
                    _check(ai, bi)

    return active


# ──────────────────────────────────────────────────────────────
# Public API
# ──────────────────────────────────────────────────────────────

def detect_transshipment(
    df: pd.DataFrame,
    pair_history: dict[tuple[str, str], datetime],
) -> list[tuple[str, str, int]]:
    """Detect vessel pairs suspected of transshipment.

    Args:
        df: Vessel positions. Required columns: ``mmsi``, ``lat``, ``lon``,
            ``sog``. Optional: ``ship_type`` (all types are accepted when the
            column is absent).
        pair_history: Persistent dict of pair -> first-seen time (UTC,
            timezone-aware). The caller must keep it between invocations.
            Keys are ``(mmsi_a, mmsi_b)`` with ``mmsi_a < mmsi_b``.
            It is updated in place.

    Returns:
        ``[(mmsi_a, mmsi_b, duration_minutes), ...]`` for pairs that have
        stayed within proximity for at least SUSPECT_DURATION_MIN minutes.
    """
    if df.empty:
        return []

    required_cols = {'mmsi', 'lat', 'lon', 'sog'}
    missing = required_cols - set(df.columns)
    if missing:
        logger.error('detect_transshipment: missing required columns: %s', missing)
        return []

    now = datetime.now(timezone.utc)

    # ── 1. Candidate filter ──────────────────────────────────
    candidate_mask = df['sog'] < SOG_THRESHOLD_KN
    if 'ship_type' in df.columns:
        type_mask = df['ship_type'].apply(_normalize_type).isin(_CANDIDATE_TYPES)
        candidate_mask = candidate_mask & type_mask

    # Rows without coordinates cannot be placed on the grid (floor(NaN) raises).
    candidates = df[candidate_mask].dropna(subset=['lat', 'lon'])

    if candidates.empty:
        _evict_expired_pairs(pair_history, now)
        return []

    coast_mask = candidates.apply(
        lambda row: not _is_near_foreign_coast(row['lat'], row['lon']),
        axis=1,
    )
    candidates = candidates[coast_mask]

    if len(candidates) < 2:
        _evict_expired_pairs(pair_history, now)
        return []

    records = candidates[['mmsi', 'lat', 'lon']].to_dict('records')
    for rec in records:
        rec['mmsi'] = str(rec['mmsi'])

    # ── 2. Grid-based proximity search ───────────────────────
    active_pairs = _find_active_pairs(records)

    # ── 3. Update pair_history ───────────────────────────────
    for pair in active_pairs:
        pair_history.setdefault(pair, now)

    # Pairs no longer in proximity restart from scratch next time.
    for key in [key for key in pair_history if key not in active_pairs]:
        del pair_history[key]

    # No age-based eviction here: every remaining entry is an active pair, and
    # evicting by first-seen age would reset any rendezvous lasting longer
    # than PAIR_EXPIRY_MIN instead of continuing to report it.

    # ── 4. Flag long-lasting pairs ───────────────────────────
    suspects: list[tuple[str, str, int]] = []
    for pair, first_seen in pair_history.items():
        duration_min = int((now - first_seen).total_seconds() / 60)
        if duration_min >= SUSPECT_DURATION_MIN:
            suspects.append((pair[0], pair[1], duration_min))

    if suspects:
        logger.info(
            'transshipment detection: %d suspect pairs (candidates=%d)',
            len(suspects),
            len(candidates),
        )

    return suspects
np.radians(df['lon'].values[1:]) + + # Haversine distance (nautical miles) + dlat = lat2 - lat1 + dlon = lon2 - lon1 + a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2 + dist_nm = _EARTH_RADIUS_NM * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)) + + # Time difference (hours) + ts = df['timestamp'].values + dt_sec = (ts[1:] - ts[:-1]).astype('timedelta64[s]').astype(float) + dt_hours = dt_sec / 3600.0 + dt_hours[dt_hours <= 0] = np.nan + + # SOG = dist / time (knots) + computed_sog = dist_nm / dt_hours + computed_sog = np.clip(np.nan_to_num(computed_sog, nan=0.0), 0, _MAX_REASONABLE_SOG) + + # COG = bearing (degrees) + x = np.sin(dlon) * np.cos(lat2) + y = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(dlon) + bearing = (np.degrees(np.arctan2(x, y)) + 360) % 360 + + # Append last value (copy from previous) + sog_arr = np.append(computed_sog, computed_sog[-1:] if len(computed_sog) > 0 else [0]) + cog_arr = np.append(bearing, bearing[-1:] if len(bearing) > 0 else [0]) + + # Reset at MMSI boundaries + mmsi_vals = df['mmsi'].values + boundary = np.where(mmsi_vals[:-1] != mmsi_vals[1:])[0] + for idx in boundary: + sog_arr[idx + 1] = df['raw_sog'].iloc[idx + 1] if 'raw_sog' in df.columns else 0 + cog_arr[idx + 1] = 0 + + # Where computed SOG is 0 or NaN, fall back to raw_sog + df['sog'] = sog_arr + if 'raw_sog' in df.columns: + mask = (df['sog'] == 0) | np.isnan(df['sog']) + df.loc[mask, 'sog'] = df.loc[mask, 'raw_sog'].fillna(0) + + df['cog'] = cog_arr + return df + + +class VesselStore: + """In-memory vessel trajectory store for Korean waters vessel data. + + Maintains a 24-hour sliding window of all vessel tracks and supports + incremental 5-minute updates. Chinese vessel (MMSI 412*) filtering + is applied only at analysis target selection time. 
+ """ + + def __init__(self) -> None: + self._tracks: dict[str, pd.DataFrame] = {} + self._last_bucket: Optional[datetime] = None + self._static_info: dict[str, dict] = {} + self._permit_set: set[str] = set() + self._static_refreshed_at: Optional[datetime] = None + self._permit_refreshed_at: Optional[datetime] = None + + # ------------------------------------------------------------------ + # Public load / update methods + # ------------------------------------------------------------------ + + def load_initial(self, hours: int = 24) -> None: + """Load all Korean waters vessel data for the past N hours. + + Fetches a bulk DataFrame from snpdb, groups by MMSI, and stores + each vessel's track separately. Also triggers static info and + permit registry refresh. + """ + from db import snpdb + + logger.info('loading initial vessel tracks (last %dh)...', hours) + try: + df_all = snpdb.fetch_all_tracks(hours) + except Exception as e: + logger.error('fetch_all_tracks failed: %s', e) + return + + if df_all.empty: + logger.warning('fetch_all_tracks returned empty DataFrame') + return + + # Rename sog column to raw_sog to preserve original AIS-reported speed + if 'sog' in df_all.columns and 'raw_sog' not in df_all.columns: + df_all = df_all.rename(columns={'sog': 'raw_sog'}) + + self._tracks = {} + for mmsi, group in df_all.groupby('mmsi'): + self._tracks[str(mmsi)] = group.reset_index(drop=True) + + # last_bucket 설정 — incremental fetch 시작점 + # snpdb time_bucket은 tz-naive KST이므로 UTC 변환하지 않고 그대로 유지 + if 'time_bucket' in df_all.columns and not df_all['time_bucket'].dropna().empty: + max_bucket = pd.to_datetime(df_all['time_bucket'].dropna()).max() + if hasattr(max_bucket, 'to_pydatetime'): + max_bucket = max_bucket.to_pydatetime() + if isinstance(max_bucket, datetime) and max_bucket.tzinfo is not None: + max_bucket = max_bucket.replace(tzinfo=None) + self._last_bucket = max_bucket + elif 'timestamp' in df_all.columns and not df_all['timestamp'].dropna().empty: + max_ts = 
pd.to_datetime(df_all['timestamp'].dropna()).max() + if hasattr(max_ts, 'to_pydatetime'): + max_ts = max_ts.to_pydatetime() + # timestamp는 UTC aware → KST wall-clock naive로 변환 + if isinstance(max_ts, datetime) and max_ts.tzinfo is not None: + max_ts = max_ts.astimezone(_KST).replace(tzinfo=None) + self._last_bucket = max_ts + + vessel_count = len(self._tracks) + point_count = sum(len(v) for v in self._tracks.values()) + logger.info( + 'initial load complete: %d vessels, %d total points, last_bucket=%s', + vessel_count, + point_count, + self._last_bucket, + ) + + self.refresh_static_info() + self.refresh_permit_registry() + + def merge_incremental(self, df_new: pd.DataFrame) -> None: + """Merge a new batch of vessel positions into the in-memory store. + + Deduplicates by timestamp within each MMSI and updates _last_bucket. + """ + if df_new.empty: + logger.debug('merge_incremental called with empty DataFrame, skipping') + return + + if 'sog' in df_new.columns and 'raw_sog' not in df_new.columns: + df_new = df_new.rename(columns={'sog': 'raw_sog'}) + + new_buckets: list[datetime] = [] + + for mmsi, group in df_new.groupby('mmsi'): + mmsi_str = str(mmsi) + if mmsi_str in self._tracks: + combined = pd.concat([self._tracks[mmsi_str], group], ignore_index=True) + combined = combined.sort_values(['timestamp', 'time_bucket']) + combined = combined.drop_duplicates(subset=['timestamp'], keep='last') + self._tracks[mmsi_str] = combined.reset_index(drop=True) + else: + self._tracks[mmsi_str] = group.sort_values(['timestamp', 'time_bucket']).reset_index(drop=True) + + if 'time_bucket' in group.columns and not group['time_bucket'].empty: + bucket_vals = pd.to_datetime(group['time_bucket'].dropna()) + if not bucket_vals.empty: + new_buckets.append(bucket_vals.max().to_pydatetime()) + + if new_buckets: + latest = max(new_buckets) + if isinstance(latest, datetime) and latest.tzinfo is not None: + latest = latest.replace(tzinfo=None) + if self._last_bucket is None or latest > 
self._last_bucket: + self._last_bucket = latest + + logger.debug( + 'incremental merge done: %d mmsis in batch, store has %d vessels', + df_new['mmsi'].nunique(), + len(self._tracks), + ) + + def evict_stale(self, hours: int = 24) -> None: + """Remove track points older than N hours and evict empty MMSI entries.""" + import datetime as _dt + + safe_bucket = compute_safe_bucket() + cutoff_bucket = compute_initial_window_start(hours, safe_bucket) + now = datetime.now(timezone.utc) + cutoff_aware = now - _dt.timedelta(hours=hours) + cutoff_naive = cutoff_aware.replace(tzinfo=None) + + before_total = sum(len(v) for v in self._tracks.values()) + evicted_mmsis: list[str] = [] + + for mmsi in list(self._tracks.keys()): + df = self._tracks[mmsi] + if 'time_bucket' in df.columns and not df['time_bucket'].dropna().empty: + bucket_col = pd.to_datetime(df['time_bucket'], errors='coerce') + mask = bucket_col >= pd.Timestamp(cutoff_bucket) + else: + ts_col = df['timestamp'] + # Handle tz-aware and tz-naive timestamps uniformly + if hasattr(ts_col.dtype, 'tz') and ts_col.dtype.tz is not None: + mask = ts_col >= pd.Timestamp(cutoff_aware) + else: + mask = ts_col >= pd.Timestamp(cutoff_naive) + filtered = df[mask].reset_index(drop=True) + if filtered.empty: + del self._tracks[mmsi] + evicted_mmsis.append(mmsi) + else: + self._tracks[mmsi] = filtered + + after_total = sum(len(v) for v in self._tracks.values()) + logger.info( + 'eviction complete: removed %d points, evicted %d mmsis (threshold=%dh, cutoff_bucket=%s)', + before_total - after_total, + len(evicted_mmsis), + hours, + cutoff_bucket, + ) + + def refresh_static_info(self) -> None: + """Fetch vessel static info (type, name, dimensions) from snpdb. + + Skips refresh if called within the last 60 minutes. 
+ """ + now = datetime.now(timezone.utc) + if self._static_refreshed_at is not None: + elapsed_min = (now - self._static_refreshed_at).total_seconds() / 60 + if elapsed_min < _STATIC_REFRESH_INTERVAL_MIN: + logger.debug( + 'static info refresh skipped (%.1f min since last refresh)', + elapsed_min, + ) + return + + if not self._tracks: + logger.debug('no tracks in store, skipping static info refresh') + return + + from db import snpdb + + mmsi_list = list(self._tracks.keys()) + try: + info = snpdb.fetch_static_info(mmsi_list) + self._static_info.update(info) + self._static_refreshed_at = now + logger.info('static info refreshed: %d vessels', len(info)) + except Exception as e: + logger.error('fetch_static_info failed: %s', e) + + def refresh_permit_registry(self) -> None: + """Fetch permitted Chinese fishing vessel MMSIs from snpdb. + + Skips refresh if called within the last 30 minutes. + """ + now = datetime.now(timezone.utc) + if self._permit_refreshed_at is not None: + elapsed_min = (now - self._permit_refreshed_at).total_seconds() / 60 + if elapsed_min < _PERMIT_REFRESH_INTERVAL_MIN: + logger.debug( + 'permit registry refresh skipped (%.1f min since last refresh)', + elapsed_min, + ) + return + + from db import snpdb + + try: + mmsis = snpdb.fetch_permit_mmsis() + self._permit_set = set(mmsis) + self._permit_refreshed_at = now + logger.info('permit registry refreshed: %d permitted vessels', len(self._permit_set)) + except Exception as e: + logger.error('fetch_permit_mmsis failed: %s', e) + + # ------------------------------------------------------------------ + # Analysis target selection + # ------------------------------------------------------------------ + + def select_analysis_targets(self) -> pd.DataFrame: + """Build a combined DataFrame of Chinese vessel tracks with computed SOG/COG. 
+ + Filters to MMSI starting with '412', computes SOG and COG from + consecutive lat/lon/timestamp pairs using the haversine formula, + and falls back to raw_sog where computed values are zero or NaN. + + Returns: + DataFrame with columns: mmsi, timestamp, lat, lon, sog, cog + """ + chinese_mmsis = [m for m in self._tracks if m.startswith(_CHINESE_MMSI_PREFIX)] + if not chinese_mmsis: + logger.info('no Chinese vessels (412*) found in store') + return pd.DataFrame(columns=['mmsi', 'timestamp', 'lat', 'lon', 'sog', 'cog']) + + frames = [self._tracks[m] for m in chinese_mmsis] + combined = pd.concat(frames, ignore_index=True) + + required_cols = {'mmsi', 'timestamp', 'lat', 'lon'} + missing = required_cols - set(combined.columns) + if missing: + logger.error('combined DataFrame missing required columns: %s', missing) + return pd.DataFrame(columns=['mmsi', 'timestamp', 'lat', 'lon', 'sog', 'cog']) + + result = _compute_sog_cog(combined) + + output_cols = ['mmsi', 'timestamp', 'lat', 'lon', 'sog', 'cog'] + available = [c for c in output_cols if c in result.columns] + return result[available].reset_index(drop=True) + + # ------------------------------------------------------------------ + # Lookup helpers + # ------------------------------------------------------------------ + + def is_permitted(self, mmsi: str) -> bool: + """Return True if the given MMSI is in the permitted Chinese fishing vessel registry.""" + return mmsi in self._permit_set + + def get_vessel_info(self, mmsi: str) -> dict: + """Return static vessel info dict for the given MMSI, or empty dict if not found.""" + return self._static_info.get(mmsi, {}) + + def get_all_latest_positions(self) -> dict[str, dict]: + """모든 선박의 최신 위치 반환. 
{mmsi: {lat, lon, sog, cog, timestamp, name}} + cog는 마지막 2점의 좌표로 bearing 계산.""" + import math + result: dict[str, dict] = {} + for mmsi, df in self._tracks.items(): + if df is None or len(df) == 0: + continue + last = df.iloc[-1] + info = self._static_info.get(mmsi, {}) + + # COG: 마지막 2점으로 bearing 계산 + cog = 0.0 + if len(df) >= 2: + prev = df.iloc[-2] + lat1 = math.radians(float(prev['lat'])) + lat2 = math.radians(float(last['lat'])) + dlon = math.radians(float(last['lon']) - float(prev['lon'])) + x = math.sin(dlon) * math.cos(lat2) + y = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(dlon) + cog = (math.degrees(math.atan2(x, y)) + 360) % 360 + + result[mmsi] = { + 'lat': float(last['lat']), + 'lon': float(last['lon']), + 'sog': float(last.get('sog', 0) or last.get('raw_sog', 0) or 0), + 'cog': cog, + 'timestamp': last.get('timestamp'), + 'time_bucket': last.get('time_bucket'), + 'name': info.get('name', ''), + } + return result + + def get_vessel_tracks(self, mmsis: list[str], hours: int = 24) -> dict[str, list[dict]]: + """Return track points for given MMSIs within the specified hours window. + + Returns dict mapping mmsi to list of {ts, lat, lon, sog, cog} dicts, + sorted by timestamp ascending. 
+ """ + import datetime as _dt + + now = datetime.now(timezone.utc) + cutoff_aware = now - _dt.timedelta(hours=hours) + cutoff_naive = cutoff_aware.replace(tzinfo=None) + + result: dict[str, list[dict]] = {} + for mmsi in mmsis: + df = self._tracks.get(mmsi) + if df is None or len(df) == 0: + continue + + ts_col = df['timestamp'] + if hasattr(ts_col.dtype, 'tz') and ts_col.dtype.tz is not None: + mask = ts_col >= pd.Timestamp(cutoff_aware) + else: + mask = ts_col >= pd.Timestamp(cutoff_naive) + + filtered = df[mask].sort_values('timestamp') + if filtered.empty: + continue + + # Compute SOG/COG for this vessel's track + if len(filtered) >= 2: + track_with_sog = _compute_sog_cog(filtered.copy()) + else: + track_with_sog = filtered.copy() + if 'sog' not in track_with_sog.columns: + track_with_sog['sog'] = track_with_sog.get('raw_sog', 0) + if 'cog' not in track_with_sog.columns: + track_with_sog['cog'] = 0 + + points = [] + for _, row in track_with_sog.iterrows(): + ts = row['timestamp'] + # Convert to epoch ms + if hasattr(ts, 'timestamp'): + epoch_ms = int(ts.timestamp() * 1000) + else: + epoch_ms = int(pd.Timestamp(ts).timestamp() * 1000) + + points.append({ + 'ts': epoch_ms, + 'lat': float(row['lat']), + 'lon': float(row['lon']), + 'sog': float(row.get('sog', 0) or 0), + 'cog': float(row.get('cog', 0) or 0), + }) + + if points: + result[mmsi] = points + + return result + + def get_chinese_mmsis(self) -> set: + """Return the set of all Chinese vessel MMSIs (412*) currently in the store.""" + return {m for m in self._tracks if m.startswith(_CHINESE_MMSI_PREFIX)} + + # ------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------ + + @property + def last_bucket(self) -> Optional[datetime]: + """Return the latest time bucket seen across all merged incremental batches.""" + return self._last_bucket + + # ------------------------------------------------------------------ + # 
def cache_analysis_context(context_dict: dict):
    """Store the analysis summary in Redis under ``CONTEXT_KEY``.

    Called by the scheduler after an analysis run. The context is serialised
    once to JSON (non-JSON values such as datetimes are stringified through
    ``default=str``) and written with a ``CONTEXT_TTL`` expiry. Caching is
    best-effort: any failure is logged as a warning and not raised.

    Args:
        context_dict: Summary data to cache.
    """
    try:
        r = _get_redis()
        # Serialise exactly once and reuse the string for the size log.
        # Re-dumping without default=str raised TypeError on datetime values
        # after the write had already succeeded, producing a false
        # "failed to cache" warning.
        payload = json.dumps(context_dict, ensure_ascii=False, default=str)
        r.setex(CONTEXT_KEY, CONTEXT_TTL, payload)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('cached analysis context (%d bytes)', len(payload.encode('utf-8')))
    except Exception as e:
        logger.warning('failed to cache analysis context: %s', e)
datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC') + + return f"""## 현황 ({now}) +전체 {stats.get('vessels', 0)}척, 중국 {stats.get('chinese', 0)}척, 분석완료 {stats.get('targets', 0)}척, 허가 {stats.get('permitted', 0)}/906척 +CRITICAL {risk.get('CRITICAL', 0)} / HIGH {risk.get('HIGH', 0)} / MEDIUM {risk.get('MEDIUM', 0)} / LOW {risk.get('LOW', 0)} +다크 {ctx.get('dark_count', 0)} / 스푸핑 {ctx.get('spoofing_count', 0)} / 환적 {ctx.get('transship_count', 0)} +영해 {risk.get('TERRITORIAL_SEA', 0)} / 접속 {risk.get('CONTIGUOUS_ZONE', 0)} / I {risk.get('ZONE_I', 0)} / II {risk.get('ZONE_II', 0)} / III {risk.get('ZONE_III', 0)} / IV {risk.get('ZONE_IV', 0)} / EEZ {risk.get('EEZ_OR_BEYOND', 0)} +(상세 데이터는 query_vessels 도구로 조회)""" + + +def _build_fallback_context() -> str: + """Redis 캐시가 없을 때 vessel_store + kcgdb에서 직접 구성.""" + try: + from cache.vessel_store import vessel_store + stats = vessel_store.stats() + + from db import kcgdb + summary = kcgdb.fetch_analysis_summary() + top_risk = kcgdb.fetch_recent_high_risk(10) + polygon_summary = kcgdb.fetch_polygon_summary() + + ctx = { + 'vessel_stats': stats, + 'risk_distribution': summary.get('risk_distribution', {}), + 'dark_count': summary.get('dark_count', 0), + 'spoofing_count': summary.get('spoofing_count', 0), + 'transship_count': summary.get('transship_count', 0), + 'top_risk_vessels': top_risk, + 'polygon_summary': polygon_summary, + } + + from chat.cache import cache_analysis_context + cache_analysis_context(ctx) + + return _build_realtime_context(ctx) + except Exception as e: + logger.error('fallback context build failed: %s', e) + return '(실시간 데이터를 불러올 수 없습니다. 
일반 해양 감시 지식으로 답변합니다.)' + + +# ── RAG: 사용자 질문에서 MMSI를 추출하여 선박별 상세 컨텍스트 주입 ── + +_MMSI_PATTERN = re.compile(r'\b(\d{9})\b') + + +def _extract_mmsis(text: str) -> list[str]: + """사용자 메시지에서 9자리 MMSI 추출.""" + return _MMSI_PATTERN.findall(text) + + +def _build_vessel_detail(mmsi: str) -> str: + """특정 MMSI의 분석 결과를 상세 컨텍스트로 구성 (RAG).""" + try: + from cache.vessel_store import vessel_store + info = vessel_store.get_vessel_info(mmsi) + positions = vessel_store.get_all_latest_positions() + pos = positions.get(mmsi) + + from db import kcgdb + high_risk = kcgdb.fetch_recent_high_risk(100) + vessel_data = next((v for v in high_risk if v['mmsi'] == mmsi), None) + + if not vessel_data and not pos: + return f'\n(MMSI {mmsi}: 분석 데이터 없음)\n' + + lines = [f'\n## 선박 상세: {mmsi}'] + + if info: + name = info.get('name', 'N/A') + lines.append(f'- 선명: {name}') + + if pos: + lines.append(f"- 위치: {pos.get('lat', 'N/A')}°N, {pos.get('lon', 'N/A')}°E") + lines.append(f"- SOG: {pos.get('sog', 'N/A')} knots, COG: {pos.get('cog', 'N/A')}°") + + is_permitted = vessel_store.is_permitted(mmsi) + lines.append(f"- 허가 여부: {'허가어선' if is_permitted else '미허가/미등록'}") + + if vessel_data: + lines.append(f"- 위험도: {vessel_data.get('risk_score', 'N/A')}점 ({vessel_data.get('risk_level', 'N/A')})") + lines.append(f"- 수역: {vessel_data.get('zone', 'N/A')}") + lines.append(f"- 활동: {vessel_data.get('activity_state', 'N/A')}") + lines.append(f"- 다크베셀: {'Y' if vessel_data.get('is_dark') else 'N'}") + lines.append(f"- 환적 의심: {'Y' if vessel_data.get('is_transship') else 'N'}") + lines.append(f"- 스푸핑 점수: {vessel_data.get('spoofing_score', 0):.2f}") + + return '\n'.join(lines) + except Exception as e: + logger.warning('vessel detail build failed for %s: %s', mmsi, e) + return f'\n(MMSI {mmsi}: 상세 조회 실패)\n' + + +class MaritimeContextBuilder: + """도메인 지식 + 실시간 데이터 + 선박별 RAG를 결합하여 시스템 프롬프트 구성.""" + + def build_system_prompt(self, user_message: str = '') -> str: + """시스템 프롬프트 구성. 
+ + 구조: + 1) 압축 도메인 지식 (~500토큰: 역할+핵심용어+도구목록) + 2) 실시간 현황 (Redis 캐시 → DB fallback) + 3) RAG: 사용자 질문에 포함된 MMSI의 선박별 상세 데이터 + + 상세 도메인 지식은 LLM이 get_knowledge 도구로 필요 시 조회. + """ + parts = [] + + # 1) 압축 도메인 지식 (~500토큰) + parts.append(build_compact_prompt()) + + # 2) 실시간 현황 + cached = get_cached_context() + if cached: + parts.append(_build_realtime_context(cached)) + else: + parts.append(_build_fallback_context()) + + # 3) RAG: MMSI 기반 선박 상세 + if user_message: + mmsis = _extract_mmsis(user_message) + for mmsi in mmsis[:3]: # 최대 3척 + parts.append(_build_vessel_detail(mmsi)) + + return '\n\n'.join(parts) diff --git a/prediction/chat/domain_knowledge.py b/prediction/chat/domain_knowledge.py new file mode 100644 index 0000000..991ffa8 --- /dev/null +++ b/prediction/chat/domain_knowledge.py @@ -0,0 +1,471 @@ +"""해양 감시 도메인 전문 지식 — LLM 시스템 프롬프트 보강용. + +수집 출처: +- 한중어업협정 (2001.6.30 발효, 한국민족문화대백과사전) +- 해양수산부 한중어업공동위원회 결과 공표 +- UNCLOS 해양법협약 (영해/접속수역/EEZ 기준) +- Global Fishing Watch 환적 탐지 기준 +- 해양경찰청 불법조업 단속 현황 +- MarineTraffic AIS/GNSS 스푸핑 가이드 +""" + +from config import settings + +# ── 역할 정의 ── +ROLE_DEFINITION = """당신은 대한민국 해양경찰청의 **해양상황 분석 AI 어시스턴트**입니다. +Python AI 분석 파이프라인(7단계 + 8개 알고리즘)의 실시간 결과를 기반으로, +해양 감시 전문가 수준의 분석과 조치 권고를 제공합니다. 
+ +당신이 접근하는 데이터: +- 14,000척 이상의 AIS 실시간 위치 (24시간 슬라이딩 윈도우) +- 중국 어선(412* MMSI) 대상 AI 분석 결과 (28개 필드, 5분 주기 갱신) +- 선단/어구 그룹 폴리곤 (Shapely 기반, 5분 주기) +- 한중어업협정 허가어선 DB (906척 등록)""" + +# ── 해양 수역 법적 체계 ── +MARITIME_ZONES = """## 해양 수역 법적 체계 (UNCLOS + 국내법) + +| 수역 | 범위 | 법적 지위 | 단속 권한 | +|------|------|----------|----------| +| **영해** (TERRITORIAL_SEA) | 기선~12해리 | 완전한 주권 | 즉시 나포 가능 | +| **접속수역** (CONTIGUOUS_ZONE) | 12~24해리 | 관세·출입국 통제 | 정선·검색 가능 | +| **EEZ** (EEZ_OR_BEYOND) | 24~200해리 | 자원 주권적 권리 | 어업법 적용 | + +- 1해리 = 1,852m, 기선은 서해·남해 직선기선, 동해 통상기선 +- 서해는 한중 간 중간선이 200해리 미만이므로 EEZ 경계 미확정 +- 독도·울릉도·제주도는 각 섬 해안에서 12해리 + +### 특정어업수역 (한중어업협정) +- **수역 I~IV**: 한국 EEZ 내 중국 허가어선 조업 가능 구역 +- **잠정조치수역**: 약 83,000km², 한중 공동 관리 (북위 37°~32°11') +- **과도수역**: 잠정조치수역 좌우 20해리 (2005.6.30부터 연차 감축) +- 수역 외 조업 = **불법** (무허가 조업)""" + +# ── 한중어업협정 상세 ── +FISHING_AGREEMENT = """## 한중어업협정 상세 (2001.6.30 발효) + +### 허가어선 현황 (총 906척) +| 어구코드 | 어구명 | 허가 수 | 비고 | +|---------|--------|---------|------| +| PT | 쌍끌이 저인망 | 323쌍 (646척) | 2척 1조 운영 | +| GN | 유자망 (길그물) | 200척 | | +| PS | 위망 (선망) | 16척 | | +| OT | 기선인망 (외끌이) | 13척 | 1척 단독 | +| FC | 운반선 | 31척 | 어획물 운반 전용 | + +### 휴어기 (조업 금지 기간) +| 어구 | 기간 | 비고 | +|------|------|------| +| PT (저인망) | 4/16 ~ 10/15 (6개월) | 산란기 보호 | +| OT (외끌이) | 4/16 ~ 10/15 (6개월) | PT와 동일 | +| GN (유자망) | 6/2 ~ 8/31 (3개월) | 하절기 | + +### 어구별 조업 속도 기준 (UCAF 판정 참조) +| 어구 | 조업 속도 | 항행 속도 | 판별 기준 | +|------|----------|----------|----------| +| PT/OT (저인망) | 2.5~4.5 knots | 6+ knots | 그물 끌기 중 | +| GN (유자망) | 0.5~2.0 knots | 5+ knots | 그물 투망/양망 | +| PS (위망) | 1.0~3.0 knots | 7+ knots | 그물 투·양망 | +| TRAP (통발) | 0.5~2.0 knots | 5+ knots | 통발 투·양 | +| LONGLINE (연승) | 1.0~3.0 knots | 6+ knots | 줄 투·양승 | + +### 2024.5.1 시행 신규 합의사항 +- 한국 EEZ 내 모든 중국어선 **AIS 의무 장착·가동** +- 자망어선: 어구마다 부표/깃대 설치 의무 (30×20cm 표지) +- 위반 시: 허가 취소 + 벌금 + 3년 이내 재허가 불가""" + +# ── 알고리즘 해석 가이드 ── +ALGORITHM_GUIDE = """## AI 분석 알고리즘 해석 가이드 (8개 알고리즘) + +### ALGO 01: 위치 분석 (location) +- `zone`: 선박이 현재 위치한 해양 수역 + - 
TERRITORIAL_SEA (영해): **즉각 주의** — 외국어선 영해 침범 + - CONTIGUOUS_ZONE (접속수역): 감시 강화 필요 + - ZONE_I~IV (특정어업수역): 허가 여부 확인 필수 + - EEZ_OR_BEYOND: 일반 감시 +- `dist_to_baseline_nm`: 기선까지 거리 (NM) + - <12NM: 영해 내 — 최고 위험 + - 12~24NM: 접속수역 — 높은 경계 + - >24NM: EEZ 이원 + +### ALGO 02: 활동 패턴 (activity) +- `activity_state`: STATIONARY(정박) / FISHING(조업) / SAILING(항행) + - SOG ≤1.0 → STATIONARY + - SOG 1.0~5.0 → FISHING (어구에 따라 다름) + - SOG >5.0 → SAILING +- `ucaf_score` (0~1): 어구별 조업속도 매칭률 + - >0.7: 높은 확률로 해당 어구 사용 중 + - 0.3~0.7: 불확실 + - <0.3: 비매칭 (다른 어구이거나 항행 중) +- `ucft_score` (0~1): 조업-항행 구분 신뢰도 + - >0.8: 명확히 조업/항행 구분됨 + - <0.5: 패턴 불명확 + +### ALGO 03: 다크베셀 (dark_vessel) +- `is_dark`: AIS 신호 의도적 차단 의심 +- `gap_duration_min`: AIS 최장 공백 시간 (분) + - 30~60분: 경미한 갭 (기술적 원인 가능) + - 60~180분: 의심 수준 — 의도적 차단 가능성 + - 180분+: **높은 의심** — 불법조업 은폐 목적 추정 +- 참고: 2024.5.1부터 한국 EEZ 내 중국어선 AIS 의무화 + - AIS 차단 자체가 **협정 위반** + +### ALGO 04: GPS 스푸핑 (gps_spoofing) +- `spoofing_score` (0~1): 종합 스푸핑 의심도 + - >0.7: **높은 스푸핑 의심** — 위치 조작 추정 + - 0.3~0.7: 중간 의심 + - <0.3: 정상 +- `bd09_offset_m`: 바이두(BD-09) 좌표계 오프셋 (미터) + - 중국 선박 특유의 GPS 좌표 변환 오차 + - 412* MMSI는 기본 제외 (중국 위성항법 특성) +- `speed_jump_count`: 비현실적 속도 점프 횟수 + - 0: 정상 + - 1~2: 일시적 GPS 오류 가능 + - 3+: **스푸핑 강력 의심** — 위치 은폐 목적 + +### ALGO 05-06: 선단 분석 (fleet/cluster) +- `cluster_id`: 선단 그룹 ID (-1 = 미소속) +- `cluster_size`: 같은 선단 소속 선박 수 + - 2~5: 소규모 선단 + - 5~15: 중규모 선단 (일반적) + - 15+: 대규모 선단 — 조직적 조업 +- `fleet_role`: 선단 내 역할 + - LEADER: 선단 지휘선 (이동 경로 결정) + - FOLLOWER: 추종선 (리더 경로 따름) + - PROCESS_VESSEL: 가공선 (어획물 처리) + - FUEL_VESSEL: 급유선 + - NOISE: 미분류 + +### ALGO 07: 위험도 종합 (risk_score) +- 0~100점 종합 점수, 4개 영역 합산: + - **위치** (최대 40점): 영해 내=40, 접속수역=10 + - **조업 행위** (최대 30점): 영해 내 조업=20, 기타 조업=5, U-turn 패턴=10 + - **AIS 조작** (최대 35점): 순간이동=20, 장시간 갭=15, 단시간 갭=5 + - **허가 이력** (최대 20점): 미허가 어선=20 +- 등급: CRITICAL(≥70) / HIGH(≥50) / MEDIUM(≥30) / LOW(<30) + - 프론트엔드 표시: WATCH=HIGH, MONITOR=MEDIUM, NORMAL=LOW + +### ALGO 08: 환적 의심 (transshipment) +- `is_transship_suspect`: 해상 환적 
의심 여부 +- `transship_pair_mmsi`: 상대 선박 MMSI +- `transship_duration_min`: 접촉 지속 시간 (분) +- 탐지 기준 (Global Fishing Watch 참조): + - 두 선박 500m 이내 접근 + - 속도 2노트 미만 + - 2시간 이상 지속 + - 정박지에서 10km 이상 떨어진 해상""" + +# ── 대응 절차 가이드 ── +RESPONSE_GUIDE = """## 위험도별 대응 절차 권고 + +### CRITICAL (≥70점) — 즉각 대응 +1. 해당 선박 위치·항적 실시간 추적 +2. 인근 경비함정 긴급 출동 지시 +3. VHF 채널 16 경고방송 (한국어+중국어) +4. 정선명령 → 승선검색 → 나포 +5. 상급기관 즉시 보고 + +### WATCH/HIGH (≥50점) — 강화 감시 +1. 감시 우선순위 상향 +2. 항적 지속 추적 (15분 간격) +3. 인근 해역 순찰 함정에 정보 공유 +4. 위험도 변화 시 CRITICAL 대응 전환 준비 + +### MONITOR/MEDIUM (≥30점) — 일반 감시 +1. 정기 모니터링 대상 등록 +2. 1시간 간격 위치·상태 확인 +3. 패턴 변화(조업→이동, 군집화 등) 시 알림 + +### NORMAL/LOW (<30점) — 기본 감시 +1. 시스템 자동 모니터링 +2. 일일 요약 보고에 포함 + +### 불법조업 유형별 조치 +| 유형 | 해당 알고리즘 | 즉시 조치 | +|------|-------------|----------| +| 영해 침범 | zone=TERRITORIAL_SEA | 나포 (영해법 위반) | +| 무허가 조업 | is_permitted=False + zone=ZONE_* | 정선·검색 | +| AIS 차단 | is_dark=True, gap>60min | 위치 추적 + 출동 | +| GPS 위치조작 | spoofing_score>0.7 | 실제 위치 특정 후 출동 | +| 불법 환적 | is_transship_suspect=True | 쌍방 정선·검색 | +| 휴어기 위반 | 어구+날짜 크로스체크 | 정선·어구 확인 |""" + +# ── 응답 규칙 ── +RESPONSE_RULES = """## 응답 규칙 +- 한국어로 답변 +- 데이터 기반 분석 (추측 최소화, 근거 수치 명시) +- 구체적 MMSI, 좌표, 점수, 수역명 제시 +- 불법조업 의심 시 **법적 근거 + 알고리즘 근거 + 조치 권고** 3가지를 함께 제시 +- 위험도 등급 언급 시 점수도 함께 표기 (예: "CRITICAL(82점)") +- 마크다운 형식으로 구조화 (표, 목록, 강조 활용) +- "~일 수 있습니다" 대신 데이터에 근거한 단정적 분석 제공 +- 선박 특정 질문 시 해당 선박의 모든 알고리즘 결과를 종합 제시""" + + +# ── DB 스키마 + Tool Calling 가이드 ── +DB_SCHEMA_AND_TOOLS = """## 데이터 조회 도구 (Tool Calling) + +사용자 질문에 답하기 위해 실시간 DB 조회가 필요하면, 다음 도구를 호출할 수 있습니다. +도구 호출 시 반드시 아래 형식을 사용하세요: + +### 사용 가능한 도구 + +#### 1. query_vessels — 선박 분석 결과 조회 +조건에 맞는 선박 목록을 조회합니다. 
+```json +{"tool": "query_vessels", "params": {"zone": "ZONE_I", "activity": "FISHING", "risk_level": "CRITICAL", "is_dark": true, "limit": 20}} +``` +- 모든 파라미터는 선택적 (조합 가능) +- zone 값: TERRITORIAL_SEA, CONTIGUOUS_ZONE, ZONE_I, ZONE_II, ZONE_III, ZONE_IV, EEZ_OR_BEYOND +- activity 값: STATIONARY, FISHING, SAILING +- risk_level 값: CRITICAL, HIGH, MEDIUM, LOW +- is_dark: true/false +- is_transship: true/false +- vessel_type 값: TRAWL, PURSE, LONGLINE, TRAP, UNKNOWN +- limit: 최대 반환 수 (기본 20) + +#### 2. query_vessel_detail — 특정 선박 상세 +```json +{"tool": "query_vessel_detail", "params": {"mmsi": "412236758"}} +``` + +#### 3. query_fleet_group — 선단/어구 그룹 조회 +```json +{"tool": "query_fleet_group", "params": {"group_type": "FLEET", "zone_id": "ZONE_I"}} +``` +- group_type: FLEET, GEAR_IN_ZONE, GEAR_OUT_ZONE + +#### 4. query_vessel_history — 선박 항적 이력 (snpdb daily) +```json +{"tool": "query_vessel_history", "params": {"mmsi": "412236758", "days": 7}} +``` +- 일별 이동거리, 평균/최대 속도, AIS 포인트 수 +- 최대 30일까지 조회 + +#### 5. 
query_vessel_static — 선박 정적정보 + 변경 이력 (snpdb) +```json +{"tool": "query_vessel_static", "params": {"mmsi": "412236758", "limit": 10}} +``` +- 최신 선명/선종/제원/목적지/상태 + 변경 이력 감지 +- 선명·목적지·상태 변경 시점과 이전/이후 값 표시 + +### DB 스키마 참조 (쿼리 조합 시 참고) + +#### kcg.vessel_analysis_results (5분 주기 갱신, 48시간 보존) +| 컬럼 | 타입 | 값 예시 | +|------|------|---------| +| mmsi | varchar | '412236758' (중국=412*) | +| timestamp | timestamptz | 분석 시점 | +| vessel_type | varchar | TRAWL/PURSE/LONGLINE/TRAP/UNKNOWN | +| zone | varchar | TERRITORIAL_SEA/CONTIGUOUS_ZONE/ZONE_I~IV/EEZ_OR_BEYOND | +| dist_to_baseline_nm | float | 기선까지 거리(NM) | +| activity_state | varchar | STATIONARY/FISHING/SAILING | +| ucaf_score | float | 0~1 (어구 매칭률) | +| is_dark | boolean | AIS 차단 의심 | +| gap_duration_min | int | AIS 최장 공백(분) | +| spoofing_score | float | 0~1 | +| risk_score | int | 0~100 | +| risk_level | varchar | CRITICAL(≥70)/HIGH(≥50)/MEDIUM(≥30)/LOW(<30) | +| cluster_id | int | 선단 ID (-1=미소속) | +| cluster_size | int | 선단 규모 | +| fleet_role | varchar | LEADER/FOLLOWER/PROCESS_VESSEL/FUEL_VESSEL/NOISE | +| is_transship_suspect | boolean | 환적 의심 | +| transship_pair_mmsi | varchar | 상대 선박 | +| analyzed_at | timestamptz | WHERE 조건에 사용 (> NOW() - '1 hour') | +- PK: (mmsi, timestamp), 인덱스: mmsi, timestamp DESC + +#### kcg.fleet_vessels (허가어선 등록부) +| 컬럼 | 타입 | 설명 | +|------|------|------| +| mmsi | varchar | 매칭된 MMSI (NULL 가능) | +| permit_no | varchar | 허가번호 | +| name_cn | text | 중국어 선명 | +| gear_code | varchar | PT/GN/PS/OT/FC | +| company_id | int | → fleet_companies.id | +| tonnage | int | 톤수 | + +#### kcg.group_polygon_snapshots (선단/어구 폴리곤, 5분 APPEND, 7일 보존) +| 컬럼 | 타입 | 설명 | +|------|------|------| +| group_type | varchar | FLEET/GEAR_IN_ZONE/GEAR_OUT_ZONE | +| group_key | varchar | 그룹 식별자 | +| group_label | text | 표시 라벨 | +| snapshot_time | timestamptz | 스냅샷 시점 | +| member_count | int | 소속 선박 수 | +| zone_id | varchar | 수역 ID | +| members | jsonb | [{mmsi, name, lat, lon, sog, cog, ...}] | + +### snpdb 테이블 상세 (signal 
스키마, 읽기 전용) + +#### signal.t_vessel_tracks_5min — 실시간 항적 (5분 집계) +| 컬럼 | 타입 | 설명 | +|------|------|------| +| mmsi | varchar | 선박 ID | +| time_bucket | timestamp | 5분 버킷 시점 | +| track_geom | LineStringM | 타임스탬프 포함 궤적 | +| distance_nm | numeric | 이동 거리(NM) | +| avg_speed | numeric | 평균 속도(knots) | +| max_speed | numeric | 최대 속도(knots) | +| point_count | int | AIS 포인트 수 | +| start_position | jsonb | {lat, lon, sog, cog, timestamp} | +| end_position | jsonb | {lat, lon, sog, cog, timestamp} | +- PK: (mmsi, time_bucket), 인덱스: mmsi, time_bucket +- **일별 파티셔닝**: t_vessel_tracks_5min_YYMMDD (예: _260326 = 2026-03-26) +- 하루 약 850만 건, vessel_store에 24시간 인메모리 캐시 +- **활용**: 최근 수 시간 ~ 24시간 내 세밀한 이동 패턴 분석 + +#### signal.t_vessel_tracks_hourly — 시간별 항적 집계 +| 컬럼 | 타입 | 설명 | +|------|------|------| +| mmsi | varchar | 선박 ID | +| time_bucket | timestamp | 1시간 버킷 | +| track_geom | LineStringM | 시간별 궤적 | +| distance_nm | numeric | 시간당 이동 거리 | +| avg_speed | numeric | 평균 속도 | +| max_speed | numeric | 최대 속도 | +| point_count | int | AIS 포인트 수 | +| start_position | jsonb | 시작 위치 | +| end_position | jsonb | 종료 위치 | +- **월별 파티셔닝**: t_vessel_tracks_hourly_YYYY_MM (예: _2026_03) +- 월 약 1.2억 건 +- **활용**: 수일~수주 단위 이동 경로 추적, 패턴 비교 + +#### signal.t_vessel_tracks_daily — 일별 항적 요약 +| 컬럼 | 타입 | 설명 | +|------|------|------| +| mmsi | varchar | 선박 ID | +| time_bucket | date | 날짜 | +| track_geom | LineStringM | 하루 궤적 | +| distance_nm | numeric | 일일 이동 거리(NM) | +| avg_speed | numeric | 일 평균 속도 | +| max_speed | numeric | 일 최대 속도 | +| point_count | int | AIS 포인트 수 | +| operating_hours | numeric | 운항 시간 | +| port_visits | jsonb | 입출항 기록 | +| start_position | jsonb | 일 시작 위치 | +| end_position | jsonb | 일 종료 위치 | +- **월별 파티셔닝**: t_vessel_tracks_daily_YYYY_MM (예: _2026_03) +- 월 약 800만 건, **2015년 8월~현재** 11년+ 이력 +- **활용**: 장기 행동 패턴, 계절별 어장 이동, 기간 비교 분석 + +#### signal.t_vessel_static — 선박 정적정보 (1시간 주기 스냅샷) +| 컬럼 | 타입 | 설명 | 값 예시 | +|------|------|------|---------| +| mmsi | varchar | 선박 ID | '412236758' | +| 
time_bucket | timestamptz | 스냅샷 시점 (1시간 간격) | | +| imo | bigint | IMO 번호 | | +| name | varchar | 선명 (AIS 브로드캐스트) | 'LU_RONG_YU_55759' | +| callsign | varchar | 호출부호 | | +| vessel_type | varchar | 선종 | Cargo/Tanker/Vessel/Fishing/N/A 등 | +| extra_info | varchar | 추가 정보 | | +| length | int | 선장(m) | | +| width | int | 선폭(m) | | +| draught | float | 흘수(m) | | +| destination | varchar | 목적지 (AIS 입력) | 'PU TIAN' | +| eta | timestamptz | 도착 예정 시각 | | +| status | varchar | 항해 상태 | Under way using engine/Moored/Anchored/Engaged in fishing | +| class_type | varchar | AIS 클래스 | A/B | +- PK: (mmsi, time_bucket) +- **변경 이력 보존**: 동일 MMSI라도 1시간마다 스냅샷 저장. name, destination, status 등이 변경되면 히스토리로 추적 가능 +- **활용 예시**: + - 선명 변경 이력 추적 (위장/은폐 탐지) + - 목적지(destination) 변경 패턴 분석 + - AIS 상태(status) 시계열 — 'Engaged in fishing' ↔ 'Under way' 전환 빈도 + - 선박 제원(length/width/draught) 불일치 탐지 + +### snpdb 테이블 활용 가이드 + +| 분석 목적 | 사용 테이블 | 조회 범위 | 쿼리 팁 | +|----------|-----------|----------|---------| +| **실시간 위치 추적** | 5min (오늘 파티션) | 최근 수 시간 | `_YYMMDD` 파티션 직접 지정 | +| **최근 항적 패턴** | 5min | 최근 24h | vessel_store 인메모리 캐시 우선 | +| **수일간 이동 경로** | hourly | 최근 7일 | `_YYYY_MM` 월 파티션 | +| **장기 행동 패턴** | daily | 수개월~수년 | 월 파티션, distance_nm 집계 | +| **선명/목적지 변경** | static | 변경 이력 | mmsi 기준 time_bucket DESC | +| **선박 제원 확인** | static | 최신 1건 | MAX(time_bucket) | +| **AIS 상태 시계열** | static | 최근 수일 | status 변화 패턴 | +| **계절 조업 패턴** | daily | 연 단위 | 월별 distance_nm, avg_speed 비교 | + +### 파티션 테이블 쿼리 시 주의 +- 5min: `signal.t_vessel_tracks_5min_YYMMDD` (날짜 6자리) +- hourly: `signal.t_vessel_tracks_hourly_YYYY_MM` (연_월) +- daily: `signal.t_vessel_tracks_daily_YYYY_MM` (연_월) +- **부모 테이블 직접 조회 가능** (PostgreSQL이 파티션 프루닝 수행) +- 대량 조회 시 파티션 직접 지정이 성능에 유리 + +### 데이터 흐름 +``` +snpdb (AIS 원본 항적) → vessel_store (인메모리 24h) → 7단계 파이프라인 + → kcgdb.vessel_analysis_results (분석 결과, 48h 보존) + → kcgdb.group_polygon_snapshots (선단/어구 폴리곤, 7일 보존) + → Redis (채팅 컨텍스트 캐시, 6분 TTL) +``` + +### 도구 호출 규칙 +- 답변에 필요한 구체적 선박 목록이 시스템 프롬프트에 없으면 도구를 호출하세요 +- 
def get_knowledge_section(key: str) -> str:
    """Look up one domain-knowledge section by its registry key.

    Returns the text registered in ``KNOWLEDGE_SECTIONS`` for ``key``, or a
    placeholder message naming the key when it is not registered.
    """
    if key in KNOWLEDGE_SECTIONS:
        return KNOWLEDGE_SECTIONS[key]
    return f'(알 수 없는 지식 키: {key})'
+ """압축 시스템 프롬프트 반환 (~500토큰).""" + return COMPACT_SYSTEM_PROMPT diff --git a/prediction/chat/router.py b/prediction/chat/router.py new file mode 100644 index 0000000..93542f6 --- /dev/null +++ b/prediction/chat/router.py @@ -0,0 +1,236 @@ +"""AI 해양분석 채팅 엔드포인트 — 사전 쿼리 + SSE 스트리밍 + Tool Calling.""" + +import json +import logging + +import httpx +from fastapi import APIRouter +from fastapi.responses import StreamingResponse +from pydantic import BaseModel + +from chat.cache import load_chat_history, save_chat_history, clear_chat_history +from chat.context_builder import MaritimeContextBuilder +from chat.tools import detect_prequery, execute_prequery, parse_tool_calls, execute_tool_call +from config import settings + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix='/api/v1/chat', tags=['chat']) + + +class ChatRequest(BaseModel): + message: str + user_id: str = 'anonymous' + stream: bool = True + + +class ChatResponse(BaseModel): + role: str = 'assistant' + content: str + + +@router.post('') +async def chat(req: ChatRequest): + """해양분석 채팅 — 사전 쿼리 + 분석 컨텍스트 + Ollama SSE 스트리밍.""" + history = load_chat_history(req.user_id) + + builder = MaritimeContextBuilder() + system_prompt = builder.build_system_prompt(user_message=req.message) + + # ── 사전 쿼리: 키워드 패턴 매칭으로 DB 조회 후 컨텍스트 보강 ── + prequery_params = detect_prequery(req.message) + prequery_result = '' + if prequery_params: + prequery_result = execute_prequery(prequery_params) + logger.info('prequery: params=%s, results=%d chars', prequery_params, len(prequery_result)) + + # 시스템 프롬프트에 사전 쿼리 결과 추가 + if prequery_result: + system_prompt += '\n\n' + prequery_result + + messages = [ + {'role': 'system', 'content': system_prompt}, + *history[-10:], + {'role': 'user', 'content': req.message}, + ] + + ollama_payload = { + 'model': settings.OLLAMA_MODEL, + 'messages': messages, + 'stream': req.stream, + 'options': { + 'temperature': 0.3, + 'num_predict': 1024, + 'num_ctx': 2048, + }, + } + + if req.stream: + return 
def _sse_event(content: str, done: bool) -> str:
    """Serialize one chat chunk as a Server-Sent-Events ``data:`` frame."""
    body = json.dumps({'content': content, 'done': done}, ensure_ascii=False)
    return f'data: {body}\n\n'


async def _iter_chat_chunks(client, payload: dict):
    """Yield ``(content, done)`` for each JSON line of one streaming /api/chat call.

    Raises:
        httpx.HTTPStatusError: the server answered with a 4xx/5xx status.
        RuntimeError: the stream carried an ``{"error": ...}`` line.
    """
    async with client.stream(
        'POST',
        f'{settings.OLLAMA_BASE_URL}/api/chat',
        json=payload,
    ) as response:
        # Without this an error body is parsed like a normal chunk and the
        # user only ever sees an empty answer.
        response.raise_for_status()
        async for line in response.aiter_lines():
            if not line:
                continue
            try:
                chunk = json.loads(line)
            except json.JSONDecodeError:
                # Skip anything that is not a complete JSON line.
                continue
            if chunk.get('error'):
                raise RuntimeError(chunk['error'])
            done = chunk.get('done', False)
            yield chunk.get('message', {}).get('content', ''), done
            if done:
                return


async def _stream_with_tools(payload: dict, user_id: str, history: list[dict], user_message: str):
    """Stream a chat answer as SSE frames, with one optional tool-calling round.

    The first LLM response is streamed to the client as it arrives. If it
    contains tool calls, they are executed and a second LLM call is streamed
    with the tool output appended to the conversation.

    Args:
        payload: Ollama ``/api/chat`` request body. It is not modified.
        user_id: Key under which the chat history is saved.
        history: Previous messages of this user.
        user_message: The user message being answered.

    Yields:
        SSE ``data:`` frames carrying ``{"content", "done"}`` JSON, followed
        by a final ``data: [DONE]`` frame. Failures are reported as a frame
        with ``done`` set to true instead of being raised.
    """
    accumulated = ''
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(settings.OLLAMA_TIMEOUT_SEC)) as client:
            # First LLM call. "done" is withheld because a tool round may follow.
            async for content, _done in _iter_chat_chunks(client, payload):
                accumulated += content
                yield _sse_event(content, False)

            tool_calls = parse_tool_calls(accumulated)
            if tool_calls:
                tool_results = []
                for tc in tool_calls:
                    result = execute_tool_call(tc)
                    tool_results.append(result)
                    logger.info('tool call: %s → %d chars', tc.get('tool'), len(result))

                tool_context = '\n'.join(tool_results)

                # Build the follow-up request on a copy so the caller's
                # payload is left untouched.
                followup_payload = {
                    **payload,
                    'messages': [
                        *payload['messages'],
                        {'role': 'assistant', 'content': accumulated},
                        {
                            'role': 'user',
                            'content': f'도구 조회 결과입니다. 이 데이터를 기반으로 사용자 질문에 답변하세요:\n{tool_context}',
                        },
                    ],
                }

                # Visual separator between the tool request and the answer.
                yield _sse_event('\n\n---\n_데이터 조회 완료. 분석 결과:_\n\n', False)

                final_answer = ''
                async for content, done in _iter_chat_chunks(client, followup_payload):
                    final_answer += content
                    yield _sse_event(content, done)

                # Only the final answer is stored in the history.
                accumulated = final_answer or accumulated

    except httpx.TimeoutException:
        yield _sse_event('\n\n[응답 시간 초과]', True)
    except Exception as e:
        logger.exception('ollama stream error: %s', e)
        yield _sse_event(f'[오류: {e}]', True)

    if accumulated:
        updated = history + [
            {'role': 'user', 'content': user_message},
            {'role': 'assistant', 'content': accumulated},
        ]
        save_chat_history(user_id, updated)

    yield 'data: [DONE]\n\n'
# ── Pre-query patterns (keyword based, resolved in a single round trip) ──

_ZONE_MAP = {
    '수역1': 'ZONE_I', '수역 1': 'ZONE_I', '수역I': 'ZONE_I', 'ZONE_I': 'ZONE_I', '수역i': 'ZONE_I',
    '수역2': 'ZONE_II', '수역 2': 'ZONE_II', '수역II': 'ZONE_II', 'ZONE_II': 'ZONE_II',
    '수역3': 'ZONE_III', '수역 3': 'ZONE_III', '수역III': 'ZONE_III', 'ZONE_III': 'ZONE_III',
    '수역4': 'ZONE_IV', '수역 4': 'ZONE_IV', '수역IV': 'ZONE_IV', 'ZONE_IV': 'ZONE_IV',
    '영해': 'TERRITORIAL_SEA', '접속수역': 'CONTIGUOUS_ZONE',
}

_ACTIVITY_MAP = {
    '조업': 'FISHING', '어로': 'FISHING', '조업중': 'FISHING', '조업활동': 'FISHING',
    '정박': 'STATIONARY', '정지': 'STATIONARY', '대기': 'STATIONARY',
    '항행': 'SAILING', '이동': 'SAILING', '항해': 'SAILING',
}

_RISK_MAP = {
    '크리티컬': 'CRITICAL', 'critical': 'CRITICAL', '긴급': 'CRITICAL',
    '워치': 'HIGH', 'watch': 'HIGH', '경고': 'HIGH', '고위험': 'HIGH',
    '모니터': 'MEDIUM', 'monitor': 'MEDIUM', '주의': 'MEDIUM',
    '위험': None,  # generic "risky vessels" → CRITICAL + HIGH
}

_DARK_KEYWORDS = ['다크', '다크베셀', 'dark', 'ais 차단', 'ais차단', '신호차단']
_TRANSSHIP_KEYWORDS = ['환적', 'transshipment', '전재']
_SPOOF_KEYWORDS = ['스푸핑', 'spoofing', 'gps 조작', 'gps조작', '위치조작']


def _longest_keyword_match(msg: str, mapping: dict) -> Optional[str]:
    """Return the longest key of ``mapping`` that occurs in ``msg``, or None.

    ``msg`` must already be lower-cased; keys are lower-cased for comparison.
    Preferring the longest hit stops a short keyword from shadowing a longer
    one that contains it (``zone_i`` inside ``zone_iii``). Equal-length ties
    keep the mapping's declaration order.
    """
    best: Optional[str] = None
    for keyword in mapping:
        if keyword.lower() in msg and (best is None or len(keyword) > len(best)):
            best = keyword
    return best


def detect_prequery(message: str) -> Optional[dict]:
    """Detect pre-query keywords in a user message and build DB query parameters.

    Args:
        message: Raw user chat message.

    Returns:
        A dict with any of the keys ``zone``, ``activity``, ``risk_level``,
        ``risk_levels``, ``is_dark``, ``is_transship`` and ``spoofing``, or
        None when no keyword matched.
    """
    msg = message.lower().strip()
    params: dict = {}

    # Zone: longest match, otherwise "수역II" / "ZONE_IV" resolve to ZONE_I.
    zone_kw = _longest_keyword_match(msg, _ZONE_MAP)
    if zone_kw is not None:
        params['zone'] = _ZONE_MAP[zone_kw]

    # Activity state.
    activity_kw = _longest_keyword_match(msg, _ACTIVITY_MAP)
    if activity_kw is not None:
        params['activity'] = _ACTIVITY_MAP[activity_kw]

    # Risk level. A None value marks the generic keyword that expands to
    # both CRITICAL and HIGH.
    risk_kw = _longest_keyword_match(msg, _RISK_MAP)
    if risk_kw is not None:
        level = _RISK_MAP[risk_kw]
        if level:
            params['risk_level'] = level
        else:
            params['risk_levels'] = ['CRITICAL', 'HIGH']

    # Dark vessel.
    if any(k in msg for k in _DARK_KEYWORDS):
        params['is_dark'] = True

    # Transshipment.
    if any(k in msg for k in _TRANSSHIP_KEYWORDS):
        params['is_transship'] = True

    # Spoofing.
    if any(k in msg for k in _SPOOF_KEYWORDS):
        params['spoofing'] = True

    return params or None
# Tool names are plain word characters.
_TOOL_NAME = re.compile(r'\w+')

# Upper bound on tool calls honoured per LLM response.
_MAX_TOOL_CALLS = 2


def parse_tool_calls(llm_response: str) -> list[dict]:
    """Extract tool-call JSON objects from an LLM response.

    Every ``{`` in the text is tried as the start of a JSON object using a
    real JSON decoder. This also accepts pretty-printed JSON, empty or
    nested ``params`` and ``}`` characters inside string values.

    Args:
        llm_response: Raw text produced by the LLM.

    Returns:
        At most two dicts shaped ``{'tool': <name>, 'params': <dict>}``, in
        order of appearance. ``params`` defaults to ``{}`` when the object
        has no ``params`` key.
    """
    decoder = json.JSONDecoder()
    calls: list[dict] = []
    pos = llm_response.find('{')
    while pos != -1 and len(calls) < _MAX_TOOL_CALLS:
        try:
            obj, end = decoder.raw_decode(llm_response, pos)
        except json.JSONDecodeError:
            # Not valid JSON from this brace; try the next one.
            pos = llm_response.find('{', pos + 1)
            continue

        tool = obj.get('tool') if isinstance(obj, dict) else None
        params = obj.get('params', {}) if isinstance(obj, dict) else None
        if isinstance(tool, str) and _TOOL_NAME.fullmatch(tool) and isinstance(params, dict):
            calls.append({'tool': tool, 'params': params})
            # Resume after this object so its own contents are not rescanned.
            pos = llm_response.find('{', end)
        else:
            # Valid JSON but not a tool call: keep scanning inside it so a
            # call wrapped in an outer object is still found.
            pos = llm_response.find('{', pos + 1)
    return calls
사용 가능: maritime_zones, fishing_agreement, algorithm_guide, response_guide, db_schema)' + from chat.domain_knowledge import get_knowledge_section + return get_knowledge_section(key) + + +def _query_fleet_group(params: dict) -> str: + """선단/어구 그룹 조회.""" + try: + from db import kcgdb + + conditions = [f"snapshot_time = (SELECT MAX(snapshot_time) FROM {GROUP_POLYGON_SNAPSHOTS})"] + bind_params: list = [] + + if 'group_type' in params: + conditions.append('group_type = %s') + bind_params.append(params['group_type']) + if 'zone_id' in params: + conditions.append('zone_id = %s') + bind_params.append(params['zone_id']) + + where = ' AND '.join(conditions) + query = f""" + SELECT group_type, group_key, group_label, member_count, zone_name, members + FROM {GROUP_POLYGON_SNAPSHOTS} + WHERE {where} + ORDER BY member_count DESC + LIMIT 20 + """ + + with kcgdb.get_conn() as conn: + with conn.cursor() as cur: + cur.execute(query, bind_params) + rows = cur.fetchall() + + if not rows: + return '\n(해당 조건의 그룹 없음)\n' + + lines = [f'\n## 그룹 조회 결과 ({len(rows)}건)'] + lines.append('| 유형 | 키 | 라벨 | 선박수 | 수역 |') + lines.append('|---|---|---|---|---|') + for row in rows: + gtype, gkey, glabel, mcount, zname, members = row + lines.append(f'| {gtype} | {gkey} | {glabel or "-"} | {mcount} | {zname or "-"} |') + + return '\n'.join(lines) + except Exception as e: + logger.error('fleet group query failed: %s', e) + return f'\n(그룹 조회 실패: {e})\n' + + +def _query_vessel_history(params: dict) -> str: + """snpdb에서 선박 항적 이력 조회 (daily 집계).""" + try: + from db import snpdb + + mmsi = params.get('mmsi', '') + days = min(params.get('days', 7), 30) # 최대 30일 + + if not mmsi: + return '(MMSI 미지정)' + + query = """ + SELECT time_bucket, distance_nm, avg_speed, max_speed, point_count, + start_position, end_position + FROM signal.t_vessel_tracks_daily + WHERE mmsi = %s AND time_bucket >= CURRENT_DATE - %s + ORDER BY time_bucket DESC + """ + + with snpdb.get_conn() as conn: + with conn.cursor() as cur: + 
cur.execute(query, (mmsi, days)) + rows = cur.fetchall() + + if not rows: + return f'\n(MMSI {mmsi}: 최근 {days}일 항적 데이터 없음)\n' + + lines = [f'\n## 항적 이력: {mmsi} (최근 {days}일)'] + lines.append('| 날짜 | 이동거리(NM) | 평균속도 | 최대속도 | AIS포인트 |') + lines.append('|---|---|---|---|---|') + for row in rows: + dt, dist, avg_spd, max_spd, pts, start_pos, end_pos = row + lines.append( + f"| {dt} | {float(dist or 0):.1f} | {float(avg_spd or 0):.1f}kt " + f"| {float(max_spd or 0):.1f}kt | {pts or 0} |" + ) + + return '\n'.join(lines) + except Exception as e: + logger.error('vessel history query failed: %s', e) + return f'\n(항적 이력 조회 실패: {e})\n' + + +def _query_vessel_static(params: dict) -> str: + """snpdb에서 선박 정적정보 + 변경 이력 조회.""" + try: + from db import snpdb + + mmsi = params.get('mmsi', '') + limit = min(params.get('limit', 10), 24) + + if not mmsi: + return '(MMSI 미지정)' + + query = """ + SELECT time_bucket, name, vessel_type, length, width, draught, + destination, status, callsign, imo + FROM signal.t_vessel_static + WHERE mmsi = %s + ORDER BY time_bucket DESC + LIMIT %s + """ + + with snpdb.get_conn() as conn: + with conn.cursor() as cur: + cur.execute(query, (mmsi, limit)) + rows = cur.fetchall() + + if not rows: + return f'\n(MMSI {mmsi}: 정적정보 없음)\n' + + # 최신 정보 + latest = rows[0] + lines = [f'\n## 선박 정적정보: {mmsi}'] + lines.append(f'- 선명: {latest[1] or "N/A"}') + lines.append(f'- 선종: {latest[2] or "N/A"}') + lines.append(f'- 제원: L={latest[3] or 0}m × W={latest[4] or 0}m, 흘수={latest[5] or 0}m') + lines.append(f'- 목적지: {latest[6] or "N/A"}') + lines.append(f'- 상태: {latest[7] or "N/A"}') + lines.append(f'- 호출부호: {latest[8] or "N/A"}, IMO: {latest[9] or "N/A"}') + + # 변경 이력 감지 + changes = [] + for i in range(len(rows) - 1): + curr, prev = rows[i], rows[i + 1] + diffs = [] + if curr[1] != prev[1]: + diffs.append(f'선명: {prev[1]}→{curr[1]}') + if curr[6] != prev[6]: + diffs.append(f'목적지: {prev[6]}→{curr[6]}') + if curr[7] != prev[7]: + diffs.append(f'상태: {prev[7]}→{curr[7]}') + if 
diffs: + changes.append(f'- {curr[0].strftime("%m/%d %H:%M")}: {", ".join(diffs)}') + + if changes: + lines.append(f'\n### 변경 이력 (최근 {len(changes)}건)') + lines.extend(changes[:10]) + + return '\n'.join(lines) + except Exception as e: + logger.error('vessel static query failed: %s', e) + return f'\n(정적정보 조회 실패: {e})\n' + + +def _query_gear_correlation(params: dict) -> str: + """어구 그룹의 연관 선박/어구 조회.""" + from db import kcgdb + + group_key = params.get('group_key', '') + limit = int(params.get('limit', 10)) + + with kcgdb.get_conn() as conn: + cur = conn.cursor() + try: + cur.execute( + 'SELECT target_name, target_mmsi, target_type, current_score, ' + 'streak_count, observation_count, proximity_ratio, visit_score, ' + 'heading_coherence, freeze_state ' + f'FROM {GEAR_CORRELATION_SCORES} s ' + f'JOIN {CORRELATION_PARAM_MODELS} m ON s.model_id = m.id ' + 'WHERE s.group_key = %s AND m.is_default = TRUE AND s.current_score >= 0.3 ' + 'ORDER BY s.current_score DESC LIMIT %s', + (group_key, limit), + ) + rows = cur.fetchall() + except Exception: + return f'어구 그룹 "{group_key}"에 대한 연관성 데이터가 없습니다 (테이블 미생성).' + finally: + cur.close() + + if not rows: + return f'어구 그룹 "{group_key}"에 대한 연관성 데이터가 없습니다.' 
+ + lines = [f'## {group_key} 연관 분석 (상위 {len(rows)}개, default 모델)'] + for r in rows: + name, mmsi, ttype, score, streak, obs, prox, visit, heading, state = r + pct = score * 100 + disp_name = name or mmsi + detail_parts = [] + if prox is not None: + detail_parts.append(f'근접 {prox*100:.0f}%') + if visit is not None: + detail_parts.append(f'방문 {visit*100:.0f}%') + if heading is not None: + detail_parts.append(f'COG동조 {heading*100:.0f}%') + detail = ', '.join(detail_parts) if detail_parts else '' + + lines.append( + f'- **{disp_name}** ({mmsi}, {ttype}): ' + f'일치율 {pct:.1f}% (연속 {streak}회, 관측 {obs}회) ' + f'[{detail}] 상태: {state}' + ) + return '\n'.join(lines) diff --git a/prediction/config.py b/prediction/config.py new file mode 100644 index 0000000..9c3498b --- /dev/null +++ b/prediction/config.py @@ -0,0 +1,66 @@ +import re +from typing import Optional + +from pydantic_settings import BaseSettings + + +class Settings(BaseSettings): + # snpdb (궤적 데이터 소스) + SNPDB_HOST: str = '211.208.115.83' + SNPDB_PORT: int = 5432 + SNPDB_NAME: str = 'snpdb' + SNPDB_USER: str = 'snp' + SNPDB_PASSWORD: str = 'snp#8932' + + # kcgdb (분석 결과 저장 — kcgaidb 통합 DB) + KCGDB_HOST: str = '211.208.115.83' + KCGDB_PORT: int = 5432 + KCGDB_NAME: str = 'kcgaidb' + KCGDB_SCHEMA: str = 'kcg' + KCGDB_USER: str = 'kcg-app' + KCGDB_PASSWORD: str = 'Kcg2026ai' + + # 스케줄러 + SCHEDULER_INTERVAL_MIN: int = 5 + + # 인메모리 캐시 + CACHE_WINDOW_HOURS: int = 24 + INITIAL_LOAD_HOURS: int = 24 + STATIC_INFO_REFRESH_MIN: int = 60 + PERMIT_REFRESH_MIN: int = 30 + SNPDB_SAFE_DELAY_MIN: int = 12 + SNPDB_BACKFILL_BUCKETS: int = 3 + + # 파이프라인 + TRAJECTORY_HOURS: int = 6 + MMSI_PREFIX: str = '412' + MIN_TRAJ_POINTS: int = 100 + + # Ollama (LLM) + OLLAMA_BASE_URL: str = 'http://localhost:11434' + OLLAMA_MODEL: str = 'qwen3:14b' # CPU-only: 14b 권장, GPU 있으면 32b + OLLAMA_TIMEOUT_SEC: int = 300 + + # Redis + REDIS_HOST: str = 'localhost' + REDIS_PORT: int = 6379 + REDIS_PASSWORD: str = '' + + # 로깅 + LOG_LEVEL: str = 'INFO' + + 
_SQL_IDENTIFIER = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$')


def qualified_table(table_name: str, schema: Optional[str] = None) -> str:
    """Build a validated ``schema.table`` name for interpolation into SQL text.

    Args:
        table_name: Unqualified table name.
        schema: Schema name. When falsy, ``settings.KCGDB_SCHEMA`` is used.

    Returns:
        ``'<schema>.<table_name>'``.

    Raises:
        ValueError: the schema or table name is not a plain identifier
            (ASCII letters, digits and underscores, not starting with a
            digit). The schema is checked first.
    """
    resolved_schema = schema if schema else settings.KCGDB_SCHEMA
    checks = (('schema', resolved_schema), ('table', table_name))
    for label, identifier in checks:
        if _SQL_IDENTIFIER.fullmatch(identifier) is None:
            raise ValueError(f'Invalid {label} name: {identifier!r}')
    return '.'.join((resolved_schema, table_name))
[14544627.066163512, 4252568.169285575], [14439940.71936106, 4252568.1692174645], [14440259.902536998, 4254382.900417306], [14440565.249736432, 4256577.976660408], [14441200.37191117, 4258322.323996074], [14442128.627396706, 4261947.246114864], [14442446.188484557, 4263842.912458916], [14443081.310658677, 4265407.837348879], [14443838.571713375, 4268086.787104008], [14444461.480000762, 4270299.674084436], [14445414.16326165, 4272528.068283305], [14446488.98540456, 4275811.262361098], [14447111.893690424, 4279125.58051958], [14447441.668665636, 4283375.409280503], [14447441.668666717, 4285908.011073243], [14447747.015866045, 4287008.670616378], [14449298.17963799, 4289692.937620907], [14451325.68504222, 4294897.478106175], [14452583.715503562, 4299470.4800484385], [14452583.715504179, 4299666.724555172], [14452864.634927392, 4301297.200756734], [14452803.565487338, 4303864.187591692], [14452864.634926995, 4306733.892102277], [14452681.426607274, 4309982.105854211], [14452229.512752429, 4313034.803597244], [14451289.043378348, 4315906.938650241], [14450165.365684237, 4319883.836509038], [14448650.843575954, 4323816.808519151], [14447172.963130116, 4326268.076469068], [14445646.22713406, 4328477.720500556], [14443166.807874365, 4331384.242207033], [14440455.324744267, 4333928.090897115], [14438366.74990012, 4335578.885027033], [14435545.341778146, 4337381.416707463], [14435212.858055448, 4337568.367409996], [14433713.258582642, 4338411.570116835], [14431881.17538587, 4339153.947573591], [14430305.583837628, 4339729.704068462], [14430281.156061549, 4340669.162281877], [14432430.800344449, 4344124.648762426], [14433664.40302976, 4347050.554454509], [14434299.525204673, 4348582.044935347], [14435398.775122227, 4352055.241843149], [14436168.250064695, 4355377.820309184], [14436473.59726421, 4359156.794673912], [14436632.377808044, 4361358.014828757], [14437096.505551115, 4363255.98064219], [14438036.97492467, 4367341.541713113], [14438354.536012871, 4371595.823810485], 
[14438183.541581359, 4375213.2915699165], [14437218.644430902, 4379455.492532148], [14436754.516687542, 4381676.095802414], [14437218.644430652, 4383410.311934654], [14438940.802635401, 4387670.983955953], [14440333.18586435, 4392085.61379955], [14440821.741384165, 4395862.331199512], [14440968.308038985, 4399000.459396788], [14441114.874694768, 4403419.77764905], [14441273.655239271, 4409426.891672738], [14440772.885832038, 4413972.594613021], [14439991.197000964, 4416642.956092686], [14438891.94708353, 4419329.253028348], [14437621.702733777, 4422642.269553811], [14436046.111184767, 4426582.592213162], [14435117.855699727, 4428767.199310407], [14434946.861267168, 4430203.479647634], [14434946.86126783, 4432709.795922216], [14434470.519636003, 4435537.753788483], [14434617.086292291, 4437433.672085769], [14434617.08629216, 4439314.635756483], [14434922.433492135, 4440431.137925814], [14435545.341778962, 4443276.456208943], [14435862.902865293, 4448617.316293129], [14435374.347346198, 4453195.1551512005], [14434433.877972556, 4456978.343560426], [14433493.582733309, 4459337.341378763], [14433493.408598367, 4459337.778245751], [14432540.725337084, 4461222.640432275], [14430134.5894049, 4464840.066412149], [14429071.98115172, 4466066.593934474], [14429377.32835093, 4468244.031514878], [14429487.253342932, 4471664.436166196], [14429487.253342314, 4474871.114707357], [14428962.056159778, 4478201.569995016], [14428339.14787256, 4480581.111676515], [14427423.10627465, 4482961.1914115725], [14426482.636900885, 4485234.2862444], [14424332.992617503, 4488229.992534473], [14422684.117740594, 4490519.594851016], [14421218.451183844, 4492071.890874874], [14420192.484594172, 4493424.573415368], [14418641.32082132, 4495484.674666911], [14415856.554362778, 4498806.286043997], [14415123.721083565, 4501082.812019949], [14413291.637888283, 4505437.2749514375], [14411545.051908031, 4508454.147134834], [14409053.418760045, 4511333.299393252], [14407502.25498812, 4512996.485903551], 
[14405743.455119297, 4516308.245790257], [14404192.291346865, 4519020.04815391], [14402445.705366325, 4520776.931720719], [14401199.88879329, 4522117.909890488], [14401163.247128874, 4522441.620044785], [14400674.691610316, 4526804.977928812], [14399135.74172542, 4530630.196696397], [14397889.92515128, 4533530.893432845], [14396448.686370509, 4536216.299453886], [14394482.250406215, 4538609.053820163], [14393969.267111044, 4540878.818581772], [14393248.647720557, 4543272.6434066], [14391697.483947853, 4546578.5701886], [14389950.897967545, 4549576.439029636], [14387996.67589144, 4552080.475715166], [14386433.298231108, 4553935.706287525], [14384686.712251175, 4555296.4190314105], [14384063.803963741, 4555698.481870257], [14382817.987391062, 4558497.888716806], [14382023.990793852, 4559433.839678707], [14379825.584836785, 4562025.286767265], [14377993.501640612, 4563882.293207642], [14375709.504589545, 4565538.404034689], [14373230.085329972, 4569687.579878523], [14370848.377174843, 4572490.760700769], [14369101.79119466, 4574132.75433843], [14367978.113501683, 4575372.167021386], [14367025.430239363, 4578161.387517195], [14365584.191457871, 4580563.818041922], [14364155.16656508, 4582966.805917671], [14362591.788904771, 4585230.77563233], [14360136.797420906, 4586983.366182029], [14359501.67524686, 4589031.016082314], [14357755.089266032, 4591730.810433944], [14356924.544884088, 4593049.931812809], [14355397.808887746, 4596589.112554951], [14353321.447931753, 4599694.655522917], [14351355.011967713, 4602086.560165967], [14350548.895361273, 4602909.876326975], [14349058.80102805, 4604059.521970236], [14348362.609413402, 4605286.987737952], [14347006.867848393, 4607571.395078686], [14345260.281868543, 4610058.400036733], [14344344.240270587, 4614925.339107906], [14344178.729650684, 4615426.598601238], [14406264.563155375, 4615426.598601238], [14471145.302268442, 4615426.598601238], [14489820.50078106, 4579817.049246806], [14657058.866457367, 4579819.039140932], 
[14657058.866471501, 4498513.035634587], [14653280.330118885, 4484660.955595197], [14653257.89496764, 4484604.528273547], [14653111.328311926, 4484251.265963233], [14652952.547767457, 4483867.298646529], [14652805.981111629, 4483498.703204842], [14652793.767223712, 4483437.271887497], [14652732.697783664, 4483283.695161308], [14652573.91724023, 4482899.763155043], [14652463.992248593, 4482531.201607391], [14652317.425592588, 4482147.2970551355], [14652183.072824666, 4481778.761862163], [14652085.361720739, 4481394.884757528], [14651963.222840805, 4481011.021654676], [14651853.297849245, 4480627.172552061], [14651731.158969458, 4480243.337446124], [14651645.661754325, 4479828.811251124], [14651560.164537868, 4479460.357225606], [14651450.239546517, 4479061.213248133], [14651376.956218421, 4478677.435232205], [14651303.672890497, 4478278.320934065], [14651230.38956299, 4477894.571451181], [14651181.534010744, 4477495.486823346], [14651120.464571165, 4477096.417317753], [14651059.395131014, 4476712.710899764], [14650998.325691152, 4476313.671051835], [14650996.99554685, 4476290.271738564], [14645926.917379338, 4457703.411105691], [14630424.731020536, 4444761.216899179], [14601399.121065676, 4420528.823331998], [14513278.61218791, 4420528.823356817], [14513278.612126667, 4323569.5559741575], [14612352.95900835, 4323569.555957972]], [[14531705.281810218, 4513797.373626424], [14531693.067921922, 4513597.145949486], [14531680.854034334, 4513381.520424651], [14531680.854033662, 4513196.702081734], [14531656.42625728, 4512981.084804853], [14531631.99848197, 4512596.064997208], [14531631.998481335, 4512395.860290817], [14531631.998482231, 4512180.259503853], [14531619.784594133, 4512010.861999429], [14531619.784593917, 4511795.269138], [14531619.784594417, 4511579.680716071], [14531619.784594513, 4511394.89417161], [14531619.784594564, 4511194.712427556], [14531631.998481907, 4510979.136366602], [14531631.99848219, 4510794.36041791], [14531631.998481516, 4510578.792596463], 
[14531656.426257836, 4510193.861093197], [14531680.854033632, 4509993.7023007], [14531680.854033832, 4509793.547333112], [14531693.067922262, 4509593.396189629], [14531705.28180977, 4509393.248870632], [14531741.923473405, 4509193.105373775], [14531754.137361716, 4508992.965701181], [14531778.565137729, 4508608.0924608875], [14531827.420689235, 4508207.83928627], [14531888.490129804, 4507807.60139917], [14531937.345681723, 4507407.378797934], [14531986.201233227, 4507022.563787677], [14532071.698448502, 4506622.371163723], [14532144.981777469, 4506222.193819813], [14532218.26510484, 4505822.031752334], [14532291.54843238, 4505437.274939068], [14532389.259536454, 4505052.532246742], [14532486.970639894, 4504667.803672876], [14532584.681744624, 4504283.089215654], [14532682.3928476, 4503898.388874675], [14532804.5317276, 4503498.31548994], [14532816.74561534, 4503467.54124596], [14532841.173391888, 4503236.737298468], [14532890.028944094, 4502836.689154722], [14532951.098383617, 4502436.656270566], [14533036.595599566, 4502036.638641951], [14533085.451150997, 4501652.020693331], [14533170.94836721, 4501252.032985861], [14533256.445583222, 4500882.827099174], [14533329.728910444, 4500467.486007207], [14533427.440014655, 4500082.925582424], [14533512.937230268, 4499698.379251896], [14533537.365005817, 4499636.85314637], [14533635.07610988, 4499313.847011714], [14533720.573326208, 4498929.328863146], [14533842.71220557, 4498544.824803407], [14533964.851085538, 4498160.334828932], [14534086.989965722, 4497775.8589393], [14534221.342733555, 4497406.77533439], [14534343.481613029, 4497022.3270443855], [14534490.048267843, 4496653.269931789], [14534636.614923632, 4496284.225792352], [14534795.395467635, 4495899.818607236], [14534941.962123044, 4495530.800950158], [14535088.528779598, 4495161.796260659], [14535271.737098787, 4494808.178934617], [14535442.73153039, 4494454.573515013], [14535625.939850742, 4494085.606640488], [14535821.362058103, 4493732.025548209], 
[14535992.356489455, 4493363.084053765], [14536187.77869705, 4493024.899068225], [14536370.987016352, 4492671.353679356], [14536578.623112632, 4492333.190962355], [14536798.473096136, 4491979.668849295], [14537006.10919119, 4491656.8981793765], [14537225.959175108, 4491303.398821744], [14537421.381382758, 4490980.648924496], [14537653.44525381, 4490642.540617242], [14537909.936901638, 4490319.811016954], [14538129.786885347, 4489997.091326332], [14538374.0646448, 4489659.014660098], [14538618.342404164, 4489351.681672938], [14538850.406276468, 4489044.357672096], [14539131.325698882, 4488721.677141822], [14539363.389571583, 4488429.736623869], [14539644.308994643, 4488122.43957564], [14539900.800642235, 4487815.151508973], [14539986.297858382, 4487722.966840681], [14540010.725633759, 4487692.23879659], [14540267.217281476, 4487400.32686548], [14540511.495040976, 4487077.696790041], [14540780.200575706, 4486770.43925529], [14541073.333887419, 4486493.915150635], [14541329.825535271, 4486186.674672224], [14541598.531071173, 4485910.165917298], [14541867.23660634, 4485618.303450893], [14542148.156029876, 4485326.449084601], [14542453.503228514, 4485049.962943723], [14542734.422652928, 4484773.484071729], [14543039.76985148, 4484512.371808684], [14543332.903163565, 4484251.266027703], [14543638.250362527, 4483974.808145135], [14543943.59756212, 4483713.715705612], [14544248.944761822, 4483467.987564921], [14544566.505848715, 4483222.26516165], [14544871.85304831, 4482976.548497614], [14545213.841912381, 4482746.194336815], [14545519.189110842, 4482515.845218762], [14545836.750198007, 4482254.788980392], [14546190.952949973, 4482024.450618634], [14546508.514037313, 4481824.828116437], [14546838.28901309, 4481609.854272898], [14547180.27787654, 4481394.88481907], [14547510.052850928, 4481179.919756588], [14547876.469490254, 4480995.667482875], [14548218.458354343, 4480796.06449436], [14548560.447216801, 4480596.465289407], [14548926.863856547, 4480412.223228773], 
[14549293.280496065, 4480212.631302457], [14549647.483247736, 4480043.748782027], [14549989.472111017, 4479874.868972281], [14550355.88875037, 4479721.344221679], [14550624.594284926, 4479598.526033944], [14550856.658157144, 4479491.061295501], [14551198.647020763, 4479291.48683264], [14551565.063659478, 4479107.26761045], [14551919.266411318, 4478923.051610395], [14552285.683050882, 4478769.540739722], [14552627.671913499, 4478600.681367721], [14553006.302441431, 4478431.8247021455], [14553372.719079891, 4478278.320992562], [14553739.135719031, 4478124.8195214365], [14554117.766246844, 4477986.670110905], [14554496.39677429, 4477833.172889764], [14554875.02730134, 4477710.376725644], [14555253.657828972, 4477572.232751319], [14555620.074468583, 4477464.788689573], [14555998.704995206, 4477326.647937234], [14556377.335522415, 4477234.555108875], [14556511.688290423, 4477157.811699739], [14556878.10492996, 4476958.281453992], [14557220.093792727, 4476804.799222015], [14557598.724319693, 4476651.319227265], [14557952.92707147, 4476482.493814672], [14558331.557598298, 4476329.018516568], [14558697.974237733, 4476175.54545325], [14559076.604765655, 4476022.0746261515], [14559443.02140455, 4475883.952793535], [14559821.65193224, 4475761.179352402], [14560200.282459686, 4475638.407340859], [14560578.912986, 4475500.290538945], [14560957.54351379, 4475377.521568426], [14561348.38792862, 4475270.099892532], [14561727.01845645, 4475147.333604416], [14562117.862871993, 4475055.259828022], [14562484.27951158, 4474963.186855402], [14562899.551702326, 4474871.1146875555], [14563290.396117546, 4474809.733688274], [14563669.026644476, 4474733.007943433], [14564059.871060286, 4474656.282757424], [14564450.7154752, 4474579.558129849], [14564853.773778029, 4474518.178830546], [14565244.618193747, 4474456.79988746], [14565635.462609466, 4474395.4213030925], [14565843.09870468, 4474380.076712145], [14566026.307024052, 4474364.732145027], [14566429.365327647, 4474303.354096425], 
[14566624.787535438, 4474303.354096845], [14566820.209743189, 4474288.009639743], [14567040.059725929, 4474272.665205484], [14567235.481933901, 4474241.97640493], [14567430.904141523, 4474241.9764056895], [14567638.540236901, 4474226.632038639], [14567821.748556953, 4474226.632038577], [14568029.384652914, 4474211.287694249], [14568212.592971867, 4474211.287693342], [14568420.229068192, 4474195.943371697], [14568627.865163937, 4474195.943371153], [14568811.073482776, 4474195.943371623], [14569018.709579367, 4474195.94337142], [14569214.131786728, 4474195.943370581], [14569238.559562922, 4474195.943371392], [14569409.553994717, 4474195.943371547], [14569629.403978188, 4474195.943370894], [14569824.826185605, 4474211.287692922], [14570020.248392954, 4474211.287693488], [14570215.670600649, 4474211.287693272], [14570423.306696696, 4474226.632037414], [14570618.728903888, 4474226.632037795], [14570814.151111197, 4474241.97640376], [14571021.787206706, 4474272.665204512], [14571204.995527001, 4474288.009638165], [14571412.631621836, 4474288.009637828], [14571620.267718533, 4474303.354094652], [14572023.326021364, 4474364.732141971], [14572194.32045334, 4474380.076710127], [14572414.170436617, 4474395.421300662], [14572805.01485221, 4474456.799885332], [14573208.073155506, 4474518.178827347], [14573598.917569762, 4474579.558126053], [14573989.761985833, 4474656.282754001], [14574368.392513085, 4474733.007939714], [14574759.236928629, 4474809.733685015], [14575162.295230972, 4474871.114683236], [14575553.139646066, 4474963.186850651], [14575931.770173518, 4475055.259823188], [14576322.614588926, 4475147.3335995795], [14576701.245116178, 4475270.0998873925], [14577079.875643862, 4475377.521562786], [14577470.720059728, 4475500.290533416], [14577837.13669783, 4475638.407335261], [14578215.767225962, 4475761.179345143], [14578594.39775321, 4475883.95278683], [14578973.028280452, 4476022.074619254], [14579327.231032163, 4476175.54544557], [14579705.861558419, 
4476329.018508264], [14580084.492086556, 4476482.493807283], [14580450.908725094, 4476651.3192181215], [14580805.111476777, 4476804.799213971], [14581183.742004093, 4476958.281445817], [14581525.730867168, 4477157.811690843], [14581879.933618782, 4477326.647927245], [14582234.136370221, 4477526.185151994], [14582588.3391218, 4477710.376715044], [14582930.327984763, 4477909.9212099165], [14583296.744624889, 4478094.119485319], [14583626.51959986, 4478309.021544471], [14583968.508463632, 4478539.278618473], [14584298.28343856, 4478754.189764326], [14584615.844526524, 4478969.105295983], [14584957.833389819, 4479199.376805741], [14585275.39447627, 4479429.653352519], [14585592.955563627, 4479659.934935787], [14585922.730539948, 4479890.221556971], [14586240.291627208, 4480135.866171305], [14586557.8527144, 4480396.869855059], [14586863.199913831, 4480642.526293688], [14587156.33322539, 4480888.188466244], [14587473.894312968, 4481164.565263089], [14587767.027623786, 4481425.59445718], [14588060.160935674, 4481701.985366741], [14588353.294247115, 4481978.383535463], [14588646.427559184, 4482254.788964908], [14588939.56086965, 4482546.558126468], [14589208.266406132, 4482822.978480075], [14589476.971941242, 4483114.763399049], [14589733.46358897, 4483421.914160338], [14590014.383012347, 4483698.35751204], [14590185.377443707, 4483898.0155623555], [14590331.944099901, 4484020.883937888], [14590661.719074445, 4484251.266009965], [14590979.280162634, 4484497.0124473], [14591296.841249231, 4484727.404948229], [14591602.18844864, 4484973.162509527], [14591919.749536166, 4485234.28621192], [14592212.882847624, 4485495.41639687], [14592518.23004711, 4485756.553064018], [14592823.577246739, 4486017.696217524], [14593116.710558899, 4486294.207799355], [14593409.843869546, 4486570.726653404], [14593690.763293965, 4486847.252777424], [14593983.89660544, 4487139.149354413], [14594252.602139814, 4487431.054034741], [14594533.52156419, 4487692.238776791], [14594802.227099039, 
4487999.523250918], [14595058.718747094, 4488306.8167062495], [14595339.638170302, 4488598.753809731], [14595583.915929569, 4488906.064781649], [14595828.19368928, 4489213.384739871], [14596096.899225544, 4489536.080365369], [14596341.176984914, 4489858.785899267], [14596573.240855644, 4490181.501342655], [14596805.304727584, 4490488.858596873], [14597037.368599355, 4490826.961959009], [14597257.21858321, 4491149.707135566], [14597489.282454617, 4491472.462225323], [14597696.918549843, 4491825.967271368], [14597928.98242226, 4492148.743134879], [14598136.618516896, 4492502.270936908], [14598319.826836484, 4492840.439096991], [14598527.462932773, 4493193.990176503], [14598722.885140764, 4493547.553156926], [14598906.093460135, 4493901.128039849], [14599101.515668057, 4494254.714825081], [14599260.296211885, 4494623.687640952], [14599431.290642768, 4494977.2987543205], [14599590.07118727, 4495330.921775762], [14599773.279505912, 4495715.308132935], [14599919.846161587, 4496068.956009824], [14600078.626705699, 4496453.369387598], [14600225.193361096, 4496822.419472711], [14600347.33224078, 4497206.860440532], [14600481.685009632, 4497575.937016725], [14600616.037777228, 4497960.4055835055], [14600750.39054488, 4498344.888232693], [14600860.31553649, 4498714.004829062], [14600982.454416526, 4499098.51508797], [14601080.165519364, 4499483.0394360265], [14601202.304399468, 4499867.577874515], [14601300.01550408, 4500267.512801375], [14601385.512719708, 4500652.079992499], [14601458.796047695, 4501052.044824842], [14601471.009935707, 4501175.113995761], [14601654.218255237, 4501498.177436593], [14601800.784910476, 4501867.404980572], [14601971.779342758, 4502236.64552286], [14602118.34599798, 4502590.513240334], [14602277.126541303, 4502959.779238895], [14602423.693198, 4503344.4451490035], [14602558.045964886, 4503698.350247777], [14602680.184845533, 4504083.043249815], [14602814.537612794, 4504467.750366676], [14602936.676492875, 4504852.471599208], [14603058.815372452, 
4505237.206950171], [14603180.954252187, 4505606.56617041], [14603266.451468613, 4506022.110849317], [14603388.590348229, 4506391.497726757], [14603474.087563867, 4506791.681535702], [14603547.37089101, 4507161.095535717], [14603645.08199488, 4507561.308730727], [14603718.365323769, 4507946.143524667], [14603791.648650708, 4508330.992452457], [14603864.931978848, 4508746.64517058], [14603926.001419289, 4509131.523501303], [14603987.070858913, 4509531.8119634325], [14604035.92641036, 4509916.719140585], [14604084.781962857, 4510332.43477583], [14604109.20973883, 4510717.37137208], [14604145.85140303, 4510932.942045082], [14604158.065290203, 4511117.720440137], [14604170.279178778, 4511317.900712452], [14604170.279177956, 4511533.483745455], [14604182.493066223, 4511718.272735513], [14604219.134730231, 4511933.864012004], [14604219.134729845, 4512318.859471839], [14604231.348618282, 4512519.062705031], [14604231.348618418, 4512719.269766486], [14604231.348617738, 4512904.079682778], [14604231.348618373, 4513119.695373501], [14604231.348618187, 4513319.913919597], [14604231.348618748, 4513520.136295258], [14604231.348618748, 4513720.362499544], [14604231.348618232, 4513920.592533981], [14604231.348618407, 4514120.826397525], [14604219.134730808, 4514336.467148453], [14604219.134730032, 4514721.550967597], [14604182.493066857, 4514906.396233077], [14604170.27917791, 4515122.053169531], [14604170.279178878, 4515322.310016387], [14604158.065290527, 4515507.165891377], [14604145.851402044, 4515722.835206429], [14604109.209738161, 4515923.103548852], [14604084.781962737, 4516308.245749079], [14604035.926410299, 4516708.808674816], [14603987.070858993, 4517093.9797944045], [14603926.00141845, 4517509.9805284925], [14603864.931978678, 4517895.181143359], [14603791.64865124, 4518295.804826711], [14603718.365322556, 4518681.034375972], [14603645.081995493, 4519097.098223365], [14603547.370891701, 4519466.946648656], [14603474.08756368, 4519867.630533778], [14603388.59034769, 
4520237.506207204], [14603266.451468341, 4520638.219615408], [14603180.95425151, 4521008.12254481], [14603058.815372169, 4521408.865484192], [14602936.676491957, 4521778.79567707], [14602814.537612109, 4522164.153545156], [14602680.184844451, 4522534.110462632], [14602558.045964777, 4522919.496172441], [14602423.693197738, 4523289.47982265], [14602277.126541242, 4523674.893382433], [14602118.345997736, 4524044.903771376], [14601971.779341936, 4524414.927259076], [14601800.784909926, 4524784.963848068], [14601654.218254454, 4525155.013540586], [14601471.009935107, 4525525.076335961], [14601300.015503855, 4525864.3120796885], [14601129.021072082, 4526234.399998049], [14600933.598864602, 4526573.658772662], [14600750.390544403, 4526928.350179163], [14600542.754449246, 4527267.63148988], [14600347.332241392, 4527637.769124864], [14600151.910033902, 4527961.6503126025], [14599944.273938052, 4528316.388852858], [14599712.210066758, 4528640.29108562], [14599504.573970841, 4528964.203363799], [14599272.510098685, 4529318.975958536], [14599052.660115397, 4529627.483662931], [14598808.382356219, 4529951.426563321], [14598588.532372601, 4530275.379513526], [14598344.254612468, 4530599.342514882], [14598087.762964793, 4530907.88805286], [14597831.271317031, 4531216.442710067], [14597586.993558556, 4531525.006485532], [14597318.288022641, 4531833.57938278], [14597061.796375385, 4532126.732084111], [14596793.090839129, 4532419.893019322], [14596512.171415407, 4532713.062188578], [14596231.251992663, 4533021.670209015], [14595950.332568703, 4533299.42523091], [14595669.413144821, 4533577.187645228], [14595644.985369092, 4533623.48209964], [14595498.41871387, 4533777.7984313145], [14595376.279834235, 4533916.685081554], [14595119.788186248, 4534225.328697573], [14594838.868762594, 4534518.548590586], [14594582.377115823, 4534811.776722892], [14594289.243803782, 4535089.579398674], [14594008.324380705, 4535367.389470016], [14593727.404956257, 4535660.64146122], [14593446.48553373, 
4535923.031808229], [14593153.352221377, 4536200.864075813], [14592848.005022287, 4536478.703743617], [14592567.08559909, 4536741.114669866], [14592249.524511898, 4537003.532198354], [14591944.177311765, 4537250.519432793], [14591614.402336147, 4537497.512515288], [14591309.055137279, 4537729.073843565], [14591003.707938092, 4537991.5162313925], [14590686.146850547, 4538238.5268653], [14590368.585762527, 4538454.665970274], [14590014.383011634, 4538686.248553525], [14589684.608036457, 4538917.836279545], [14589354.833060294, 4539118.549804093], [14589025.058085084, 4539319.267192334], [14588683.069222417, 4539535.42869948], [14588328.866470784, 4539736.154114145], [14587986.877606917, 4539952.324265979], [14587657.102632334, 4540137.616535028], [14587290.685992181, 4540338.353544246], [14586936.483240709, 4540508.210955269], [14586570.066601887, 4540678.071133686], [14586228.077738127, 4540863.376303522], [14585861.661099326, 4541033.242270953], [14585495.244460465, 4541187.668278762], [14585128.827820107, 4541342.096575501], [14584750.197293801, 4541496.527160034], [14584395.994541308, 4541650.960032226], [14584347.138989441, 4541666.403445655], [14584286.06955062, 4541697.29033999], [14583919.652910553, 4541851.726188135], [14583565.450158978, 4542021.60826263], [14583186.819632547, 4542176.048916555], [14582844.83076899, 4542330.491858846], [14582466.20024182, 4542484.937089604], [14582087.569714688, 4542608.49492355], [14581708.939186862, 4542762.94427407], [14581330.308658957, 4542901.950648047], [14580963.892020464, 4543010.067999098], [14580585.261492236, 4543149.07766965], [14580206.630964926, 4543257.1975837825], [14579803.57266225, 4543365.318620336], [14579424.942134961, 4543457.994687216], [14579192.878263632, 4543519.779190642], [14579034.09771989, 4543550.671579929], [14578655.467192937, 4543658.795661787], [14578264.622776985, 4543751.474338827], [14577873.77836218, 4543828.707200758], [14577495.147835061, 4543905.940633789], [14577092.089531014, 
4543983.174640503], [14576701.245115522, 4544029.515319117], [14576310.400701316, 4544106.750241843], [14575919.556285223, 4544137.6443707235], [14575699.706302246, 4544183.98573635], [14575504.284093758, 4544199.432905103], [14575113.439679246, 4544230.327308818], [14574722.595264157, 4544276.669086714], [14574514.959168296, 4544292.1163917575], [14574331.750849022, 4544307.563720322], [14574124.11475319, 4544307.563719708], [14573916.478657782, 4544338.458445055], [14573733.270338044, 4544353.905841484], [14573525.634242143, 4544353.905841748], [14573330.212034652, 4544369.353261512], [14573134.789826233, 4544369.353261464], [14572939.367619064, 4544369.353262303], [14572719.517635329, 4544369.353261675], [14572524.095428342, 4544369.353261389], [14572328.673220538, 4544384.800704624], [14572121.037124906, 4544384.800704819], [14571937.828804424, 4544369.353261591], [14571730.192709187, 4544369.35326217], [14571522.556614075, 4544369.353262826], [14571339.34829391, 4544369.353262385], [14571131.712198837, 4544353.905842261], [14570948.503878202, 4544353.905843028], [14570740.867782762, 4544338.45844611], [14570545.44557518, 4544338.458445838], [14570350.023367973, 4544307.563721146], [14570130.173384072, 4544292.116393631], [14569934.751176836, 4544276.669089307], [14569543.906761209, 4544230.327311454], [14569153.062345682, 4544199.432906603], [14568945.42625058, 4544183.985738961], [14568762.21793041, 4544137.6443743445], [14568359.15962792, 4544106.750244441], [14567968.315212548, 4544029.515323136], [14567577.470797084, 4543983.174644471], [14567186.6263823, 4543905.940638149], [14566783.568079067, 4543828.707204437], [14566392.72366387, 4543751.474343973], [14566014.093135444, 4543658.795666206], [14565623.24872121, 4543550.671584686], [14565244.618193153, 4543457.994693514], [14564853.773778267, 4543365.3186267475], [14564475.14325144, 4543257.19758965], [14564096.512723364, 4543149.07767554], [14563693.454420516, 4543010.068005312], [14563314.823893422, 
4542901.950654995], [14562936.193365432, 4542762.944282519], [14562606.418390313, 4542716.609237178], [14562398.782294482, 4542685.71932119], [14562191.146199709, 4542670.274397315], [14561812.51567194, 4542608.494931122], [14561421.671257151, 4542562.160572617], [14561030.826841386, 4542500.381747122], [14560639.982426064, 4542423.158731613], [14560236.924122458, 4542345.936288569], [14559833.86581993, 4542268.714416368], [14559467.449180512, 4542176.048925813], [14559076.604765026, 4542083.384260142], [14558697.97423732, 4541990.720416876], [14558307.129823012, 4541867.169908754], [14557928.499294864, 4541774.507987912], [14557525.440991675, 4541650.960043525], [14557146.81046419, 4541542.856792612], [14556768.179936875, 4541403.868545181], [14556389.549410133, 4541264.882152807], [14556010.918883575, 4541141.3402487645], [14555644.502244113, 4541017.7998076], [14555290.299492834, 4540863.376315873], [14554911.668964645, 4540708.95511347], [14554557.466213938, 4540523.652687981], [14554178.835686168, 4540384.678031769], [14553824.632935008, 4540214.822632615], [14553458.216296038, 4540044.970003144], [14553116.227432424, 4539844.238643887], [14552749.810793048, 4539689.83253557], [14552407.821930347, 4539504.548224409], [14552249.041385714, 4539427.347399758], [14551882.624746233, 4539303.827270621], [14551503.99422004, 4539195.748356934], [14551381.855340248, 4539149.4291654825], [14551125.36369234, 4539072.230970899], [14550746.73316546, 4538948.715047952], [14550368.102638047, 4538825.200587222], [14549989.472110914, 4538686.24856897], [14549623.055470902, 4538531.8596091075], [14549244.42494422, 4538377.472935194], [14548902.436080279, 4538238.526881961], [14548523.805553462, 4538084.144551083], [14548157.38891405, 4537914.3266261555], [14547803.186162714, 4537729.073861541], [14547436.769523405, 4537559.261718393], [14547094.7806594, 4537404.8894423675], [14546728.364020523, 4537204.208898579], [14546361.947381083, 4537018.9687477425], [14546032.172405548, 
4536818.295628578], [14545677.969654717, 4536633.062331327], [14545323.76690356, 4536432.396637806], [14544993.991928555, 4536216.299437661], [14544652.003064753, 4536000.206715432], [14544322.228089612, 4535799.553195227], [14544004.667001592, 4535568.034697714], [14543662.678138765, 4535351.9554023], [14543345.117051568, 4535120.44683895], [14543027.555963451, 4534858.076675749], [14542697.780988807, 4534626.579069244], [14542380.219901314, 4534395.086599067], [14542074.87270228, 4534132.734675581], [14541781.73939042, 4533870.389347416], [14541464.178302774, 4533623.482121268], [14541158.831103068, 4533345.718475012], [14540877.911680002, 4533098.82366223], [14540572.564480469, 4532821.073979021], [14540291.645057205, 4532543.331687368], [14540010.725633612, 4532265.5967861395], [14539729.806210512, 4531972.440185714], [14539448.886786574, 4531679.291817728], [14539192.395139629, 4531417.008149412], [14538899.261827474, 4531108.447566406], [14538630.55629233, 4530815.323457989], [14538374.064645067, 4530506.780656056], [14538129.786885653, 4530198.2469714265], [14537873.295237642, 4529874.296414274], [14537616.803589916, 4529565.781417542], [14537396.953606762, 4529257.275535442], [14537152.67584749, 4528933.354167178], [14536932.825863078, 4528624.866966413], [14536676.334216729, 4528270.117950614], [14536444.27034455, 4527946.227197342], [14536248.848136436, 4527606.923846032], [14536028.998153169, 4527267.631515431], [14535821.362057582, 4526928.3502052585], [14535601.512074055, 4526589.079913595], [14535418.30375431, 4526249.820638423], [14535210.667658564, 4525910.572378158], [14535015.245451855, 4525555.915520818], [14534832.037131598, 4525185.851631445], [14534673.256587537, 4524831.219369989], [14534490.048268745, 4524476.599140016], [14534331.267724525, 4524106.573469703], [14534160.273292877, 4523751.977827433], [14533989.278861778, 4523366.561424789], [14533842.71220565, 4523027.406744612], [14533696.145549532, 4522642.01705591], [14533561.79278178, 
4522287.47108967], [14533403.01223859, 4521902.108676457], [14533268.659471177, 4521532.174121515], [14533134.306703577, 4521146.839544288], [14533036.59559936, 4520776.931708183], [14532914.45671886, 4520376.21298474], [14532804.531727992, 4520006.332405602], [14532682.392847996, 4519621.054045723], [14532596.895632427, 4519220.379606621], [14532499.184527747, 4518835.130195337], [14532413.687312467, 4518449.894972147], [14532303.762320925, 4518049.265387172], [14532230.478993248, 4517664.059100537], [14532157.19566512, 4517278.866996087], [14532108.340113258, 4516878.282249036], [14532035.056785649, 4516477.712836037], [14531973.987345573, 4516092.564399366], [14531973.987345243, 4516030.941964629], [14531937.345681304, 4515784.455855288], [14531888.490129516, 4515383.928306678], [14531827.420689756, 4514983.416085525], [14531778.56513731, 4514582.919189623], [14531741.923473246, 4513982.2025730265], [14531705.281810218, 4513797.373626424]]], [[[14339432.408530401, 4075075.6362608722], [14339458.685080042, 4075084.437250298], [14339751.818391822, 4076223.538724107], [14338652.568473613, 4084644.61678336], [14338506.00181847, 4086228.895738871], [14336759.41583827, 4097428.8401910467], [14341315.196051706, 4104501.207588504], [14341938.104338527, 4105925.114876204], [14342707.57928172, 4107200.8516809675], [14343501.482000086, 4108610.2642060244], [14352735.181309098, 4124392.929620267], [14356472.63102952, 4130694.3846049826], [14357889.442034634, 4133221.908237402], [14359770.380782299, 4136374.679519459], [14361968.880617706, 4140004.4420440258], [14364619.294308105, 4144900.5395199214], [14371984.268757481, 4157187.447747604], [14376991.962826528, 4165401.170055259], [14381535.529153243, 4172024.2679253733], [14386396.65656731, 4178188.490732939], [14390109.678512042, 4182503.992319043], [14410071.323094087, 4181759.7407464916], [14411740.474114683, 4181697.507984717], [14411887.04077094, 4182668.28335924], [14412290.099073624, 4183937.8871818185], 
[14413145.071231768, 4185521.361705531], [14414659.593340822, 4189466.094009724], [14416235.184889486, 4190347.874119261], [14418116.123638438, 4191708.045624967], [14420009.276274431, 4193442.1331010573], [14421670.365039108, 4195430.688718817], [14423087.176044246, 4197314.91349072], [14424369.634281721, 4199438.789978044], [14425468.884199308, 4201682.765014417], [14426323.856356785, 4203642.876257398], [14426897.90909205, 4205708.102752736], [14427508.603490569, 4207998.262274081], [14427826.164578045, 4210229.005413773], [14428705.564512718, 4211007.626033115], [14430427.722717127, 4212669.865587721], [14432333.089241156, 4214736.783326548], [14433591.119701806, 4217118.712887043], [14434763.65294765, 4219576.085746894], [14435569.76955409, 4222498.676121303], [14436070.538960757, 4223802.851526747], [14436840.01390352, 4227416.375070025], [14437157.574990707, 4228856.1177854985], [14437963.691597, 4230581.057929868], [14439343.860938719, 4234526.939401433], [14440296.544200161, 4238774.472618673], [14440443.110856375, 4242242.7597973915], [14440113.33588104, 4245862.359133215], [14439478.213706143, 4248867.084183436], [14439331.647050746, 4249813.743335828], [14439649.208137836, 4250910.768227175], [14439940.719361056, 4252568.169217453], [14544627.066163512, 4252568.169285575], [14534796.669683114, 4240464.677659911], [14514759.161462417, 4205175.173351611], [14501957.419936124, 4179737.9296527705], [14485448.739516629, 4179288.840961339], [14439555.743282635, 4173635.0297790095], [14435722.322889304, 4173166.719291172], [14421250.789188549, 4166599.3964159037], [14402137.232618976, 4158446.631542469], [14394344.868232908, 4150978.506075684], [14389524.734276652, 4146676.4225404835], [14384326.1140242, 4142606.5776094557], [14365958.39800865, 4119341.9682434443], [14361694.86160705, 4101989.3419181844], [14361694.861568779, 4100867.6858379836], [14360581.666636346, 4094329.1704151263], [14359101.117467742, 4090737.3044364187], [14347779.925183792, 
4070274.6906909533], [14347742.667809354, 4070231.176358625], [14339432.408530401, 4075075.6362608722]]]]}}]} \ No newline at end of file diff --git a/prediction/data/zones/특정어업수역Ⅱ.json b/prediction/data/zones/특정어업수역Ⅱ.json new file mode 100644 index 0000000..5f3cea7 --- /dev/null +++ b/prediction/data/zones/특정어업수역Ⅱ.json @@ -0,0 +1 @@ +{"type": "FeatureCollection", "name": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed2", "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}}, "features": [{"type": "Feature", "properties": {"fid": 0, "GML_ID": null, "OBJECTID": null, "ZONE_NM": null, "MNCT_NO": null, "MNCT_SCALE": null, "MNCT_NM": null, "RELREGLTN": null, "RELGOAG": null, "REVIYR": null, "ZONE_DESC": null, "PHOTO1_PAT": null, "ID": -2147483647, "CATE_CD": null, "ADR_CD": null, "ADR_KNM": null, "ORIGIN": null, "ORIYR": null, "ORIORG": null, "NAME": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed\u2161", "WARD_NM": null, "WARD_ID": null, "GISID": null, "FID_2": null, "NAME_2": null, "FID_3": null, "NAME_3": null, "GID": null, "NAME_4": null, "FID_4": null, "NAME_5": null, "FID_5": null, "NAME_6": null}, "geometry": {"type": "MultiPolygon", "coordinates": [[[[14026312.49388151, 3787395.72363925], [14026272.928939708, 3912341.809856742], [14026343.45295978, 3912257.596201178], [14026661.014047539, 3911892.988733094], [14026978.575133963, 3911557.559921697], [14027051.858461797, 3911470.0583396605], [14027137.355677672, 3911178.3911333913], [14027198.425118214, 3910930.4797375146], [14027271.708445255, 3910711.7387614], [14027344.991773328, 3910478.419569922], [14027406.061213229, 3910245.105041409], [14027503.77231727, 3910026.376906358], [14027577.055645473, 3909793.071410766], [14027650.338973064, 3909574.351742198], [14027723.622300655, 3909341.0552779627], [14027821.333404718, 3909122.34407479], [14027906.830620117, 3908903.6369685275], [14027992.327835856, 3908684.9339581975], [14028090.038940514, 3908451.65526086], [14028175.536155386, 3908218.3812227794], 
[14028273.247259233, 3908014.2702603256], [14028370.958363216, 3907781.0049572694], [14028468.669467552, 3907576.9016377437], [14028566.380571473, 3907343.6450677463], [14028664.091674816, 3907139.5493900776], [14028761.8027788, 3906920.8794053984], [14028883.941658128, 3906716.7911130013], [14028993.866650797, 3906483.5517138042], [14029091.577753998, 3906279.4710601526], [14029201.502746, 3906060.81717149], [14029323.641625034, 3905856.7438994534], [14029433.566617623, 3905638.097921009], [14029543.491609104, 3905419.4560323786], [14029665.630488753, 3905215.393960793], [14029714.486040367, 3905011.3354516793], [14029763.341592517, 3904807.2805052707], [14029824.411032889, 3904574.0792145403], [14029861.052696653, 3904340.8825751734], [14029922.122136267, 3904107.690588767], [14029983.19157568, 3903874.5032539973], [14030044.261016268, 3903641.3205705], [14030105.33045631, 3903393.5690651014], [14030178.613783477, 3903174.96915455], [14030239.683223227, 3902941.800421957], [14030312.96655101, 3902723.2089581615], [14030386.24987901, 3902475.4769059164], [14030447.319319358, 3902256.8941590027], [14030520.602646638, 3902023.7437318605], [14030606.099863403, 3901790.597953613], [14030691.597078484, 3901572.028007055], [14030764.880406654, 3901338.8912325953], [14030862.591510149, 3901120.329726406], [14030935.874838341, 3900887.201954522], [14031021.372054312, 3900668.648888896], [14031119.083157621, 3900450.0999057423], [14031204.580374023, 3900216.9854906956], [14031290.077589095, 3899998.444944336], [14031400.002581708, 3899779.908480802], [14031485.499797242, 3899561.37609934], [14031595.424789447, 3899342.847798803], [14031705.349781059, 3899138.891733757], [14031790.846996775, 3898905.803441251], [14031912.985876147, 3898701.8549923794], [14032010.696979966, 3898468.7754048174], [14032120.621971566, 3898264.8345725327], [14032230.54696337, 3898046.330481661], [14032352.685843932, 3897827.830470188], [14032450.396947037, 3897623.9008064135], 
[14032572.535826314, 3897405.4086795547], [14032694.674706502, 3897216.052135809], [14032816.81358599, 3896997.5676209624], [14032938.952466376, 3896793.652420099], [14033061.091346277, 3896589.740771255], [14033195.444113696, 3896385.8326731725], [14033329.796881828, 3896181.9281274145], [14033451.935761089, 3895978.0271314387], [14033586.288528644, 3895774.129686274], [14033720.641296018, 3895584.7995237107], [14033989.34683201, 3895191.5851198323], [14034258.05236752, 3894798.383918607], [14034563.399566252, 3894405.195917184], [14034844.31899014, 3894026.5829095095], [14035149.666189065, 3893647.982137594], [14035442.799501298, 3893283.954474], [14035760.360587686, 3892919.938120377], [14036077.92167527, 3892555.9330759617], [14036407.696651513, 3892191.9393388475], [14036737.471625743, 3891857.075086648], [14036870.141786523, 3891726.198837797], [14037091.674377358, 3891507.661720512], [14037421.449352924, 3891172.81701958], [14037787.86599152, 3890867.0976043935], [14038142.06874289, 3890532.271202124], [14038178.710406706, 3890503.1563148433], [14038215.352071756, 3890459.48412038], [14038569.554822957, 3890139.2263254467], [14038923.75757426, 3889818.9772791206], [14039302.388101518, 3889513.29316919], [14039681.018628426, 3889207.617028156], [14040059.649154926, 3888931.0597660383], [14040426.065794326, 3888639.953906682], [14040816.91020996, 3888348.855272826], [14041232.18240151, 3888072.318264321], [14041623.02681568, 3887810.3418493513], [14041818.449023297, 3887679.3558354746], [14042026.085119475, 3887548.3712851256], [14042221.507326983, 3887431.941802017], [14042429.14342227, 3887300.9600144974], [14042624.565629626, 3887184.532986891], [14042844.415613849, 3887053.553961726], [14043052.051709011, 3886951.682398294], [14043259.687804861, 3886820.705972922], [14043467.323900312, 3886704.283712141], [14043687.173883341, 3886602.415180504], [14043894.809980085, 3886471.4426559876], [14044102.446075153, 3886369.576147044], [14044310.082171045, 
3886267.7105223597], [14044542.146041911, 3886151.2937484276], [14044749.78213759, 3886049.4300192064], [14044957.418233024, 3885947.567174553], [14045189.4821048, 3885860.256868118], [14045397.118200412, 3885743.844137209], [14045629.182072148, 3885656.5353473793], [14045849.032055777, 3885569.2272066567], [14046056.668151285, 3885481.9197152676], [14046288.73202292, 3885380.06179798], [14046508.58200612, 3885292.755714113], [14046740.6458772, 3885205.4502798985], [14046960.495860584, 3885132.696247827], [14047192.559732666, 3885045.392004632], [14047412.409715734, 3884958.0884115743], [14047644.473587925, 3884885.335912664], [14047864.323571343, 3884812.583865854], [14048096.387442762, 3884739.8322684863], [14048316.237426357, 3884667.081122923], [14048560.515186315, 3884579.7803441263], [14048780.365169752, 3884521.5801856825], [14049012.429040425, 3884448.8303926187], [14049232.279024879, 3884390.630883527], [14049476.556783637, 3884332.431662887], [14049708.620655935, 3884288.7824363117], [14049940.684527121, 3884216.034086747], [14050172.74839862, 3884157.835731066], [14050417.026158523, 3884099.6376657546], [14050636.876141772, 3884055.989305159], [14050881.153900763, 3884012.3411064013], [14051101.003885025, 3883968.693071144], [14051345.28164453, 3883925.045197191], [14051589.55940309, 3883881.397485363], [14051809.409386702, 3883837.7499363376], [14052053.687146327, 3883794.102549823], [14052297.96490621, 3883765.00438295], [14052530.028777948, 3883721.357266986], [14052774.306537522, 3883692.2592790583], [14052994.1565203, 3883677.710312659], [14053238.434280407, 3883648.6124334624], [14053482.712039571, 3883619.514626246], [14053726.989798827, 3883604.9657499343], [14053946.839782678, 3883575.8680508113], [14054203.331430309, 3883561.3192288275], [14054435.395301264, 3883546.770424295], [14054667.459173407, 3883532.221638027], [14054911.736932652, 3883517.6728706607], [14055156.014692299, 3883503.1241203477], [14055388.078563493, 3883503.1241198485], 
[14055632.356323073, 3883503.1241198387], [14055876.634082645, 3883503.1241195546], [14056120.911842786, 3883488.575388423], [14056352.97571373, 3883488.575387674], [14056585.039585229, 3883503.1241202564], [14056829.31734454, 3883503.1241205744], [14057061.381216811, 3883503.1241201926], [14057305.658975704, 3883517.672870415], [14057549.936734984, 3883532.2216384728], [14057782.000606766, 3883546.7704246663], [14058026.27836623, 3883561.3192288917], [14058270.556125652, 3883575.868050909], [14058514.833885796, 3883604.9657495967], [14058734.683868349, 3883619.5146270352], [14058978.96162855, 3883648.612433189], [14059211.02550005, 3883663.161363651], [14059455.303259443, 3883692.2592792334], [14059699.581018375, 3883721.35726594], [14059919.431002565, 3883765.0043828213], [14060163.708761357, 3883794.1025502756], [14060407.98652079, 3883837.7499373327], [14060652.26428107, 3883881.397485507], [14060872.114264324, 3883925.0451974412], [14061116.392023854, 3883968.6930701793], [14061348.455895012, 3884012.3411065093], [14061580.519767078, 3884055.9893042506], [14061824.797526775, 3884099.6376657034], [14062044.647510495, 3884157.8357315855], [14062288.925269853, 3884216.0340869497], [14062508.775252802, 3884288.782436949], [14062753.053012297, 3884332.4316628594], [14062985.116884248, 3884390.6308830315], [14063204.966867786, 3884448.8303930266], [14063449.244626828, 3884521.5801856043], [14063669.09461016, 3884579.780344494], [14063901.15848242, 3884667.0811240533], [14064121.008465912, 3884739.832268733], [14064353.072337683, 3884812.5838650106], [14064585.136208225, 3884885.3359133895], [14064817.200079866, 3884958.088411405], [14065037.050063629, 3885045.3920052126], [14065269.113934992, 3885132.6962482887], [14065488.963918757, 3885205.450280084], [14065708.813902, 3885292.755713535], [14065928.663886081, 3885380.0617970554], [14066160.72775689, 3885481.9197158483], [14066380.577740904, 3885569.2272062856], [14066588.21383698, 3885656.5353471697], 
[14066820.277708739, 3885743.8441374716], [14067040.127691144, 3885860.2568676393], [14067259.977674901, 3885947.5671746274], [14067467.613770252, 3886049.4300197763], [14067687.463753887, 3886151.2937480435], [14067907.313738173, 3886267.7105219206], [14068114.949834043, 3886369.5761465007], [14068322.585929519, 3886471.442655181], [14068554.649800802, 3886602.4151809313], [14068750.072007738, 3886704.283711534], [14068957.708103167, 3886820.705972922], [14069165.344199639, 3886951.6823977036], [14069372.980294656, 3887053.5539613245], [14069592.83027846, 3887184.53298712], [14069788.25248586, 3887300.9600143926], [14069995.888581414, 3887431.9418018376], [14070203.524677217, 3887548.3712856653], [14070398.946884345, 3887679.3558351737], [14070606.58297968, 3887810.341848666], [14070997.427395098, 3888072.318263754], [14071400.485698925, 3888348.855272708], [14071791.330113634, 3888639.95390655], [14072157.746753618, 3888931.059766593], [14072548.59116818, 3889207.617027234], [14072915.007807814, 3889513.2931684074], [14073293.638334833, 3889818.9772791504], [14073647.841086097, 3890139.226324991], [14074014.257724732, 3890459.4841204123], [14074356.24658839, 3890779.750664216], [14074710.449339252, 3891114.584135423], [14075052.438203165, 3891463.985782468], [14075369.999290047, 3891813.397846801], [14075699.774265824, 3892162.8203282068], [14076029.549241148, 3892526.813160914], [14076334.896440182, 3892890.817300508], [14076652.457527356, 3893254.8327489593], [14076933.376951266, 3893633.4208147195], [14077238.72415068, 3894012.0211142646], [14077519.643573463, 3894405.1959178024], [14077812.776884863, 3894798.383917827], [14078081.482420994, 3895191.5851198635], [14078337.974068029, 3895584.7995231976], [14078472.326835675, 3895788.693671682], [14078594.465715563, 3895992.591370282], [14078728.818483, 3896196.4926201487], [14078850.957363801, 3896400.3974202448], [14078960.88235518, 3896604.30577156], [14079083.021235045, 3896808.2176739727], 
[14079205.16011431, 3897012.1331286556], [14079327.298994398, 3897230.6179148587], [14079534.935089272, 3897623.9008055353], [14079766.998961411, 3897594.7682869313], [14079999.062833289, 3897551.069644458], [14080243.34059258, 3897521.9373066192], [14080475.404464027, 3897492.8050412447], [14080707.468334954, 3897449.106778766], [14080951.746094488, 3897419.9746949435], [14081196.023853954, 3897405.408680112], [14081415.87383798, 3897376.2767037847], [14081672.365485134, 3897361.710744355], [14081904.429357013, 3897332.5788773843], [14082136.49322842, 3897318.012970849], [14082380.770987837, 3897303.44708331], [14082625.048747523, 3897303.447082881], [14082844.89873114, 3897288.8812134634], [14083101.39037848, 3897288.8812134196], [14083333.454250371, 3897274.3153610313], [14083577.73200986, 3897274.315360886], [14083809.795881303, 3897274.3153613866], [14084054.07364039, 3897274.315361214], [14084298.35139951, 3897274.3153610793], [14084530.415272055, 3897288.881213491], [14084774.693031047, 3897288.881213693], [14085018.970789962, 3897303.447082888], [14085263.248550324, 3897303.4470835133], [14085483.09853329, 3897318.012971403], [14085727.376292782, 3897332.5788774174], [14085971.654052334, 3897361.71074422], [14086203.7179239, 3897376.276704187], [14086447.995682908, 3897405.4086796427], [14086692.27344248, 3897419.9746941905], [14086912.123426246, 3897449.1067788443], [14087156.401186522, 3897492.8050409877], [14087400.678945886, 3897521.9373064945], [14087620.528929327, 3897551.0696441643], [14087864.806688221, 3897594.7682873094], [14088109.084447999, 3897623.900805951], [14088341.148319451, 3897667.5997203756], [14088573.212191245, 3897696.7324211453], [14088805.276063038, 3897740.4316074788], [14089037.339933824, 3897784.1309570693], [14089281.617693743, 3897827.830470751], [14089501.4676769, 3897900.66335366], [14089745.745436855, 3897944.3633015817], [14089965.595419774, 3898002.630152866], [14090209.873179223, 3898060.897294051], [14090441.937050942, 
3898119.164725707], [14090674.000922583, 3898162.8654892095], [14090906.064793834, 3898235.7004582426], [14091125.914777994, 3898308.535880703], [14091370.19253783, 3898381.3717555343], [14091590.04252131, 3898439.6407829127], [14091822.106392259, 3898512.4774739025], [14092041.956375973, 3898585.314618505], [14092274.020247314, 3898672.7197900466], [14092493.870230613, 3898745.557932266], [14092591.581335085, 3898774.693316213], [14092616.00911053, 3898760.125616015], [14092860.286870733, 3898701.8549935655], [14093080.136854356, 3898629.0171225015], [14093312.200724699, 3898570.7471534302], [14093556.478484493, 3898527.044866997], [14093776.328468569, 3898468.775404192], [14094020.606227417, 3898425.073498839], [14094264.883987851, 3898381.3717556526], [14094484.733970387, 3898337.670176473], [14094729.011730317, 3898293.9687599814], [14094961.075602157, 3898235.7004585327], [14095193.13947348, 3898206.566416765], [14095437.417232322, 3898162.865489851], [14095669.481104298, 3898133.7316290126], [14095901.544975886, 3898090.0309733814], [14096145.8227352, 3898060.897294544], [14096377.886607243, 3898031.7636878835], [14096622.164366543, 3898017.196910956], [14096866.442125635, 3897988.063412855], [14097086.292109383, 3897973.4966916144], [14097330.569869047, 3897944.3633026695], [14097574.847628593, 3897929.7966353055], [14097806.91150033, 3897915.2299849386], [14098051.189258894, 3897900.6633545416], [14098295.467019573, 3897886.0967414593], [14098515.317002017, 3897871.5301457075], [14098759.594761733, 3897871.5301461737], [14099003.872522173, 3897871.5301457415], [14099248.15028098, 3897842.3970105853], [14099480.214152526, 3897842.3970112065], [14099724.491911555, 3897842.3970108926], [14099968.769672029, 3897871.5301465685], [14100200.833543025, 3897871.530146231], [14100445.111302666, 3897871.530146376], [14100689.389062308, 3897886.0967411445], [14100909.23904563, 3897900.6633543223], [14101153.516805617, 3897915.229985139], [14101397.794564402, 
3897929.796634782], [14101629.858436095, 3897944.363301649], [14101874.136195809, 3897973.4966918174], [14102118.413954498, 3897988.0634127976], [14102338.263938468, 3898017.1969116074], [14102582.541697871, 3898031.763687157], [14102826.819457443, 3898060.897294939], [14103046.669441475, 3898090.0309742764], [14103290.947201025, 3898133.731628701], [14103535.224959875, 3898162.865489382], [14103767.288831646, 3898206.566415829], [14104011.566590969, 3898235.7004580023], [14104243.63046214, 3898293.9687599214], [14104475.694334047, 3898337.6701762597], [14104707.758205285, 3898381.3717553043], [14104952.035964744, 3898425.0734980954], [14105171.885948928, 3898468.7754048845], [14105416.163708236, 3898527.044866273], [14105636.013691971, 3898570.7471529637], [14105880.291451601, 3898629.017122684], [14106100.141434586, 3898701.8549932986], [14106344.419193909, 3898760.1256155283], [14106576.483066218, 3898818.3965284377], [14106796.33304983, 3898891.2355768005], [14107040.61080865, 3898949.5071421904], [14107260.460792817, 3899022.347006401], [14107492.524664072, 3899109.7554428596], [14107712.374646941, 3899168.0280965483], [14107944.43851916, 3899240.8693216643], [14108164.288502041, 3899313.7110006236], [14108396.35237358, 3899401.1216131775], [14108616.202357315, 3899488.5328786755], [14108848.26622925, 3899575.9447972635], [14109068.116212262, 3899648.788562357], [14109300.180084735, 3899736.201678742], [14109507.81617953, 3899823.6154470327], [14109739.8800512, 3899925.5990036307], [14109959.730035394, 3900013.0141874147], [14110167.366130177, 3900100.430024312], [14110399.430001773, 3900202.4159939], [14110607.066097446, 3900318.9724758286], [14110826.91608077, 3900406.3905996806], [14111046.766064817, 3900508.3792349175], [14111254.402160756, 3900610.368760547], [14111462.038255833, 3900726.9293071674], [14111694.102127243, 3900828.9207385355], [14111901.738223149, 3900945.483462624], [14112109.374317858, 3901062.047348597], [14112317.010413814, 
3901178.6123951804], [14112536.86039789, 3901295.1786045753], [14112732.282604866, 3901411.745975727], [14112939.91870098, 3901542.8856554995], [14113147.554796439, 3901659.455495108], [14113342.977004122, 3901776.026497272], [14113562.826987848, 3901907.170262538], [14113758.249194663, 3902023.7437324016], [14113965.885290999, 3902154.8902756744], [14114161.307498729, 3902300.6103812656], [14114552.151913268, 3902562.911149271], [14114942.996328058, 3902839.790564282], [14115333.840744248, 3903131.2496635215], [14115712.471271036, 3903408.142537925], [14116091.101798624, 3903714.189658899], [14116457.518437536, 3904020.244793169], [14116823.935076432, 3904326.30794037], [14117190.351715742, 3904646.9541180977], [14117544.554466687, 3904967.6090920256], [14117874.329442928, 3905288.272862804], [14118228.532193437, 3905623.52166774], [14118558.307168506, 3905973.3567619547], [14118900.296031933, 3906323.2023288426], [14119217.857119897, 3906687.6359336833], [14119535.41820684, 3907022.924889553], [14119852.979293982, 3907387.380320937], [14120146.112605767, 3907766.426031002], [14120329.320924878, 3907999.69104288], [14120659.095900508, 3908145.484040798], [14120891.159772767, 3908247.5402225223], [14121098.79586814, 3908364.1769507537], [14121306.43196353, 3908466.2350431], [14121538.49583534, 3908568.294027048], [14121746.13193052, 3908699.514031146], [14121953.768026086, 3908801.575054541], [14122161.404121136, 3908918.217315232], [14122381.254104782, 3909034.860741095], [14122588.890200352, 3909166.0859882417], [14122784.312407838, 3909282.7318893564], [14122991.948504105, 3909399.378956402], [14123187.370711256, 3909530.6082997248], [14123407.22069531, 3909661.8391170804], [14123602.64290326, 3909778.4899717756], [14123675.926230324, 3909822.2343429914], [14123871.348437805, 3909851.3973478205], [14124103.412310153, 3909909.7235756326], [14124335.476181583, 3909953.4684379986], [14124579.753940387, 3909997.2134634694], [14124799.603924207, 3910040.9586544754], 
[14125043.88168351, 3910099.2858296824], [14125263.731667727, 3910143.0314019676], [14125508.009426983, 3910201.3590867375], [14125752.28718599, 3910259.6870635124], [14125972.137169205, 3910332.59744405], [14126204.201041877, 3910390.92607536], [14126424.051024832, 3910449.254999375], [14126668.328784036, 3910507.584213994], [14126888.17876787, 3910565.913720107], [14127120.242639447, 3910638.8260132936], [14127340.092622804, 3910726.3213651967], [14127584.370382065, 3910799.234659938], [14127816.434254477, 3910872.148409907], [14128036.284238072, 3910945.0626162733], [14128268.348108647, 3911017.9772767853], [14128488.198092327, 3911090.892392731], [14128720.261964472, 3911192.974320413], [14128940.111948168, 3911280.4738266575], [14129147.748043166, 3911367.973988523], [14129379.811914971, 3911440.891291154], [14129599.661898108, 3911557.559922042], [14129819.511881288, 3911645.0621605986], [14130039.361864883, 3911732.5650563217], [14130271.425736733, 3911820.068606416], [14130479.061832469, 3911936.741027793], [14130686.697928369, 3912038.8303543963], [14130918.761799408, 3912140.920572288], [14131126.397895552, 3912243.0116843027], [14131334.033990381, 3912359.6883341745], [14131566.09786253, 3912461.7813591575], [14131773.73395724, 3912578.460196206], [14131981.370053304, 3912680.5551354536], [14132189.006148996, 3912811.821370374], [14132408.856131978, 3912913.918351353], [14132604.278340138, 3913030.6017091586], [14132811.914436067, 3913161.8718802584], [14133019.55053132, 3913278.557718527], [14133214.972738754, 3913409.8306803126], [14133422.608834058, 3913526.518996874], [14133618.03104185, 3913657.7947483873], [14133837.881025733, 3913789.0719763637], [14134033.303232925, 3913920.3506824095], [14134424.147647737, 3914197.4994676104], [14134814.99206299, 3914474.654837102], [14135205.836479066, 3914737.2291565286], [14135584.467006274, 3915043.5733327474], [14135950.883645028, 3915335.3371710163], [14136329.514172366, 3915641.697056373], 
[14136695.93081197, 3915948.0649892436], [14137050.133562554, 3916269.0305042304], [14137416.550201891, 3916590.0048536095], [14137758.539065152, 3916925.578393816], [14138112.741816988, 3917261.161591096], [14138124.955704955, 3917290.3431960098], [14138222.66680882, 3917363.297525027], [14138576.869559862, 3917669.710695894], [14138931.072311323, 3918005.315314095], [14139297.488950564, 3918326.3374690292], [14139627.263925616, 3918676.5535388705], [14139969.25278898, 3919012.187147011], [14140286.813875556, 3919362.423825106], [14140616.588851899, 3919712.671030513], [14140934.149938418, 3920077.5230633905], [14141251.711026246, 3920442.3865206414], [14141557.058225727, 3920821.8566365796], [14141850.191537393, 3921186.743403776], [14142143.324847814, 3921580.833950086], [14142412.030383276, 3921960.3411501986], [14142705.163695073, 3922354.4578704755], [14142973.869230365, 3922748.5879268395], [14143108.221998872, 3922952.9569106484], [14143230.360877866, 3923157.3294808976], [14143364.71364529, 3923347.1072222115], [14143486.852526005, 3923566.085382383], [14143621.205292745, 3923770.468714929], [14143743.344173217, 3923974.8556338386], [14143853.269164307, 3924179.246141352], [14143975.40804453, 3924398.239951181], [14144085.333036179, 3924602.6378918802], [14144207.471916584, 3924807.039420669], [14144329.61079611, 3925011.4445379367], [14144427.321899917, 3925245.054782049], [14144537.246891692, 3925449.467591055], [14144659.38577103, 3925668.485299123], [14144757.096875027, 3925872.905545292], [14144867.021866404, 3926091.931221066], [14144976.946858248, 3926310.9610188995], [14145062.444074264, 3926529.9949393696], [14145172.369066445, 3926749.032982101], [14145257.866282122, 3926968.07514812], [14145343.36349802, 3927187.1214373973], [14145453.288489206, 3927420.7753580655], [14145538.785705116, 3927639.830169832], [14145636.496809188, 3927858.8891063603], [14145709.78013733, 3928077.9521672083], [14145831.91901659, 3928194.7874860945], 
[14146173.907880068, 3928516.090663645], [14146528.11063154, 3928837.402716595], [14146857.885606105, 3929187.935079745], [14147199.87446994, 3929538.4780067294], [14147529.649444804, 3929874.424892384], [14147847.210531974, 3930224.988511853], [14148164.771620288, 3930590.1701873746], [14148470.118818687, 3930955.363331506], [14148763.252130508, 3931335.176369344], [14149068.599329932, 3931715.0018159356], [14149349.518753031, 3932094.839672609], [14149642.652064433, 3932489.2998140086], [14149911.357600631, 3932869.1629744656], [14150167.84924795, 3933263.649396584], [14150302.202015493, 3933482.814306461], [14150436.554783072, 3933672.760570551], [14150558.69366279, 3933891.9331968473], [14150693.04643046, 3934081.88614829], [14150815.185310263, 3934301.0664916416], [14150937.324189857, 3934505.638541194], [14151059.463069875, 3934710.2141920165], [14151169.388061073, 3934914.793446248], [14151279.313053045, 3935133.9895019485], [14151401.45193336, 3935338.576218246], [14151499.163036935, 3935557.7802698156], [14151621.301916642, 3935762.3744509714], [14151731.226908179, 3935981.586499216], [14151841.151899958, 3936186.188145459], [14151938.863003807, 3936405.408193718], [14152195.354651982, 3936726.938415809], [14152488.487962838, 3937121.555854616], [14152757.193498401, 3937516.186704223], [14153013.685146198, 3937910.830965573], [14153148.037913712, 3938130.0835739793], [14153282.39068159, 3938320.1058491296], [14153404.52956081, 3938539.3661864866], [14153526.668440903, 3938729.3951621894], [14153661.021208057, 3938934.045228578], [14153783.16008862, 3939153.3171624425], [14153893.085079862, 3939343.356187306], [14154003.010072157, 3939562.635853141], [14154125.148951644, 3939767.300611019], [14154247.287830727, 3939986.588286231], [14154344.998935373, 3940191.260520088], [14154454.923926366, 3940410.5562055036], [14154577.062806187, 3940615.235917028], [14154686.987798307, 3940834.5396143007], [14154784.698902179, 3941053.847455504], [14154894.623893438, 
3941273.159443157], [14154980.121109651, 3941477.8543701675], [14155090.046101721, 3941711.795852182], [14155175.543317659, 3941916.4985195934], [14155285.468309484, 3942150.4488461153], [14155370.965524651, 3942369.7815635717], [14155468.676628448, 3942589.1184271937], [14155554.173845042, 3942808.4594375123], [14155639.671059819, 3943042.427754295], [14155725.168275682, 3943261.7773370524], [14155810.665492112, 3943481.131068717], [14155883.948820263, 3943700.4889500365], [14155957.232147578, 3943934.475261892], [14156030.51547582, 3944153.84171713], [14156116.012691723, 3944387.8371760393], [14156189.296019575, 3944621.8373581613], [14156262.579347137, 3944855.842261796], [14156335.86267503, 3945075.226148344], [14156396.932115378, 3945309.240202267], [14156458.001554107, 3945543.2589806733], [14156531.284882791, 3945777.2824817533], [14156580.140434783, 3945996.683805485], [14156641.209874306, 3946245.343659381], [14156702.279314281, 3946479.3813347146], [14156763.348753486, 3946713.4237364368], [14156836.632082347, 3947093.7527188975], [14156922.129297458, 3947210.779532316], [14157044.268176915, 3947415.5792986215], [14157166.407056939, 3947620.3826843738], [14157300.759824937, 3947825.1896898304], [14157422.89870435, 3948030.000315225], [14157545.037583863, 3948234.8145606047], [14157667.17646415, 3948454.2624128303], [14157764.887568416, 3948659.084157569], [14157887.026448287, 3948878.5400449373], [14157996.951438919, 3949083.369290671], [14158119.090319661, 3949288.202157963], [14158216.801423091, 3949507.6699629608], [14158326.726414407, 3949727.1419266723], [14158424.437519114, 3949931.986177989], [14158534.362510465, 3950166.098329143], [14158644.287502103, 3950370.950342997], [14158729.78471748, 3950590.4386638855], [14158815.281933218, 3950809.931145622], [14158925.206925157, 3951029.427785851], [14159010.704141628, 3951248.9285875857], [14159108.415245011, 3951483.0673614168], [14159193.912461305, 3951702.576762513], [14159291.6235645, 
3951936.724709892], [14159364.906892387, 3952141.6080482737], [14159450.404108545, 3952375.764874187], [14159523.687435796, 3952595.291199293], [14159621.398540307, 3952829.4572015903], [14159694.681868514, 3953048.992130602], [14159767.965195222, 3953283.1673107734], [14159841.248523328, 3953502.71084457], [14159914.531851163, 3953751.5318834265], [14159975.601291291, 3953971.084300843], [14160036.670731131, 3954205.278137325], [14160097.740171447, 3954424.8391631977], [14160171.023498941, 3954673.6800276935], [14160232.092939438, 3954893.249940999], [14160293.162378157, 3955127.4624392823], [14160342.017930873, 3955361.679679122], [14160403.087371092, 3955610.540691105], [14160451.942922773, 3955830.128382198], [14160500.798474764, 3956064.3598468173], [14160549.65402654, 3956298.596053535], [14160598.509577785, 3956547.477220151], [14160622.937354516, 3956693.880406732], [14160636.617809776, 3956751.2753614364], [14160696.220681982, 3957001.3331325892], [14160757.290121438, 3957220.947224554], [14160818.359561661, 3957469.8482368095], [14160867.215113139, 3957704.112907182], [14160916.070665386, 3957938.3823223934], [14160952.712329246, 3958172.656482196], [14161001.567881363, 3958406.9353898573], [14161050.423433455, 3958641.2190423324], [14161099.278985005, 3958890.1506262408], [14161135.920648871, 3959109.800590544], [14161172.562312365, 3959358.7422623085], [14161209.203976428, 3959593.0452026036], [14161245.845640494, 3959827.352890803], [14161270.273416875, 3960061.6653290996], [14161306.915080808, 3960310.627497995], [14161319.128968159, 3960544.9497317653], [14161343.55674493, 3960779.276716447], [14161380.198409086, 3961013.6084507345], [14161392.412296837, 3961262.5911258464], [14161404.626184527, 3961511.5791632985], [14161429.053960387, 3961745.92574739], [14161441.267848562, 3961994.924199761], [14161453.481735952, 3962214.63317148], [14161453.481736058, 3962463.6417239243], [14161465.695623817, 3962698.007616471], [14161465.695623918, 
3962947.0265865847], [14161490.123400616, 3963181.402284813], [14161490.123400327, 3963430.4316741815], [14161490.12340009, 3963664.817180731], [14161490.123400327, 3963913.856991753], [14161465.695623929, 3964148.2523065866], [14161453.481736246, 3964382.6523781596], [14161453.481736366, 3964631.7076659855], [14161441.267847814, 3964866.11754977], [14161429.053960953, 3965100.5321910167], [14161429.053960415, 3965349.602961888], [14161392.412296638, 3965584.027417495], [14161380.198408043, 3965833.1086162445], [14161343.556744935, 3966067.542888706], [14161343.556744233, 3966155.4569668346], [14161331.342857195, 3966316.6345174764], [14160989.35399289, 3968675.9452985157], [14160366.445706693, 3971035.738514718], [14159609.18465247, 3973249.3995169974], [14158717.57082961, 3975478.1498437845], [14157471.754256403, 3977560.6613335563], [14155041.190549271, 3980831.8443643325], [14152329.70741956, 3983619.6844641836], [14151987.71855616, 3983825.1309771356], [14148885.391009744, 3986188.029616028], [14146809.030054526, 3987362.3212974994], [14144659.385770556, 3988331.2022260944], [14142448.672047747, 3989109.3020907817], [14140018.108340502, 3989667.217435894], [14137673.041849403, 3990078.3302842528], [14136695.930811903, 3990151.744841114], [14135095.911487218, 3991458.6024279883], [14131847.017286118, 3993896.5093908194], [14128537.053643916, 3995644.4841260226], [14126998.103759484, 3996261.479934003], [14128940.111947352, 3996599.372644142], [14130882.120135639, 3997025.425458683], [14132824.128323132, 3997436.8018056713], [14133581.38937842, 3997642.4955095113], [14135169.19481456, 3997995.1218662434], [14137111.20300309, 3998215.51884181], [14138979.92786301, 3998421.2265061126], [14140848.652722916, 3998685.713208066], [14142790.66091092, 3998906.123450371], [14144732.669099433, 3999170.621330323], [14146601.39395857, 3999317.5672330167], [14148543.402146455, 3999596.769632924], [14150412.12700758, 3999875.9788301843], [14152329.70741913, 
4000081.7162713646], [14154271.715607245, 4000302.153339294], [14155041.190549305, 4000360.9372721342], [14156983.19873771, 4000640.165071943], [14158912.993037248, 4000919.399671307], [14160781.717897676, 4001125.1558314012], [14162797.009413889, 4001345.612956999], [14164580.237058092, 4001610.1671023793], [14166522.245245533, 4001815.935658166], [14167120.725757152, 4001933.519348061], [14167987.911802663, 4002036.4060658], [14173362.02251159, 4002697.8427284476], [14178760.46447004, 4003210.7735754605], [14183572.832858339, 4003668.0188718894], [14206901.358889744, 4005932.0826477716], [14210370.103074845, 4006505.5204501776], [14216550.330390768, 4008623.077985625], [14220983.971725512, 4010961.6694398993], [14225038.982532345, 4014271.8137577237], [14230608.515449705, 4021896.1459267535], [14237533.789931284, 4030556.9660328445], [14248477.43355596, 4044607.966866741], [14248819.422419427, 4044725.9920548676], [14248894.821504684, 4044751.2460003477], [14332384.43799789, 4072715.0099619655], [14339458.685080042, 4075084.437250298], [14339432.408530401, 4075075.6362608722], [14347742.667809354, 4070231.176358625], [14347779.925183792, 4070274.6906909533], [14337204.573632397, 4057923.327535437], [14332384.437499993, 4053484.2600356997], [14325516.027051724, 4047158.849973145], [14297318.800024424, 4007338.115534628], [14296205.605126167, 4000638.8920895318], [14289147.949369663, 3995496.544127517], [14248894.821533248, 3948046.126943193], [14240260.255669821, 3937867.6942487117], [14234601.398877025, 3931197.0296311476], [14233310.092817476, 3925646.324512699], [14227187.520809716, 3914566.141529868], [14215309.731123524, 3889770.2929695556], [14213829.181949753, 3888669.1786231115], [14154273.254398886, 3837917.649432496], [14085910.608311396, 3787395.72363925], [14026312.49388151, 3787395.72363925]]]]}}]} \ No newline at end of file diff --git a/prediction/data/zones/특정어업수역Ⅲ.json b/prediction/data/zones/특정어업수역Ⅲ.json new file mode 100644 index 
0000000..186078b --- /dev/null +++ b/prediction/data/zones/특정어업수역Ⅲ.json @@ -0,0 +1 @@ +{"type": "FeatureCollection", "name": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed3", "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}}, "features": [{"type": "Feature", "properties": {"fid": 0, "GML_ID": null, "OBJECTID": null, "ZONE_NM": null, "MNCT_NO": null, "MNCT_SCALE": null, "MNCT_NM": null, "RELREGLTN": null, "RELGOAG": null, "REVIYR": null, "ZONE_DESC": null, "PHOTO1_PAT": null, "ID": -2147483647, "CATE_CD": null, "ADR_CD": null, "ADR_KNM": null, "ORIGIN": null, "ORIYR": null, "ORIORG": null, "NAME": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed\u2162", "WARD_NM": null, "WARD_ID": null, "GISID": null, "FID_2": null, "NAME_2": null, "FID_3": null, "NAME_3": null, "GID": null, "NAME_4": null, "FID_4": null, "NAME_5": null, "FID_5": null, "NAME_6": null}, "geometry": {"type": "MultiPolygon", "coordinates": [[[[13817590.293393573, 4163976.6556012244], [13935718.55954324, 4163976.6556012244], [13935619.01320107, 4163881.1438622964], [13923844.505073382, 4152583.8553656973], [13918348.255484505, 4147059.076131751], [13915673.414018149, 4144230.73373971], [13914293.244677586, 4142310.8465966308], [13912412.305929314, 4139320.052336007], [13910836.71437977, 4135854.113539339], [13910055.02554902, 4132998.86846704], [13908345.081232697, 4127825.5937968497], [13907722.172945773, 4125120.982583305], [13907392.39797139, 4121971.379578588], [13905548.100886794, 4109218.5915996092], [13903996.937114129, 4098184.7930658716], [13902433.559452614, 4086362.162986858], [13899270.162467, 4063581.503750727], [13896473.182120506, 4043914.5936157014], [13897865.565350175, 4034226.54128085], [13899148.023587234, 4029334.0367157785], [13901432.020639004, 4024841.2655827063], [13904314.498200562, 4020453.314503754], [13908809.208975887, 4016346.6591829173], [13913084.069767442, 4012962.3532931497], [13916381.819520816, 4010770.4459119253], [13921743.716342429, 4008961.335245877], 
[13958947.219114931, 4002242.1822701376], [13979370.589551304, 3998601.864321506], [14011454.723518057, 3992883.0996783567], [14012407.40678011, 3992706.863639312], [14013372.303930415, 3992530.6303036474], [14014324.987193013, 3992369.0854542954], [14015277.670454111, 3992192.8573017977], [14016230.35371629, 3992031.3172021704], [14017195.250866242, 3991855.094230278], [14018147.934129067, 3991678.873962591], [14019100.617390765, 3991502.656396452], [14020053.300652908, 3991341.126002646], [14021018.197802687, 3991179.5978787914], [14021909.811624419, 3991018.0720274393], [14022813.639334908, 3990856.5484449966], [14023766.32259687, 3990650.976261227], [14024731.219746705, 3990445.407755316], [14025525.122466035, 3990298.575359531], [14026477.805727897, 3990063.6474289736], [14027442.702878024, 3989828.724300673], [14027845.761180786, 3989740.629365874], [14028798.444442954, 3989505.7128408654], [14029763.341592584, 3989270.80111587], [14030716.024854451, 3989050.57573502], [14031668.70811654, 3988815.6733126394], [14033073.30523366, 3988478.0094901477], [14033024.449681701, 3988213.757764475], [14033000.021905882, 3987949.512112862], [14032963.380242558, 3987582.5143390666], [14032938.952466676, 3987332.9625437283], [14032914.524689825, 3987098.09521391], [14032902.310802005, 3986863.232680407], [14032877.883026825, 3986613.69649413], [14032877.883025693, 3986378.843854627], [14032841.241362942, 3986143.996010515], [14032841.241361871, 3985894.47543004], [14032829.027473792, 3985659.6374763353], [14032829.027474709, 3985410.127403529], [14032829.027473787, 3985175.2993370066], [14032829.02747455, 3984940.4760656278], [14032816.813586919, 3984690.9815914035], [14032829.027474267, 3984456.1682059844], [14032829.027474323, 3984221.3596118884], [14032829.02747492, 3983971.8807323813], [14032841.24136266, 3983737.082022118], [14032841.24136211, 3983487.613641918], [14032865.66913836, 3983252.8248136323], [14032877.88302635, 3983003.3669318845], [14032902.310802022, 
3982783.2615266712], [14032914.52469013, 3982533.813822452], [14032938.952465724, 3982299.044452602], [14032963.38024159, 3982064.2798706265], [14032975.59412962, 3981814.847749137], [14033012.23579437, 3981594.76507107], [14033036.663569707, 3981345.3431236055], [14033073.305233562, 3981110.597991455], [14033097.733010307, 3980861.18653368], [14033146.588561533, 3980641.122086566], [14033171.016337542, 3980391.7207993036], [14033219.871889306, 3980171.665326035], [14033268.727441864, 3979922.2742063804], [14033305.369105555, 3979687.5580867655], [14033354.224656736, 3979452.846750307], [14033403.080208756, 3979218.140198897], [14033464.149649367, 3978968.769728522], [14033500.79131351, 3978748.741443538], [14033561.860753022, 3978514.0492399754], [14033622.930192148, 3978279.3618192296], [14033683.999632282, 3978044.6791785043], [14033745.06907279, 3977824.6685465015], [14033806.138512583, 3977575.3282415215], [14033867.207951993, 3977355.3265721146], [14033940.49128036, 3977120.6627559136], [14034013.774608184, 3976886.0037188698], [14034074.84404724, 3976651.349460947], [14034148.127375823, 3976431.365434353], [14034233.624591732, 3976211.385607544], [14034306.907918751, 3975976.745086343], [14034380.191247718, 3975756.7739374605], [14034465.688463435, 3975522.1426732005], [14034563.399566704, 3975302.1802014867], [14034636.682894846, 3975067.5581912696], [14034722.18011089, 3974862.2678504367], [14034819.89121484, 3974627.654795317], [14034905.388430584, 3974407.7093927874], [14035015.313421464, 3974187.7681863704], [14035100.810637798, 3973967.831176343], [14035198.52174194, 3973747.8983624075], [14035296.232845142, 3973527.9697442516], [14035406.157837268, 3973322.706818716], [14035503.868941093, 3973102.7863105624], [14035564.938380616, 3972970.836018713], [14035430.585612448, 3972882.8699968406], [14035051.955086311, 3972589.6547698523], [14034685.538446855, 3972281.7868043673], [14034306.907918967, 3971988.5868595946], [14033928.277392257, 
3971680.7349405857], [14033561.860752953, 3971358.232219437], [14033207.658001786, 3971050.3971231086], [14032865.66913857, 3970727.9120247746], [14032511.46638746, 3970376.1203815714], [14032169.477523897, 3970053.654131018], [14031851.916436315, 3969701.8830453446], [14031522.141461194, 3969350.1226848057], [14031192.36648596, 3969013.029068952], [14030874.80539861, 3968646.634131671], [14030569.4581991, 3968265.5957370186], [14030276.324888123, 3967899.224527907], [14029970.977688076, 3967518.2108068704], [14029677.844376301, 3967137.2096621543], [14029409.138841135, 3966756.2210914562], [14029140.433306115, 3966360.592421976], [14028871.727769978, 3965964.977308897], [14028737.375003058, 3965759.848882349], [14028615.236123221, 3965554.724099639], [14028480.883355275, 3965349.6029609945], [14028358.744475357, 3965144.4854657836], [14028224.391707785, 3964939.371614421], [14028102.252828015, 3964734.2614048333], [14027980.11394778, 3964529.1548381275], [14027857.975069236, 3964324.051914157], [14027760.26396438, 3964104.302822176], [14027638.125085097, 3963899.207441546], [14027528.200093549, 3963679.4664326143], [14027406.061214069, 3963474.378595102], [14027308.350109257, 3963254.6456658943], [14027198.425118413, 3963049.5653696535], [14027088.500126269, 3962815.1923459424], [14027003.00291053, 3962610.11984976], [14026893.077918677, 3962390.4033569284], [14026795.366814636, 3962170.6910431627], [14026697.655711418, 3961965.629985329], [14026599.944607599, 3961731.278946997], [14026502.233502936, 3961511.5791640463], [14026416.736287547, 3961291.883556766], [14026331.239071513, 3961072.192127874], [14026245.741855916, 3960837.859204179], [14026148.030752573, 3960603.531032333], [14026050.319648793, 3960515.6591924117], [14025696.116896924, 3960208.113014822], [14025354.12803417, 3959885.930555696], [14024987.711394375, 3959549.113039522], [14024645.722530954, 3959226.948944662], [14024291.51977964, 3958890.150626061], [14023961.74480434, 3958538.7193601266], 
[14023644.183716903, 3958172.6564824986], [14023314.408742214, 3957835.8888684427], [14023009.061542125, 3957469.8482367387], [14022703.714343427, 3957103.8191892453], [14022398.367143923, 3956723.1612672796], [14022117.447720738, 3956342.515870507], [14021812.100520544, 3955947.2435213765], [14021543.394986114, 3955566.6236525774], [14021250.261673959, 3955171.377811075], [14020993.770026248, 3954776.145468736], [14020859.417258942, 3954556.577778357], [14020737.27837941, 3954366.2891494785], [14020602.925611844, 3954176.0036486983], [14020480.786732143, 3953956.4473441243], [14020370.861740284, 3953751.53188359], [14020248.722860433, 3953546.620051208], [14020126.583980937, 3953341.7118456624], [14020004.445100708, 3953122.171365647], [14019882.306221237, 3952917.270673172], [14019784.595117368, 3952697.7382422695], [14019662.456236959, 3952492.8450626153], [14019552.531245224, 3952273.3206794215], [14019442.606254242, 3952068.435010681], [14019344.89515069, 3951834.2844000966], [14019234.97015812, 3951629.406499703], [14019137.259054784, 3951424.532224624], [14019027.33406315, 3951190.394634004], [14018941.836846726, 3950985.528125728], [14018831.911855552, 3950751.3994105365], [14018734.200752072, 3950546.540667035], [14018648.703535682, 3950312.4208263634], [14018563.206319205, 3950092.937773037], [14018465.495216299, 3949858.827100896], [14018379.99800022, 3949639.352642628], [14018306.71467212, 3949405.251136331], [14018209.003568063, 3949185.785271415], [14018135.72024025, 3948951.692931064], [14018062.43691314, 3948732.2356575206], [14017989.153585482, 3948498.1524819196], [14017915.87025767, 3948278.7037986964], [14017842.586929433, 3948044.6297840844], [14017757.089713955, 3947825.1896894635], [14017696.02027415, 3947591.124836192], [14017622.736946309, 3947371.6933298754], [14017561.667506203, 3947123.0093109384], [14017500.598066194, 3946888.958639055], [14017451.742514167, 3946654.9126930023], [14017390.673074305, 3946420.8714734227], 
[14017329.603633871, 3946157.5807487303], [14017195.25086627, 3945791.9091077414], [14017109.753650622, 3945557.885310957], [14017036.470322493, 3945338.4922908037], [14016963.186994748, 3945104.477646201], [14016889.903667673, 3944885.0932069574], [14016816.620339664, 3944651.0877120267], [14016743.337011732, 3944431.7118500136], [14016682.267571904, 3944197.715505999], [14016621.198132234, 3943963.7238824223], [14016547.914804032, 3943729.736980628], [14016486.845363976, 3943510.378546276], [14016425.775924595, 3943261.7773378296], [14016364.70648495, 3943042.427753915], [14016315.850932516, 3942793.8365748394], [14016254.781492097, 3942574.4958398864], [14016193.712052723, 3942325.9146883073], [14016157.070389032, 3942106.582801053], [14016108.214837069, 3941858.0116748144], [14016059.359284986, 3941624.0672444003], [14016034.931508765, 3941390.1275309804], [14015986.075957134, 3941141.571752944], [14015937.22040512, 3940922.2622533315], [14015912.792629696, 3940673.7164975833], [14015876.150965236, 3940439.7959427596], [14015839.509301143, 3940191.260519465], [14015802.867637865, 3939971.9689781107], [14015790.653749412, 3939723.443572689], [14015754.012085313, 3939489.542170439], [14015729.584309753, 3939241.0270946748], [14015717.37042194, 3939007.1354134823], [14015680.728758091, 3938758.630664511], [14015668.514870305, 3938510.131235958], [14015656.300982298, 3938290.871450771], [14015644.087093962, 3938042.3820334156], [14015644.087093726, 3937808.5144985737], [14015619.659317939, 3937560.035403366], [14015619.659317758, 3937326.1775837634], [14015607.445430323, 3937077.708810477], [14015607.445430165, 3936843.86070301], [14015607.44543046, 3936610.017304439], [14015607.445429819, 3936361.563852228], [14015619.659318132, 3936127.730163849], [14015619.65931785, 3935879.2870282456], [14015619.659318443, 3935864.672891335], [14015644.087093579, 3935645.4630488665], [14015644.087094065, 3935397.030227534], [14015656.300982174, 3935163.215955291], 
[14015668.51486993, 3934914.7934458014], [14015680.728758322, 3934680.988878479], [14015717.370422252, 3934432.576680492], [14015729.584309453, 3934213.393858009], [14015754.012085263, 3933964.991656822], [14015790.653749326, 3933716.5947647644], [14015802.867637549, 3933482.8143059933], [14015839.509301828, 3933234.4277204718], [14015863.937076895, 3933000.6569608934], [14015912.79262938, 3932766.8909023306], [14015937.220405562, 3932533.1295448784], [14015986.075957565, 3932299.3728892817], [14016010.503732808, 3932065.6209347122], [14016059.359284472, 3931817.2646324276], [14016108.214836905, 3931583.522371577], [14016169.284276439, 3931349.784810239], [14016205.925940333, 3931101.4438007176], [14016254.781492744, 3930882.3237842843], [14016315.85093268, 3930633.992758967], [14016364.706484258, 3930414.8815508736], [14016425.77592429, 3930166.5605083024], [14016486.845364062, 3929947.45810794], [14016547.91480444, 3929699.147045077], [14016621.19813177, 3929480.053450812], [14016682.267571429, 3929246.358166313], [14016743.337011438, 3929012.667577336], [14016816.620339306, 3928793.5869140886], [14016889.903667254, 3928545.3004834815], [14016963.186994506, 3928326.228621952], [14017036.470323365, 3928092.5565168317], [14017109.753650554, 3927873.4931812496], [14017195.250866888, 3927639.8301689653], [14017280.748082507, 3927420.7753582653], [14017354.031410536, 3927187.121436862], [14017451.742513658, 3926982.678106103], [14017537.239729747, 3926749.032981505], [14017622.736945918, 3926529.994938449], [14017720.448049057, 3926296.358903417], [14017818.159152932, 3926091.931220958], [14017915.870256566, 3925858.3039796036], [14018001.367473086, 3925653.883990288], [14018111.292464526, 3925420.265541152], [14018209.00356852, 3925201.2525036694], [14018318.928560698, 3924996.844052992], [14018428.853552194, 3924763.2387903365], [14018526.564656094, 3924558.8380302167], [14018636.489648066, 3924339.8411988374], [14018758.628527954, 3924135.447872815], 
[14018856.339631462, 3923916.459004483], [14018978.478511153, 3923712.073110865], [14019100.617391173, 3923493.0922050807], [14019222.756270064, 3923288.713741847], [14019344.895150287, 3923084.338865018], [14019467.034029689, 3922879.9675755682], [14019589.172909968, 3922661.0023167264], [14019723.525677131, 3922471.235755037], [14019833.45066894, 3922252.2781808744], [14019980.017325126, 3922062.518278752], [14020114.370092534, 3921872.7614680813], [14020383.075627644, 3921464.06499056], [14020651.781162936, 3921084.5739564244], [14020932.700586699, 3920690.500196857], [14021225.833898693, 3920311.0343588293], [14021518.967209466, 3919931.5808786163], [14021824.31440906, 3919552.1397545147], [14021873.169961166, 3919508.3588811457], [14022007.522728465, 3919158.1178169283], [14022117.447720494, 3918939.222496516], [14022215.158823974, 3918720.331287067], [14022312.869927935, 3918486.851860621], [14022410.581031999, 3918282.5611990155], [14022520.506023144, 3918063.6823207946], [14022618.217127495, 3917844.807552783], [14022728.14211945, 3917625.9368926086], [14022838.06711147, 3917421.6613179413], [14022960.205990292, 3917202.798602683], [14023057.917095127, 3916998.53044049], [14023180.055974487, 3916779.675667872], [14023302.194854327, 3916575.4149192595], [14023424.333733585, 3916356.568086974], [14023546.472613554, 3916166.904154751], [14023656.397605527, 3915948.0649889517], [14023778.536485165, 3915758.4077001447], [14023912.889252687, 3915539.5761998114], [14024035.028132746, 3915335.337170445], [14024303.73366759, 3914941.4577129018], [14024560.225315124, 3914547.5915551204], [14024853.358626463, 3914153.7386953356], [14025134.278049812, 3913759.899131608], [14025427.411361214, 3913380.6587829227], [14025720.544672925, 3913001.430759424], [14026025.891872894, 3912636.800052067], [14026272.928939708, 3912341.809856742], [14026312.49388151, 3787395.72363925], [13961312.04903013, 3787395.72363925], [13947389.033479117, 3802232.5362782693], 
[13947389.033479117, 3802232.5362782683], [13822584.485054709, 3935228.2723445967], [13818455.7465445, 3939627.988734125], [13804540.810181938, 4028802.026185181], [13817531.794596978, 4163881.1427735523], [13817531.794716273, 4163881.144013962], [13817590.293393573, 4163976.6556012244]]]]}}]} \ No newline at end of file diff --git a/prediction/data/zones/특정어업수역Ⅳ.json b/prediction/data/zones/특정어업수역Ⅳ.json new file mode 100644 index 0000000..1ce6f88 --- /dev/null +++ b/prediction/data/zones/특정어업수역Ⅳ.json @@ -0,0 +1 @@ +{"type": "FeatureCollection", "name": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed4", "crs": {"type": "name", "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}}, "features": [{"type": "Feature", "properties": {"fid": 0, "GML_ID": null, "OBJECTID": null, "ZONE_NM": null, "MNCT_NO": null, "MNCT_SCALE": null, "MNCT_NM": null, "RELREGLTN": null, "RELGOAG": null, "REVIYR": null, "ZONE_DESC": null, "PHOTO1_PAT": null, "ID": -2147483647, "CATE_CD": null, "ADR_CD": null, "ADR_KNM": null, "ORIGIN": null, "ORIYR": null, "ORIORG": null, "NAME": "\ud2b9\uc815\uc5b4\uc5c5\uc218\uc5ed\u2163", "WARD_NM": null, "WARD_ID": null, "GISID": null, "FID_2": null, "NAME_2": null, "FID_3": null, "NAME_3": null, "GID": null, "NAME_4": null, "FID_4": null, "NAME_5": null, "FID_5": null, "NAME_6": null}, "geometry": {"type": "MultiPolygon", "coordinates": [[[[13859276.603817873, 4232038.462456921], [13859276.603762543, 4321218.244482412], [13859276.603710985, 4404317.064005076], [13840719.645028654, 4439106.786523586], [13884632.712472571, 4439106.787250583], [13884632.712472571, 4439504.084564682], [13940418.269436067, 4439504.375880923], [13969123.924724836, 4439504.525783945], [13968718.329494288, 4438626.439593866], [13962623.599395147, 4425543.915710401], [13960437.31344761, 4420657.3891166765], [13958238.813611617, 4416093.569832627], [13958143.094601436, 4415900.994484875], [13958143.094601437, 4415900.994484875], [13957298.344237303, 4414201.456484755], 
[13953878.455604602, 4406316.186534493], [13949652.450365951, 4397019.979821594], [13948553.200448176, 4393395.13065616], [13947612.731073817, 4389132.176741289], [13947612.731072996, 4387549.226905922], [13947466.164417507, 4385829.556682826], [13947783.725505754, 4381721.729468383], [13948260.06713652, 4379835.70012994], [13949359.317054221, 4375897.403884492], [13951093.689146286, 4371808.582233328], [13954867.780530114, 4365670.678186072], [13964809.885341855, 4351190.629491161], [13978342.873219142, 4331838.456925102], [13980382.592510404, 4329007.496874151], [13981728.043604897, 4327079.749205159], [13985775.34591557, 4321280.81855131], [13997066.763484716, 4305102.598482491], [13999424.043863578, 4300225.286038025], [14003039.354703771, 4290447.064438686], [14005091.287883686, 4284626.561498255], [14006520.312777169, 4279426.932176922], [14007631.77658257, 4275178.643476352], [14008242.470981453, 4271549.325573796], [14009378.362562515, 4262248.123573576], [14009427.990871342, 4261704.85208626], [14009708.137538105, 4258638.140769343], [14009854.704193696, 4257224.555715567], [14009378.362562606, 4254698.603440943], [14005347.779531531, 4240996.452433007], [14002367.590864772, 4231511.1380338315], [14001280.554835469, 4227266.412716273], [14000486.652116666, 4225212.134400094], [13998047.81589918, 4222926.459154359], [13991387.305576058, 4216684.234498038], [13970721.407121927, 4197120.494488488], [13958654.085803084, 4185745.4565721145], [13956602.15262321, 4184012.5742896623], [13944065.033685392, 4171984.566055202], [13940467.606607554, 4168533.224265296], [13935619.01320107, 4163881.1438622964], [13935718.55954324, 4163976.6556012244], [13817590.293393573, 4163976.6556012244], [13859276.603817873, 4232038.462456921]]]]}}]} \ No newline at end of file diff --git a/prediction/db/__init__.py b/prediction/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prediction/db/kcgdb.py b/prediction/db/kcgdb.py new file mode 100644 index 
0000000..6654744 --- /dev/null +++ b/prediction/db/kcgdb.py @@ -0,0 +1,330 @@ +import json +import logging +from contextlib import contextmanager +from typing import TYPE_CHECKING, Optional + +import psycopg2 +from psycopg2 import pool +from psycopg2.extras import execute_values + +from config import qualified_table, settings + +if TYPE_CHECKING: + from models.result import AnalysisResult + +logger = logging.getLogger(__name__) + +_pool: Optional[pool.ThreadedConnectionPool] = None +GROUP_POLYGON_SNAPSHOTS = qualified_table('group_polygon_snapshots') + + +def init_pool(): + global _pool + _pool = pool.ThreadedConnectionPool( + minconn=1, + maxconn=5, + host=settings.KCGDB_HOST, + port=settings.KCGDB_PORT, + dbname=settings.KCGDB_NAME, + user=settings.KCGDB_USER, + password=settings.KCGDB_PASSWORD, + options=f'-c search_path={settings.KCGDB_SCHEMA},public', + ) + logger.info('kcgdb connection pool initialized') + + +def close_pool(): + global _pool + if _pool: + _pool.closeall() + _pool = None + logger.info('kcgdb connection pool closed') + + +@contextmanager +def get_conn(): + conn = _pool.getconn() + try: + yield conn + except Exception: + conn.rollback() + raise + finally: + _pool.putconn(conn) + + +def check_health() -> bool: + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute('SELECT 1') + return True + except Exception as e: + logger.error('kcgdb health check failed: %s', e) + return False + + +def upsert_results(results: list['AnalysisResult']) -> int: + """분석 결과를 vessel_analysis_results 테이블에 upsert.""" + if not results: + return 0 + + insert_sql = """ + INSERT INTO vessel_analysis_results ( + mmsi, timestamp, vessel_type, confidence, fishing_pct, + cluster_id, season, zone, dist_to_baseline_nm, activity_state, + ucaf_score, ucft_score, is_dark, gap_duration_min, + spoofing_score, bd09_offset_m, speed_jump_count, + cluster_size, is_leader, fleet_role, + risk_score, risk_level, + is_transship_suspect, transship_pair_mmsi, 
transship_duration_min, + features, analyzed_at + ) VALUES %s + ON CONFLICT (mmsi, timestamp) DO UPDATE SET + vessel_type = EXCLUDED.vessel_type, + confidence = EXCLUDED.confidence, + fishing_pct = EXCLUDED.fishing_pct, + cluster_id = EXCLUDED.cluster_id, + season = EXCLUDED.season, + zone = EXCLUDED.zone, + dist_to_baseline_nm = EXCLUDED.dist_to_baseline_nm, + activity_state = EXCLUDED.activity_state, + ucaf_score = EXCLUDED.ucaf_score, + ucft_score = EXCLUDED.ucft_score, + is_dark = EXCLUDED.is_dark, + gap_duration_min = EXCLUDED.gap_duration_min, + spoofing_score = EXCLUDED.spoofing_score, + bd09_offset_m = EXCLUDED.bd09_offset_m, + speed_jump_count = EXCLUDED.speed_jump_count, + cluster_size = EXCLUDED.cluster_size, + is_leader = EXCLUDED.is_leader, + fleet_role = EXCLUDED.fleet_role, + risk_score = EXCLUDED.risk_score, + risk_level = EXCLUDED.risk_level, + is_transship_suspect = EXCLUDED.is_transship_suspect, + transship_pair_mmsi = EXCLUDED.transship_pair_mmsi, + transship_duration_min = EXCLUDED.transship_duration_min, + features = EXCLUDED.features, + analyzed_at = EXCLUDED.analyzed_at + """ + + try: + with get_conn() as conn: + with conn.cursor() as cur: + tuples = [r.to_db_tuple() for r in results] + execute_values(cur, insert_sql, tuples, page_size=100) + conn.commit() + count = len(tuples) + logger.info('upserted %d analysis results', count) + return count + except Exception as e: + logger.error('failed to upsert results: %s', e) + return 0 + + +def cleanup_old(hours: int = 48) -> int: + """오래된 분석 결과 삭제.""" + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + 'DELETE FROM vessel_analysis_results WHERE analyzed_at < NOW() - (%s * INTERVAL \'1 hour\')', + (hours,), + ) + deleted = cur.rowcount + conn.commit() + if deleted > 0: + logger.info('cleaned up %d old results (older than %dh)', deleted, hours) + return deleted + except Exception as e: + logger.error('failed to cleanup old results: %s', e) + return 0 + + +def 
save_group_snapshots(snapshots: list[dict]) -> int: + """group_polygon_snapshots에 폴리곤 스냅샷 배치 INSERT. + + snapshots: polygon_builder.build_all_group_snapshots() 결과 + 각 항목은: group_type, group_key, group_label, snapshot_time, + polygon_wkt (str|None), center_wkt (str|None), + area_sq_nm, member_count, zone_id, zone_name, + members (list[dict]), color + """ + if not snapshots: + return 0 + + insert_sql = f""" + INSERT INTO {GROUP_POLYGON_SNAPSHOTS} ( + group_type, group_key, group_label, sub_cluster_id, resolution, snapshot_time, + polygon, center_point, area_sq_nm, member_count, + zone_id, zone_name, members, color + ) VALUES ( + %s, %s, %s, %s, %s, %s, + ST_GeomFromText(%s, 4326), ST_GeomFromText(%s, 4326), + %s, %s, %s, %s, %s::jsonb, %s + ) + """ + + inserted = 0 + try: + with get_conn() as conn: + with conn.cursor() as cur: + for s in snapshots: + cur.execute( + insert_sql, + ( + s['group_type'], + s['group_key'], + s['group_label'], + s.get('sub_cluster_id', 0), + s.get('resolution', '6h'), + s['snapshot_time'], + s.get('polygon_wkt'), + s.get('center_wkt'), + s.get('area_sq_nm'), + s.get('member_count'), + s.get('zone_id'), + s.get('zone_name'), + json.dumps(s.get('members', []), ensure_ascii=False), + s.get('color'), + ), + ) + inserted += 1 + conn.commit() + logger.info('saved %d group polygon snapshots', inserted) + return inserted + except Exception as e: + logger.error('failed to save group snapshots: %s', e) + return 0 + + +def fetch_analysis_summary() -> dict: + """최근 1시간 분석 결과 요약 (채팅 컨텍스트용).""" + try: + with get_conn() as conn: + with conn.cursor() as cur: + # 위험도 분포 + cur.execute(""" + SELECT risk_level, COUNT(*) FROM vessel_analysis_results + WHERE analyzed_at > NOW() - INTERVAL '1 hour' + GROUP BY risk_level + """) + risk_dist = {row[0]: row[1] for row in cur.fetchall()} + + # 수역별 분포 + cur.execute(""" + SELECT zone, COUNT(*) FROM vessel_analysis_results + WHERE analyzed_at > NOW() - INTERVAL '1 hour' + GROUP BY zone + """) + zone_dist = {row[0]: 
row[1] for row in cur.fetchall()} + + # 다크/스푸핑/환적 카운트 + cur.execute(""" + SELECT + COUNT(*) FILTER (WHERE is_dark = TRUE) AS dark_count, + COUNT(*) FILTER (WHERE spoofing_score > 0.5) AS spoofing_count, + COUNT(*) FILTER (WHERE is_transship_suspect = TRUE) AS transship_count + FROM vessel_analysis_results + WHERE analyzed_at > NOW() - INTERVAL '1 hour' + """) + row = cur.fetchone() + + result = { + 'risk_distribution': {**risk_dist, **zone_dist}, + 'dark_count': row[0] if row else 0, + 'spoofing_count': row[1] if row else 0, + 'transship_count': row[2] if row else 0, + } + return result + except Exception as e: + logger.error('fetch_analysis_summary failed: %s', e) + return {'risk_distribution': {}, 'dark_count': 0, 'spoofing_count': 0, 'transship_count': 0} + + +def fetch_recent_high_risk(limit: int = 10) -> list[dict]: + """위험도 상위 N척 선박 상세 (채팅 컨텍스트용).""" + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute(""" + SELECT mmsi, risk_score, risk_level, zone, is_dark, + is_transship_suspect, activity_state, spoofing_score + FROM vessel_analysis_results + WHERE analyzed_at > NOW() - INTERVAL '1 hour' + ORDER BY risk_score DESC + LIMIT %s + """, (limit,)) + rows = cur.fetchall() + + result = [] + for row in rows: + result.append({ + 'mmsi': row[0], + 'name': row[0], # vessel_store에서 이름 조회 필요시 보강 + 'risk_score': row[1], + 'risk_level': row[2], + 'zone': row[3], + 'is_dark': row[4], + 'is_transship': row[5], + 'activity_state': row[6], + 'spoofing_score': float(row[7]) if row[7] else 0.0, + }) + return result + except Exception as e: + logger.error('fetch_recent_high_risk failed: %s', e) + return [] + + +def fetch_polygon_summary() -> dict: + """최신 그룹 폴리곤 요약 (채팅 컨텍스트용).""" + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute(f""" + SELECT group_type, COUNT(*), SUM(member_count) + FROM {GROUP_POLYGON_SNAPSHOTS} + WHERE snapshot_time = ( + SELECT MAX(snapshot_time) FROM {GROUP_POLYGON_SNAPSHOTS} + ) + GROUP BY group_type + 
""") + rows = cur.fetchall() + + result = { + 'fleet_count': 0, 'fleet_members': 0, + 'gear_in_zone': 0, 'gear_out_zone': 0, + } + for row in rows: + gtype, count, members = row[0], row[1], row[2] or 0 + if gtype == 'FLEET': + result['fleet_count'] = count + result['fleet_members'] = members + elif gtype == 'GEAR_IN_ZONE': + result['gear_in_zone'] = count + elif gtype == 'GEAR_OUT_ZONE': + result['gear_out_zone'] = count + return result + except Exception as e: + logger.error('fetch_polygon_summary failed: %s', e) + return {'fleet_count': 0, 'fleet_members': 0, 'gear_in_zone': 0, 'gear_out_zone': 0} + + +def cleanup_group_snapshots(days: int = 7) -> int: + """오래된 그룹 폴리곤 스냅샷 삭제.""" + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute( + f"DELETE FROM {GROUP_POLYGON_SNAPSHOTS} " + "WHERE snapshot_time < NOW() - (%s * INTERVAL '1 day')", + (days,), + ) + deleted = cur.rowcount + conn.commit() + if deleted > 0: + logger.info('cleaned up %d old group snapshots (older than %dd)', deleted, days) + return deleted + except Exception as e: + logger.error('failed to cleanup group snapshots: %s', e) + return 0 diff --git a/prediction/db/partition_manager.py b/prediction/db/partition_manager.py new file mode 100644 index 0000000..9941229 --- /dev/null +++ b/prediction/db/partition_manager.py @@ -0,0 +1,143 @@ +"""gear_correlation_raw_metrics 파티션 유지보수. + +APScheduler 일별 작업으로 실행: +- system_config에서 설정 읽기 (hot-reload, 프로세스 재시작 불필요) +- 미래 파티션 미리 생성 +- 만료 파티션 DROP +- 미관측 점수 레코드 정리 +""" + +import logging +from datetime import date, datetime, timedelta + +from config import qualified_table, settings + +logger = logging.getLogger(__name__) + +SYSTEM_CONFIG = qualified_table('system_config') +GEAR_CORRELATION_RAW_METRICS = qualified_table('gear_correlation_raw_metrics') +GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') + + +def _get_config_int(conn, key: str, default: int) -> int: + """system_config에서 설정값 조회. 
없으면 default.""" + cur = conn.cursor() + try: + cur.execute( + f"SELECT value::text FROM {SYSTEM_CONFIG} WHERE key = %s", + (key,), + ) + row = cur.fetchone() + return int(row[0].strip('"')) if row else default + except Exception: + return default + finally: + cur.close() + + +def _create_future_partitions(conn, days_ahead: int) -> int: + """미래 N일 파티션 생성. 반환: 생성된 파티션 수.""" + cur = conn.cursor() + created = 0 + try: + for i in range(days_ahead + 1): + d = date.today() + timedelta(days=i) + partition_name = f'gear_correlation_raw_metrics_{d.strftime("%Y%m%d")}' + cur.execute( + "SELECT 1 FROM pg_class c " + "JOIN pg_namespace n ON n.oid = c.relnamespace " + "WHERE c.relname = %s AND n.nspname = %s", + (partition_name, settings.KCGDB_SCHEMA), + ) + if cur.fetchone() is None: + next_d = d + timedelta(days=1) + cur.execute( + f"CREATE TABLE IF NOT EXISTS {qualified_table(partition_name)} " + f"PARTITION OF {GEAR_CORRELATION_RAW_METRICS} " + f"FOR VALUES FROM ('{d.isoformat()}') TO ('{next_d.isoformat()}')" + ) + created += 1 + logger.info('created partition: %s.%s', settings.KCGDB_SCHEMA, partition_name) + conn.commit() + except Exception as e: + conn.rollback() + logger.error('failed to create partitions: %s', e) + finally: + cur.close() + return created + + +def _drop_expired_partitions(conn, retention_days: int) -> int: + """retention_days 초과 파티션 DROP. 
반환: 삭제된 파티션 수.""" + cutoff = date.today() - timedelta(days=retention_days) + cur = conn.cursor() + dropped = 0 + try: + cur.execute( + "SELECT c.relname FROM pg_class c " + "JOIN pg_namespace n ON n.oid = c.relnamespace " + "WHERE c.relname LIKE 'gear_correlation_raw_metrics_%%' " + "AND n.nspname = %s AND c.relkind = 'r'", + (settings.KCGDB_SCHEMA,), + ) + for (name,) in cur.fetchall(): + date_str = name.rsplit('_', 1)[-1] + try: + partition_date = datetime.strptime(date_str, '%Y%m%d').date() + except ValueError: + continue + if partition_date < cutoff: + cur.execute(f'DROP TABLE IF EXISTS {qualified_table(name)}') + dropped += 1 + logger.info('dropped expired partition: %s.%s', settings.KCGDB_SCHEMA, name) + conn.commit() + except Exception as e: + conn.rollback() + logger.error('failed to drop partitions: %s', e) + finally: + cur.close() + return dropped + + +def _cleanup_stale_scores(conn, cleanup_days: int) -> int: + """cleanup_days 이상 미관측 점수 레코드 삭제.""" + cur = conn.cursor() + try: + cur.execute( + f"DELETE FROM {GEAR_CORRELATION_SCORES} " + "WHERE last_observed_at < NOW() - make_interval(days => %s)", + (cleanup_days,), + ) + deleted = cur.rowcount + conn.commit() + return deleted + except Exception as e: + conn.rollback() + logger.error('failed to cleanup stale scores: %s', e) + return 0 + finally: + cur.close() + + +def maintain_partitions(): + """일별 파티션 유지보수 — 스케줄러에서 호출. + + system_config에서 설정을 매번 읽으므로 + API를 통한 설정 변경이 다음 실행 시 즉시 반영됨. 
+ """ + from db import kcgdb + + with kcgdb.get_conn() as conn: + retention = _get_config_int(conn, 'partition.raw_metrics.retention_days', 7) + ahead = _get_config_int(conn, 'partition.raw_metrics.create_ahead_days', 3) + cleanup_days = _get_config_int(conn, 'partition.scores.cleanup_days', 30) + + created = _create_future_partitions(conn, ahead) + dropped = _drop_expired_partitions(conn, retention) + cleaned = _cleanup_stale_scores(conn, cleanup_days) + + logger.info( + 'partition maintenance: %d created, %d dropped, %d stale scores cleaned ' + '(retention=%dd, ahead=%dd, cleanup=%dd)', + created, dropped, cleaned, retention, ahead, cleanup_days, + ) diff --git a/prediction/db/snpdb.py b/prediction/db/snpdb.py new file mode 100644 index 0000000..8b46df5 --- /dev/null +++ b/prediction/db/snpdb.py @@ -0,0 +1,210 @@ +import logging +from contextlib import contextmanager +from datetime import datetime +from typing import Optional + +import pandas as pd +import psycopg2 +from psycopg2 import pool + +from config import settings +from time_bucket import compute_incremental_window_start, compute_initial_window_start, compute_safe_bucket + +logger = logging.getLogger(__name__) + +_pool: Optional[pool.ThreadedConnectionPool] = None + + +def init_pool(): + global _pool + _pool = pool.ThreadedConnectionPool( + minconn=1, + maxconn=3, + host=settings.SNPDB_HOST, + port=settings.SNPDB_PORT, + dbname=settings.SNPDB_NAME, + user=settings.SNPDB_USER, + password=settings.SNPDB_PASSWORD, + ) + logger.info('snpdb connection pool initialized') + + +def close_pool(): + global _pool + if _pool: + _pool.closeall() + _pool = None + logger.info('snpdb connection pool closed') + + +@contextmanager +def get_conn(): + conn = _pool.getconn() + try: + yield conn + finally: + _pool.putconn(conn) + + +def check_health() -> bool: + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute('SELECT 1') + return True + except Exception as e: + logger.error('snpdb health check 
failed: %s', e) + return False + + +def fetch_all_tracks(hours: int = 24) -> pd.DataFrame: + """한국 해역 전 선박의 궤적 포인트를 조회한다. + + LineStringM 지오메트리에서 개별 포인트를 추출하며, + 한국 해역(122-132E, 31-39N) 내 최근 N시간 데이터를 반환한다. + """ + safe_bucket = compute_safe_bucket() + window_start = compute_initial_window_start(hours, safe_bucket) + + query = """ + SELECT + t.mmsi, + to_timestamp(ST_M((dp).geom)) as timestamp, + t.time_bucket, + ST_Y((dp).geom) as lat, + ST_X((dp).geom) as lon, + CASE + WHEN (dp).path[1] = 1 THEN (t.start_position->>'sog')::float + ELSE COALESCE((t.end_position->>'sog')::float, t.avg_speed::float) + END as raw_sog + FROM signal.t_vessel_tracks_5min t, + LATERAL ST_DumpPoints(t.track_geom) dp + WHERE t.time_bucket >= %s + AND t.time_bucket <= %s + AND t.track_geom && ST_MakeEnvelope(122, 31, 132, 39, 4326) + ORDER BY t.mmsi, to_timestamp(ST_M((dp).geom)) + """ + + try: + with get_conn() as conn: + df = pd.read_sql_query(query, conn, params=(window_start, safe_bucket)) + logger.info( + 'fetch_all_tracks: %d rows, %d vessels (window=%s..%s, last %dh safe)', + len(df), + df['mmsi'].nunique() if len(df) > 0 else 0, + window_start, + safe_bucket, + hours, + ) + return df + except Exception as e: + logger.error('fetch_all_tracks failed: %s', e) + return pd.DataFrame(columns=['mmsi', 'timestamp', 'lat', 'lon', 'raw_sog']) + + +def fetch_incremental(last_bucket: datetime) -> pd.DataFrame: + """last_bucket 이후의 신규 궤적 포인트를 조회한다. + + 스케줄러 증분 업데이트에 사용되며, time_bucket > last_bucket 조건으로 + 이미 처리한 버킷을 건너뛴다. 
+ """ + safe_bucket = compute_safe_bucket() + from_bucket = compute_incremental_window_start(last_bucket) + if safe_bucket <= from_bucket: + logger.info( + 'fetch_incremental skipped: safe_bucket=%s, from_bucket=%s, last_bucket=%s', + safe_bucket, + from_bucket, + last_bucket, + ) + return pd.DataFrame(columns=['mmsi', 'timestamp', 'lat', 'lon', 'raw_sog']) + + query = """ + SELECT + t.mmsi, + to_timestamp(ST_M((dp).geom)) as timestamp, + t.time_bucket, + ST_Y((dp).geom) as lat, + ST_X((dp).geom) as lon, + CASE + WHEN (dp).path[1] = 1 THEN (t.start_position->>'sog')::float + ELSE COALESCE((t.end_position->>'sog')::float, t.avg_speed::float) + END as raw_sog + FROM signal.t_vessel_tracks_5min t, + LATERAL ST_DumpPoints(t.track_geom) dp + WHERE t.time_bucket > %s + AND t.time_bucket <= %s + AND t.track_geom && ST_MakeEnvelope(122, 31, 132, 39, 4326) + ORDER BY t.mmsi, to_timestamp(ST_M((dp).geom)) + """ + + try: + with get_conn() as conn: + df = pd.read_sql_query(query, conn, params=(from_bucket, safe_bucket)) + logger.info( + 'fetch_incremental: %d rows, %d vessels (from %s, safe %s, last %s)', + len(df), + df['mmsi'].nunique() if len(df) > 0 else 0, + from_bucket.isoformat(), + safe_bucket.isoformat(), + last_bucket.isoformat(), + ) + return df + except Exception as e: + logger.error('fetch_incremental failed: %s', e) + return pd.DataFrame(columns=['mmsi', 'timestamp', 'lat', 'lon', 'raw_sog']) + + +def fetch_static_info(mmsi_list: list[str]) -> dict[str, dict]: + """MMSI 목록에 해당하는 선박 정적 정보를 조회한다. + + DISTINCT ON (mmsi)로 최신 레코드만 반환한다. 
+ """ + query = """ + SELECT DISTINCT ON (mmsi) mmsi, name, vessel_type, length, width + FROM signal.t_vessel_static + WHERE mmsi = ANY(%s) + ORDER BY mmsi, time_bucket DESC + """ + + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute(query, (mmsi_list,)) + rows = cur.fetchall() + result = { + row[0]: { + 'name': row[1], + 'vessel_type': row[2], + 'length': row[3], + 'width': row[4], + } + for row in rows + } + logger.info('fetch_static_info: %d vessels resolved', len(result)) + return result + except Exception as e: + logger.error('fetch_static_info failed: %s', e) + return {} + + +def fetch_permit_mmsis() -> set[str]: + """중국 허가어선 MMSI 목록을 조회한다. + + signal.t_chnprmship_positions 테이블에서 DISTINCT mmsi를 반환한다. + """ + query = """ + SELECT DISTINCT mmsi FROM signal.t_chnprmship_positions + """ + + try: + with get_conn() as conn: + with conn.cursor() as cur: + cur.execute(query) + rows = cur.fetchall() + result = {row[0] for row in rows} + logger.info('fetch_permit_mmsis: %d permitted vessels', len(result)) + return result + except Exception as e: + logger.error('fetch_permit_mmsis failed: %s', e) + return set() diff --git a/prediction/env.example b/prediction/env.example new file mode 100644 index 0000000..5a915ed --- /dev/null +++ b/prediction/env.example @@ -0,0 +1,34 @@ +# snpdb (궤적 데이터 소스) +SNPDB_HOST=211.208.115.83 +SNPDB_PORT=5432 +SNPDB_NAME=snpdb +SNPDB_USER=snp +SNPDB_PASSWORD=snp#8932 + +# kcgdb (분석 결과 저장) +KCGDB_HOST=211.208.115.83 +KCGDB_PORT=5432 +KCGDB_NAME=kcgdb +KCGDB_SCHEMA=kcg +KCGDB_USER=kcg_app +KCGDB_PASSWORD=Kcg2026monitor + +# 스케줄러 +SCHEDULER_INTERVAL_MIN=5 + +# 파이프라인 +TRAJECTORY_HOURS=6 +MMSI_PREFIX=412 +MIN_TRAJ_POINTS=100 + +# Ollama (LLM) +OLLAMA_BASE_URL=http://localhost:11434 +OLLAMA_MODEL=qwen3:32b + +# Redis +REDIS_HOST=localhost +REDIS_PORT=6379 +REDIS_PASSWORD= + +# 로깅 +LOG_LEVEL=INFO diff --git a/prediction/fleet_tracker.py b/prediction/fleet_tracker.py new file mode 100644 index 0000000..ba4f959 --- /dev/null 
"""Tracker for the registered fleet: companies, vessels and their fishing gear."""
import logging
import re
import time
from collections import Counter
from contextlib import closing
from datetime import datetime, timezone
from typing import Optional

import pandas as pd

from algorithms.gear_name_rules import is_trackable_parent_name
from config import qualified_table

logger = logging.getLogger(__name__)

# Gear name pattern: "<parent>_<index...>" (spaces, alphanumeric indices and a
# trailing underscore are allowed) or a purely numeric name.
GEAR_PATTERN = re.compile(r'^(.+?)_(?=\S*\d)\S+(?:[_ ]\S*)*[_ ]*$|^(\d+)$')
GEAR_PATTERN_PCT = re.compile(r'^(.+?)%$')

_REGISTRY_CACHE_SEC = 3600
_AFFINITY_SAVEPOINT = 'gear_affinity_transfer'
FLEET_COMPANIES = qualified_table('fleet_companies')
FLEET_VESSELS = qualified_table('fleet_vessels')
GEAR_IDENTITY_LOG = qualified_table('gear_identity_log')
GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores')
FLEET_TRACKING_SNAPSHOT = qualified_table('fleet_tracking_snapshot')

_GEAR_IDENTITY_INSERT = f"""INSERT INTO {GEAR_IDENTITY_LOG}
    (mmsi, name, parent_name, parent_mmsi, parent_vessel_id,
     gear_index_1, gear_index_2, lat, lon,
     match_method, match_confidence, first_seen_at, last_seen_at)
    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""


class FleetTracker:
    """In-memory view of the fleet registry plus AIS/gear matching helpers.

    Every method that takes ``conn`` expects a DB-API connection; the SQL uses
    ``%s`` placeholders and ``NOW()``.
    NOTE(review): the savepoint handling in ``track_gear_identity`` assumes a
    PostgreSQL connection that is not in autocommit mode - confirm.
    """

    def __init__(self) -> None:
        self._companies: dict[int, dict] = {}    # id -> {name_cn, name_en}
        self._vessels: dict[int, dict] = {}      # id -> {permit_no, name_cn, ...}
        self._name_cn_map: dict[str, int] = {}   # name_cn -> vessel_id
        self._name_en_map: dict[str, int] = {}   # name_en (lower-cased) -> vessel_id
        self._mmsi_to_vid: dict[str, int] = {}   # mmsi -> vessel_id (matched only)
        self._gear_active: dict[str, dict] = {}  # mmsi -> {name, parent_mmsi, ...}
        self._last_registry_load: float = 0.0

    def load_registry(self, conn) -> None:
        """Load fleet_companies and fleet_vessels from the DB (cached for one hour)."""
        if time.time() - self._last_registry_load < _REGISTRY_CACHE_SEC:
            return

        with closing(conn.cursor()) as cur:
            cur.execute(f'SELECT id, name_cn, name_en FROM {FLEET_COMPANIES}')
            company_rows = cur.fetchall()

            cur.execute(
                f"""SELECT id, company_id, permit_no, name_cn, name_en, tonnage,
                           gear_code, fleet_role, pair_vessel_id, mmsi
                    FROM {FLEET_VESSELS}"""
            )
            vessel_rows = cur.fetchall()

        # Build the new state locally and swap it in only after both queries
        # succeeded, so a failed reload never leaves a half-populated registry.
        companies = {r[0]: {'name_cn': r[1], 'name_en': r[2]} for r in company_rows}
        vessels: dict[int, dict] = {}
        name_cn_map: dict[str, int] = {}
        name_en_map: dict[str, int] = {}
        mmsi_to_vid: dict[str, int] = {}

        for r in vessel_rows:
            vid = r[0]
            vessels[vid] = {
                'id': vid,
                'company_id': r[1],
                'permit_no': r[2],
                'name_cn': r[3],
                'name_en': r[4],
                'tonnage': r[5],
                'gear_code': r[6],
                'fleet_role': r[7],
                'pair_vessel_id': r[8],
                'mmsi': r[9],
            }
            if r[3]:
                name_cn_map[r[3]] = vid
            if r[4]:
                name_en_map[r[4].lower().strip()] = vid
            if r[9]:
                mmsi_to_vid[r[9]] = vid

        self._companies = companies
        self._vessels = vessels
        self._name_cn_map = name_cn_map
        self._name_en_map = name_en_map
        self._mmsi_to_vid = mmsi_to_vid
        self._last_registry_load = time.time()
        logger.info(
            'fleet registry loaded: %d companies, %d vessels',
            len(self._companies),
            len(self._vessels),
        )

    def match_ais_to_registry(self, ais_vessels: list[dict], conn) -> None:
        """Match AIS vessels to registered vessels by exact name and persist the match.

        Args:
            ais_vessels: ``[{mmsi, name, lat, lon, sog, cog}, ...]``.
            conn: DB connection; committed once after all vessels are processed.
        """
        matched = 0

        with closing(conn.cursor()) as cur:
            for v in ais_vessels:
                mmsi = v.get('mmsi', '')
                name = v.get('name', '')
                if not mmsi or not name:
                    continue

                # Already matched: only refresh last_seen_at.
                if mmsi in self._mmsi_to_vid:
                    cur.execute(
                        f'UPDATE {FLEET_VESSELS} SET last_seen_at = NOW() WHERE id = %s',
                        (self._mmsi_to_vid[mmsi],),
                    )
                    continue

                # NAME_EXACT match: Chinese name first, then lower-cased English name.
                vid: Optional[int] = self._name_cn_map.get(name)
                if not vid:
                    vid = self._name_en_map.get(name.lower().strip())
                if not vid:
                    continue

                cur.execute(
                    f"""UPDATE {FLEET_VESSELS}
                        SET mmsi = %s, match_confidence = 0.95, match_method = 'NAME_EXACT',
                            last_seen_at = NOW(), updated_at = NOW()
                        WHERE id = %s AND (mmsi IS NULL OR mmsi = %s)""",
                    (mmsi, vid, mmsi),
                )
                if cur.rowcount == 0:
                    # The registry row is already bound to a different MMSI, so
                    # the guarded UPDATE changed nothing. Do not cache a match
                    # the database refused to record.
                    continue

                self._mmsi_to_vid[mmsi] = vid
                # Keep the cached vessel row in sync so gear-parent lookups see
                # the MMSI before the next hourly registry reload.
                if vid in self._vessels:
                    self._vessels[vid]['mmsi'] = mmsi
                matched += 1

            conn.commit()

        if matched > 0:
            logger.info('AIS→registry matched: %d vessels', matched)

    @staticmethod
    def _parse_gear_name(name: str) -> tuple[Optional[str], Optional[int], Optional[int]]:
        """Split a gear name into ``(parent_name, index_1, index_2)``.

        Any element is ``None`` when it cannot be derived from the name.
        """
        m = GEAR_PATTERN.match(name)
        if m:
            if m.group(1):
                # "<parent>_<indices>": up to two numeric indices from the suffix.
                suffix = name[m.end(1):].strip(' _')
                digits = re.findall(r'\d+', suffix)
                idx1 = int(digits[0]) if len(digits) >= 1 else None
                idx2 = int(digits[1]) if len(digits) >= 2 else None
                return m.group(1).strip(), idx1, idx2
            # Purely numeric name (e.g. 12345): no parent, index only.
            return None, int(m.group(2)), None

        m_pct = GEAR_PATTERN_PCT.match(name)
        if m_pct:
            return m_pct.group(1).strip(), None, None
        return None, None, None

    @staticmethod
    def _transfer_affinity_scores(cur, old_mmsi: str, new_mmsi: str) -> None:
        """Best-effort move of correlation scores from ``old_mmsi`` to ``new_mmsi``.

        The UPDATE runs inside a savepoint: a failing statement would otherwise
        abort the whole PostgreSQL transaction and make every later statement
        in the caller fail, defeating the purpose of catching the error.
        """
        cur.execute(f'SAVEPOINT {_AFFINITY_SAVEPOINT}')
        try:
            cur.execute(
                f"UPDATE {GEAR_CORRELATION_SCORES} "
                "SET target_mmsi = %s, updated_at = NOW() "
                "WHERE target_mmsi = %s",
                (new_mmsi, old_mmsi),
            )
            moved = cur.rowcount
            cur.execute(f'RELEASE SAVEPOINT {_AFFINITY_SAVEPOINT}')
        except Exception as e:
            cur.execute(f'ROLLBACK TO SAVEPOINT {_AFFINITY_SAVEPOINT}')
            logger.warning('affinity score transfer failed: %s', e)
            return
        if moved > 0:
            logger.info('transferred %d affinity scores: %s → %s', moved, old_mmsi, new_mmsi)

    def track_gear_identity(self, gear_signals: list[dict], conn) -> None:
        """Track the identity (MMSI/name pairing) of gear and net AIS signals.

        Args:
            gear_signals: ``[{mmsi, name, lat, lon}, ...]`` - AIS signals whose
                name follows the gear naming pattern.
            conn: DB connection; committed once after all signals are processed.
        """
        now = datetime.now(timezone.utc)

        with closing(conn.cursor()) as cur:
            for g in gear_signals:
                mmsi = g['mmsi']
                name = g['name']
                lat = g.get('lat', 0)
                lon = g.get('lon', 0)

                parent_name, idx1, idx2 = self._parse_gear_name(name)
                if not is_trackable_parent_name(parent_name or name):
                    continue

                # Resolve the parent vessel through the registry name maps.
                parent_mmsi: Optional[str] = None
                parent_vid: Optional[int] = None
                if parent_name:
                    vid = self._name_cn_map.get(parent_name)
                    if not vid:
                        vid = self._name_en_map.get(parent_name.lower())
                    if vid:
                        parent_vid = vid
                        parent_mmsi = self._vessels[vid].get('mmsi')

                match_method: Optional[str] = 'NAME_PARENT' if parent_vid else None
                confidence = 0.9 if parent_vid else 0.0
                new_row = (
                    mmsi, name, parent_name, parent_mmsi, parent_vid,
                    idx1, idx2, lat, lon,
                    match_method, confidence, now, now,
                )

                # Look up the currently active row for this MMSI.
                cur.execute(
                    f"""SELECT id, name FROM {GEAR_IDENTITY_LOG}
                        WHERE mmsi = %s AND is_active = TRUE""",
                    (mmsi,),
                )
                existing = cur.fetchone()

                if existing:
                    if existing[1] == name:
                        # Same MMSI, same name: refresh position and time.
                        cur.execute(
                            f"""UPDATE {GEAR_IDENTITY_LOG}
                                SET last_seen_at = %s, lat = %s, lon = %s
                                WHERE id = %s""",
                            (now, lat, lon, existing[0]),
                        )
                    else:
                        # Same MMSI, different name: retire the old row, add a new one.
                        cur.execute(
                            f'UPDATE {GEAR_IDENTITY_LOG} SET is_active = FALSE WHERE id = %s',
                            (existing[0],),
                        )
                        cur.execute(_GEAR_IDENTITY_INSERT, new_row)
                    continue

                # Unknown MMSI: check whether the same name is active under another MMSI.
                cur.execute(
                    f"""SELECT id, mmsi FROM {GEAR_IDENTITY_LOG}
                        WHERE name = %s AND is_active = TRUE AND mmsi != %s""",
                    (name, mmsi),
                )
                old_mmsi_row = cur.fetchone()
                if old_mmsi_row:
                    # Same name, different MMSI: treat it as an MMSI change.
                    cur.execute(
                        f'UPDATE {GEAR_IDENTITY_LOG} SET is_active = FALSE WHERE id = %s',
                        (old_mmsi_row[0],),
                    )
                    logger.info('gear MMSI change: %s → %s (name=%s)', old_mmsi_row[1], mmsi, name)
                    self._transfer_affinity_scores(cur, old_mmsi_row[1], mmsi)

                cur.execute(_GEAR_IDENTITY_INSERT, new_row)

            conn.commit()

    def build_fleet_clusters(self, vessel_dfs: dict[str, pd.DataFrame]) -> dict[str, dict]:
        """Build per-vessel cluster info from the registered fleet.

        Returns:
            ``{mmsi: {cluster_id, cluster_size, is_leader, fleet_role}}`` where
            ``cluster_id`` is the company id for companies with at least two
            vessels currently in ``vessel_dfs`` and ``-1`` otherwise.
        """
        results: dict[str, dict] = {}

        for cid, mmsis in self.get_company_vessels(vessel_dfs).items():
            if len(mmsis) < 2:
                # A lone vessel of a company is treated as noise.
                for mmsi in mmsis:
                    v = self._vessels.get(self._mmsi_to_vid.get(mmsi, -1), {})
                    results[mmsi] = {
                        'cluster_id': -1,
                        'cluster_size': 1,
                        'is_leader': False,
                        'fleet_role': v.get('fleet_role', 'NOISE'),
                    }
                continue

            # Two or more vessels: a registered-fleet cluster.
            for mmsi in mmsis:
                v = self._vessels[self._mmsi_to_vid[mmsi]]
                results[mmsi] = {
                    'cluster_id': cid,
                    'cluster_size': len(mmsis),
                    'is_leader': v['fleet_role'] == 'MAIN',
                    'fleet_role': v['fleet_role'],
                }

        # Vessels without a registry match are noise.
        for mmsi in vessel_dfs:
            if mmsi not in results:
                results[mmsi] = {
                    'cluster_id': -1,
                    'cluster_size': 0,
                    'is_leader': False,
                    'fleet_role': 'NOISE',
                }

        return results

    def save_snapshot(self, vessel_dfs: dict[str, pd.DataFrame], conn) -> None:
        """Insert one fleet_tracking_snapshot row per company currently seen on AIS."""
        now = datetime.now(timezone.utc)
        company_vessels = self.get_company_vessels(vessel_dfs)
        # Count registered vessels per company once instead of per company.
        totals = Counter(v['company_id'] for v in self._vessels.values())

        with closing(conn.cursor()) as cur:
            for cid, mmsis in company_vessels.items():
                lats: list[float] = []
                lons: list[float] = []
                for mmsi in mmsis:
                    df = vessel_dfs.get(mmsi)
                    if df is not None and len(df) > 0:
                        last = df.iloc[-1]
                        lats.append(float(last['lat']))
                        lons.append(float(last['lon']))

                center_lat = sum(lats) / len(lats) if lats else None
                center_lon = sum(lons) / len(lons) if lons else None

                cur.execute(
                    f"""INSERT INTO {FLEET_TRACKING_SNAPSHOT}
                        (company_id, snapshot_time, total_vessels, active_vessels,
                         center_lat, center_lon)
                        VALUES (%s, %s, %s, %s, %s, %s)""",
                    (cid, now, totals[cid], len(mmsis), center_lat, center_lon),
                )

            conn.commit()

        logger.info('fleet snapshot saved: %d companies', len(company_vessels))

    def get_company_vessels(self, vessel_dfs: dict[str, 'pd.DataFrame']) -> dict[int, list[str]]:
        """Group the matched MMSIs present in ``vessel_dfs`` by company.

        Returns:
            ``{company_id: [mmsi, ...]}``
        """
        result: dict[int, list[str]] = {}
        for mmsi, vid in self._mmsi_to_vid.items():
            v = self._vessels.get(vid)
            if not v or mmsi not in vessel_dfs:
                continue
            result.setdefault(v['company_id'], []).append(mmsi)
        return result


# Module-level singleton
fleet_tracker = FleetTracker()
"""FastAPI entry point of the KCG Prediction Service."""
import logging
import sys
from contextlib import asynccontextmanager, closing

from fastapi import BackgroundTasks, FastAPI

from config import qualified_table, settings
from db import kcgdb, snpdb
from scheduler import get_last_run, run_analysis_cycle, start_scheduler, stop_scheduler

logging.basicConfig(
    level=getattr(logging, settings.LOG_LEVEL, logging.INFO),
    format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    stream=sys.stdout,
)
logger = logging.getLogger(__name__)
GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores')
CORRELATION_PARAM_MODELS = qualified_table('correlation_param_models')


@asynccontextmanager
async def lifespan(application: FastAPI):
    """Open DB pools, warm the vessel cache and run the scheduler for the app's lifetime."""
    from cache.vessel_store import vessel_store

    logger.info('starting KCG Prediction Service')
    snpdb.init_pool()
    kcgdb.init_pool()

    # Initial load of the in-memory cache.
    logger.info('loading initial vessel data (%dh)...', settings.INITIAL_LOAD_HOURS)
    vessel_store.load_initial(settings.INITIAL_LOAD_HOURS)
    logger.info('initial load complete: %s', vessel_store.stats())

    start_scheduler()
    try:
        yield
    finally:
        # Always release the scheduler and the pools, even when the
        # application exits through an exception or cancellation.
        stop_scheduler()
        snpdb.close_pool()
        kcgdb.close_pool()
        logger.info('KCG Prediction Service stopped')


app = FastAPI(
    title='KCG Prediction Service',
    version='2.1.0',
    lifespan=lifespan,
)

# AI maritime-analysis chat router
from chat.router import router as chat_router
app.include_router(chat_router)


@app.get('/health')
def health_check():
    """Report the health of both databases and the vessel cache statistics."""
    from cache.vessel_store import vessel_store
    return {
        'status': 'ok',
        'snpdb': snpdb.check_health(),
        'kcgdb': kcgdb.check_health(),
        'store': vessel_store.stats(),
    }


@app.get('/api/v1/analysis/status')
def analysis_status():
    """Return the summary of the last analysis cycle."""
    return get_last_run()


@app.post('/api/v1/analysis/trigger')
def trigger_analysis(background_tasks: BackgroundTasks):
    """Queue one analysis cycle as a background task."""
    background_tasks.add_task(run_analysis_cycle)
    return {'message': 'analysis cycle triggered'}


@app.get('/api/v1/correlation/{group_key:path}/tracks')
def get_correlation_tracks(
    group_key: str,
    hours: int = 24,
    min_score: float = 0.3,
):
    """Return correlated vessels with their track history for map rendering.

    Queries gear_correlation_scores (ALL active models) and enriches the rows
    with track data from the in-memory vessel_store. Each vessel lists the
    models that detected it. Any failure is logged and answered with an empty
    vessel list.
    """
    from cache.vessel_store import vessel_store

    try:
        with kcgdb.get_conn() as conn:
            # closing() releases the cursor even when the query raises.
            with closing(conn.cursor()) as cur:
                # Correlated vessels from ALL active models
                cur.execute(f"""
                    SELECT s.target_mmsi, s.target_type, s.target_name,
                           s.current_score, m.name AS model_name
                    FROM {GEAR_CORRELATION_SCORES} s
                    JOIN {CORRELATION_PARAM_MODELS} m ON s.model_id = m.id
                    WHERE s.group_key = %s
                      AND s.current_score >= %s
                      AND m.is_active = TRUE
                    ORDER BY s.current_score DESC
                """, (group_key, min_score))
                rows = cur.fetchall()

        logger.info('correlation tracks: group_key=%r, min_score=%s, rows=%d',
                    group_key, min_score, len(rows))

        if not rows:
            return {'groupKey': group_key, 'vessels': []}

        # Group by MMSI: collect all models per vessel, keep the highest score
        vessel_map: dict[str, dict] = {}
        for row in rows:
            mmsi = row[0]
            model_name = row[4]
            score = float(row[3])
            entry = vessel_map.get(mmsi)
            if entry is None:
                vessel_map[mmsi] = {
                    'mmsi': mmsi,
                    'type': row[1],
                    'name': row[2] or '',
                    'score': score,
                    'models': {model_name: score},
                }
            else:
                entry['models'][model_name] = score
                if score > entry['score']:
                    entry['score'] = score

        mmsis = list(vessel_map.keys())

        # Tracks from vessel_store
        tracks = vessel_store.get_vessel_tracks(mmsis, hours)
        with_tracks = sum(1 for m in mmsis if m in tracks and len(tracks[m]) > 0)
        logger.info('correlation tracks: %d unique mmsis, %d with track data, vessel_store._tracks has %d entries',
                    len(mmsis), with_tracks, len(vessel_store._tracks))

        # Build response
        vessels = [
            {
                'mmsi': info['mmsi'],
                'name': info['name'],
                'type': info['type'],
                'score': info['score'],
                'models': info['models'],  # {modelName: score, ...}
                'track': tracks.get(info['mmsi'], []),
            }
            for info in vessel_map.values()
        ]

        return {'groupKey': group_key, 'vessels': vessels}

    except Exception as e:
        logger.warning('get_correlation_tracks failed for %s: %s', group_key, e)
        return {'groupKey': group_key, 'vessels': []}
"""AIS trajectory models (models/ais.py) and the analysis result row (models/result.py)."""
import json
import math
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional

import pandas as pd


# --- prediction/models/ais.py ---

@dataclass
class AISPoint:
    """A single AIS position report."""

    mmsi: str
    ts: pd.Timestamp
    lat: float
    lon: float
    sog: float
    cog: float
    state: str = 'UNKNOWN'


@dataclass
class VesselTrajectory:
    """All points of one vessel plus its classification attributes."""

    mmsi: str
    points: List[AISPoint] = field(default_factory=list)
    vessel_type: str = 'UNKNOWN'
    cluster_id: int = -1
    season: str = 'UNKNOWN'
    fishing_pct: float = 0.0
    features: Dict = field(default_factory=dict)


@dataclass
class ClassificationResult:
    """Outcome of classifying one vessel."""

    mmsi: str
    vessel_type: str
    confidence: float
    dominant_state: str
    fishing_pct: float
    cluster_id: int
    season: str
    feature_vector: Dict


# --- prediction/models/result.py ---

def _to_float(value: object) -> float:
    """Convert a (possibly numpy) number to a Python float; ``None`` becomes 0.0."""
    return float(value) if value is not None else 0.0


def _to_int(value: object) -> int:
    """Convert a (possibly numpy) integer to a Python int; ``None`` becomes 0."""
    return int(value) if value is not None else 0


def _finite_or_none(value: object) -> Optional[float]:
    """Return ``value`` as a finite Python float, or ``None`` if that is impossible.

    Non-numeric values would make ``float()`` raise, and NaN/Infinity are
    serialised by ``json.dumps`` as tokens that are not valid JSON; both are
    mapped to ``None`` (JSON ``null``).
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number if math.isfinite(number) else None


@dataclass
class AnalysisResult:
    """Row mapping for the vessel_analysis_results table."""

    mmsi: str
    timestamp: datetime

    # Classification result
    vessel_type: str = 'UNKNOWN'
    confidence: float = 0.0
    fishing_pct: float = 0.0
    cluster_id: int = -1
    season: str = 'UNKNOWN'

    # ALGO 01: location
    zone: str = 'EEZ_OR_BEYOND'
    dist_to_baseline_nm: float = 999.0

    # ALGO 02: activity state
    activity_state: str = 'UNKNOWN'
    ucaf_score: float = 0.0
    ucft_score: float = 0.0

    # ALGO 03: dark vessel
    is_dark: bool = False
    gap_duration_min: int = 0

    # ALGO 04: GPS spoofing
    spoofing_score: float = 0.0
    bd09_offset_m: float = 0.0
    speed_jump_count: int = 0

    # ALGO 05+06: fleet
    cluster_size: int = 0
    is_leader: bool = False
    fleet_role: str = 'NOISE'

    # ALGO 07: risk
    risk_score: int = 0
    risk_level: str = 'LOW'

    # ALGO 08: transshipment suspicion
    is_transship_suspect: bool = False
    transship_pair_mmsi: str = ''
    transship_duration_min: int = 0

    # Feature vector
    features: dict = field(default_factory=dict)

    # Metadata
    analyzed_at: Optional[datetime] = None

    def __post_init__(self):
        if self.analyzed_at is None:
            self.analyzed_at = datetime.now(timezone.utc)

    def to_db_tuple(self) -> tuple:
        """Return the 27 column values in insert order, using plain Python types.

        The feature dict is serialised to a JSON string; values that are not
        finite numbers are stored as ``null`` instead of raising or producing
        invalid JSON.
        """
        safe_features = (
            {k: _finite_or_none(v) for k, v in self.features.items()}
            if self.features else {}
        )

        return (
            str(self.mmsi),
            self.timestamp,
            str(self.vessel_type),
            _to_float(self.confidence),
            _to_float(self.fishing_pct),
            _to_int(self.cluster_id),
            str(self.season),
            str(self.zone),
            _to_float(self.dist_to_baseline_nm),
            str(self.activity_state),
            _to_float(self.ucaf_score),
            _to_float(self.ucft_score),
            bool(self.is_dark),
            _to_int(self.gap_duration_min),
            _to_float(self.spoofing_score),
            _to_float(self.bd09_offset_m),
            _to_int(self.speed_jump_count),
            _to_int(self.cluster_size),
            bool(self.is_leader),
            str(self.fleet_role),
            _to_int(self.risk_score),
            str(self.risk_level),
            bool(self.is_transship_suspect),
            str(self.transship_pair_mmsi),
            _to_int(self.transship_duration_min),
            json.dumps(safe_features),
            self.analyzed_at,
        )
import pandas as pd
from pipeline.constants import SOG_STATIONARY_MAX, SOG_FISHING_MAX


class BehaviorDetector:
    """
    Speed-based three-state behaviour classification
    (Yan et al. 2022, Natale et al. 2015):
    STATIONARY / FISHING / SAILING.
    """

    @staticmethod
    def classify_point(sog: float) -> str:
        """Map one speed-over-ground value to its behaviour state."""
        if sog < SOG_STATIONARY_MAX:
            return 'STATIONARY'
        if sog <= SOG_FISHING_MAX:
            return 'FISHING'
        return 'SAILING'

    def detect(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with a ``state`` column derived from ``sog``."""
        labelled = df.copy()
        labelled['state'] = labelled['sog'].apply(self.classify_point)
        return labelled

    @staticmethod
    def compute_fishing_ratio(df_vessel: pd.DataFrame) -> float:
        """Percentage (0-100, two decimals) of rows whose state is FISHING."""
        n_rows = len(df_vessel)
        if n_rows == 0:
            return 0.0
        n_fishing = (df_vessel['state'] == 'FISHING').sum()
        return round(n_fishing / n_rows * 100, 2)
import pandas as pd
from typing import Dict, Tuple


class VesselTypeClassifier:
    """
    Rule-based scoring classifier for fishing vessel types.

    Scoring: for each feature in a type's profile, if the value falls within
    the defined range a distance-based score is added (closer to the range
    centre = higher score). Values outside the range incur a penalty.
    Returns (vessel_type, confidence).

    TRAWL    - trawling speed 2.5-4.5 kt, high COG variation
    PURSE    - purse-seine speed 3-5 kt, circular COG pattern
    LONGLINE - longline speed 0.5-2.5 kt, low COG variation, long fishing runs
    TRAP     - trap/pot speed 0-2 kt, many stationary events, short range
    """

    PROFILES: Dict[str, Dict[str, Tuple[float, float]]] = {
        'TRAWL': {
            'sog_fishing_mean': (2.5, 4.5),
            'cog_change_mean': (0.15, 9.9),
            'fishing_pct': (0.3, 0.7),
            'fishing_run_mean': (5, 50),
            'stationary_events': (0, 5),
        },
        'PURSE': {
            'sog_fishing_mean': (3.0, 5.0),
            'cog_circularity': (0.2, 1.0),
            'fishing_pct': (0.1, 0.5),
            'fishing_run_mean': (3, 30),
            'stationary_events': (0, 3),
        },
        'LONGLINE': {
            'sog_fishing_mean': (0.5, 2.5),
            'cog_change_mean': (0.0, 0.15),
            'fishing_pct': (0.4, 0.9),
            'fishing_run_mean': (20, 999),
            'stationary_events': (0, 10),
        },
        'TRAP': {
            'sog_fishing_mean': (0.0, 2.0),
            'stationary_pct': (0.2, 0.8),
            'stationary_events': (5, 999),
            'fishing_run_mean': (1, 10),
            'total_distance_km': (0, 100),
        },
    }

    @staticmethod
    def _feature_score(val: float, lo: float, hi: float) -> float:
        """Score one feature value against its ``(lo, hi)`` profile range.

        Inside the range the score falls linearly from 1 at the centre to 0 at
        the edges; outside it is a penalty of at most 0.5 that grows with the
        distance to the nearest edge.
        """
        if lo <= val <= hi:
            mid = (lo + hi) / 2
            span = (hi - lo) / 2 if (hi - lo) > 0 else 1
            return max(0.0, 1 - abs(val - mid) / span)
        overshoot = min(abs(val - lo), abs(val - hi))
        return -min(0.5, overshoot / (hi - lo + 1e-9))

    def classify(self, features: Dict) -> Tuple[str, float]:
        """Classify a vessel from its feature dict.

        Returns:
            (vessel_type, confidence) where confidence is in [0, 1].
            ``('UNKNOWN', 0.0)`` when ``features`` is empty or contains none of
            the profile features, i.e. when there is no evidence for any type.
        """
        if not features:
            return 'UNKNOWN', 0.0

        scores: Dict[str, float] = {}
        evidence = 0
        for vtype, profile in self.PROFILES.items():
            partial = [
                self._feature_score(features[name], lo, hi)
                for name, (lo, hi) in profile.items()
                if features.get(name) is not None
            ]
            evidence += len(partial)
            scores[vtype] = sum(partial) / len(partial) if partial else 0.0

        if evidence == 0:
            # Without this guard every score is 0.0 and max() would report the
            # first profile (TRAWL) although nothing supports it.
            return 'UNKNOWN', 0.0

        best_type = max(scores, key=lambda k: scores[k])
        total = sum(max(v, 0.0) for v in scores.values())
        confidence = scores[best_type] / total if total > 0 else 0.0

        return best_type, round(confidence, 3)


_SEASON_BY_MONTH = {
    3: 'SPRING', 4: 'SPRING', 5: 'SPRING',
    6: 'SUMMER', 7: 'SUMMER', 8: 'SUMMER',
    9: 'FALL', 10: 'FALL', 11: 'FALL',
}


def get_season(ts: pd.Timestamp) -> str:
    """Return the Northern-Hemisphere season for a timestamp.

    Reference: paper 12 seasonal activity analysis (Chinese EEZ).
    Chinese fishing ban period: Yellow Sea / East China Sea May-Sep,
    South China Sea May-Aug.
    """
    return _SEASON_BY_MONTH.get(ts.month, 'WINTER')
+ """ + + def __init__( + self, + threshold: float = BIRCH_THRESHOLD, + branching: int = BIRCH_BRANCHING, + n_clusters: Optional[int] = None, + ) -> None: + self.threshold = threshold + self.branching = branching + self.n_clusters = n_clusters + self._model = None + + def _traj_to_vector(self, df_vessel: pd.DataFrame, n_points: int = 20) -> np.ndarray: + """Convert a vessel trajectory DataFrame to a fixed-length vector. + + Linearly samples n_points from the trajectory and interleaves lat/lon + values, then normalises to zero mean / unit variance. + """ + lats = df_vessel['lat'].values + lons = df_vessel['lon'].values + idx = np.linspace(0, len(lats) - 1, n_points).astype(int) + vec = np.concatenate([lats[idx], lons[idx]]) + vec = (vec - vec.mean()) / (vec.std() + 1e-9) + return vec + + def fit_predict(self, vessels: Dict[str, pd.DataFrame]) -> Dict[str, int]: + """Cluster vessel trajectories. + + Args: + vessels: mapping of mmsi -> resampled trajectory DataFrame. + + Returns: + Mapping of mmsi -> cluster_id. Vessels in small clusters are + assigned cluster_id -1 (noise). Vessels with fewer than 20 + points are excluded from the result. 
+ """ + mmsi_list: list[str] = [] + vectors: list[np.ndarray] = [] + + for mmsi, df_v in vessels.items(): + if len(df_v) < 20: + continue + mmsi_list.append(mmsi) + vectors.append(self._traj_to_vector(df_v)) + + if len(vectors) < 3: + return {m: 0 for m in mmsi_list} + + X = np.array(vectors) + + try: + from sklearn.cluster import Birch + model = Birch( + threshold=self.threshold, + branching_factor=self.branching, + n_clusters=self.n_clusters, + ) + labels = model.fit_predict(X) + self._model = model + except ImportError: + labels = self._simple_cluster(X) + + cnt = Counter(labels) + labels = np.array([lbl if cnt[lbl] >= MIN_CLUSTER_SIZE else -1 for lbl in labels]) + + return dict(zip(mmsi_list, labels.tolist())) + + @staticmethod + def _simple_cluster(X: np.ndarray, k: int = 5) -> np.ndarray: + """Fallback K-means used when sklearn is unavailable.""" + n = len(X) + k = min(k, n) + centers = X[np.random.choice(n, k, replace=False)] + labels = np.zeros(n, dtype=int) + for _ in range(20): + dists = np.array([[np.linalg.norm(x - c) for c in centers] for x in X]) + labels = dists.argmin(axis=1) + new_centers = np.array( + [X[labels == i].mean(axis=0) if (labels == i).any() else centers[i] for i in range(k)] + ) + if np.allclose(centers, new_centers, atol=1e-6): + break + centers = new_centers + return labels diff --git a/prediction/pipeline/constants.py b/prediction/pipeline/constants.py new file mode 100644 index 0000000..83a22e4 --- /dev/null +++ b/prediction/pipeline/constants.py @@ -0,0 +1,26 @@ +SOG_STATIONARY_MAX = 1.0 +SOG_FISHING_MAX = 5.0 +SOG_SAILING_MIN = 5.0 + +VESSEL_SOG_PROFILE = { + 'TRAWL': {'min': 1.5, 'max': 4.5, 'mean': 2.8, 'cog_var': 'high'}, + 'PURSE': {'min': 2.0, 'max': 5.0, 'mean': 3.5, 'cog_var': 'circular'}, + 'LONGLINE': {'min': 0.5, 'max': 3.0, 'mean': 1.8, 'cog_var': 'low'}, + 'TRAP': {'min': 0.0, 'max': 2.0, 'mean': 0.8, 'cog_var': 'very_low'}, +} + +RESAMPLE_INTERVAL_MIN = 4 + +BIRCH_THRESHOLD = 0.35 +BIRCH_BRANCHING = 50 
# --- prediction/pipeline/constants.py (remaining part) ---
MIN_CLUSTER_SIZE = 5

MMSI_DIGITS = 9
MAX_VESSEL_LENGTH = 300
MAX_SOG_KNOTS = 30.0
MIN_TRAJ_POINTS = 20

KR_BOUNDS = {
    'lat_min': 32.0, 'lat_max': 39.0,
    'lon_min': 124.0, 'lon_max': 132.0,
}


# --- prediction/pipeline/features.py ---
import math
import numpy as np
import pandas as pd
from typing import Dict

_EARTH_RADIUS_KM = 6371.0


class FeatureExtractor:
    """
    Feature-vector extraction for fishing-vessel type classification.
    Core features, based on paper 12 (South China Sea vessel type identification):
      - speed statistics (mean, std, quartiles)
      - course variability (COG changes -> turning pattern)
      - fishing ratio and fishing run length
      - travelled distance and area coverage
      - stationary-event count (proxy for net setting/hauling intervals)
    """

    # Trajectories with fewer rows yield an empty feature dict.
    MIN_POINTS = 10

    @staticmethod
    def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
        """Great-circle distance between two coordinates, in km."""
        phi1, phi2 = math.radians(lat1), math.radians(lat2)
        dphi = math.radians(lat2 - lat1)
        dlam = math.radians(lon2 - lon1)
        a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
        # Rounding can push ``a`` marginally outside [0, 1], which would make
        # sqrt(1 - a) raise a domain error.
        a = min(1.0, max(0.0, a))
        return _EARTH_RADIUS_KM * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    @staticmethod
    def _path_length_km(lats: np.ndarray, lons: np.ndarray) -> float:
        """Sum of haversine distances between consecutive points, vectorised."""
        if len(lats) < 2:
            return 0.0
        phi = np.radians(lats)
        lam = np.radians(lons)
        a = (
            np.sin(np.diff(phi) / 2) ** 2
            + np.cos(phi[:-1]) * np.cos(phi[1:]) * np.sin(np.diff(lam) / 2) ** 2
        )
        a = np.clip(a, 0.0, 1.0)
        seg = _EARTH_RADIUS_KM * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return float(seg.sum())

    @staticmethod
    def _run_lengths(mask: np.ndarray) -> np.ndarray:
        """Lengths of the maximal runs of consecutive ``True`` values in ``mask``."""
        padded = np.concatenate(([0], mask.astype(np.int8), [0]))
        edges = np.diff(padded)
        return np.flatnonzero(edges == -1) - np.flatnonzero(edges == 1)

    def extract(self, df_vessel: pd.DataFrame) -> Dict[str, float]:
        """Compute the feature dict for one vessel.

        Args:
            df_vessel: trajectory with columns ``sog``, ``cog``, ``state``,
                ``lat`` and ``lon``.

        Returns:
            Feature name -> value, or ``{}`` for fewer than ``MIN_POINTS`` rows.
        """
        if len(df_vessel) < self.MIN_POINTS:
            return {}

        sog = df_vessel['sog'].values
        cog = df_vessel['cog'].values
        # Object array so the comparisons below always yield plain boolean
        # ndarrays, whatever string dtype pandas uses for the column.
        states = df_vessel['state'].to_numpy(dtype=object)
        is_fishing = np.asarray(states == 'FISHING', dtype=bool)
        is_stationary = np.asarray(states == 'STATIONARY', dtype=bool)
        is_sailing = np.asarray(states == 'SAILING', dtype=bool)

        # Speed features
        fishing_sog = sog[is_fishing] if is_fishing.any() else np.array([0])
        feat: Dict[str, float] = {
            'sog_mean': float(np.mean(sog)),
            'sog_std': float(np.std(sog)),
            'sog_fishing_mean': float(np.mean(fishing_sog)),
            'sog_fishing_std': float(np.std(fishing_sog)),
            'sog_q25': float(np.percentile(sog, 25)),
            'sog_q75': float(np.percentile(sog, 75)),
        }

        # COG features (purse seine: circular, trawl: straight back-and-forth,
        # longline: smooth curves); differences are taken on the unwrapped
        # heading, in radians.
        cog_diff = np.abs(np.diff(np.unwrap(np.radians(cog))))
        feat['cog_change_mean'] = float(np.mean(cog_diff))
        feat['cog_change_std'] = float(np.std(cog_diff))
        feat['cog_circularity'] = float(np.sum(cog_diff > np.pi / 4) / len(cog_diff))

        # State ratios
        n = len(states)
        feat['fishing_pct'] = float(is_fishing.sum() / n)
        feat['stationary_pct'] = float(is_stationary.sum() / n)
        feat['sailing_pct'] = float(is_sailing.sum() / n)

        # Stationary events = number of separate STATIONARY runs
        # (estimate of the number of net setting / hauling stops).
        feat['stationary_events'] = float(len(self._run_lengths(is_stationary)))

        # Total distance (km)
        lats = df_vessel['lat'].to_numpy(dtype=float)
        lons = df_vessel['lon'].to_numpy(dtype=float)
        feat['total_distance_km'] = round(self._path_length_km(lats, lons), 2)

        # Coverage (bounding-box area in squared degrees, an approximation)
        feat['coverage_deg2'] = round(float(np.ptp(lats)) * float(np.ptp(lons)), 4)

        # Average fishing run length (in rows)
        fishing_runs = self._run_lengths(is_fishing)
        feat['fishing_run_mean'] = float(np.mean(fishing_runs)) if len(fishing_runs) else 0.0

        return feat
import logging

import pandas as pd

from pipeline.preprocessor import AISPreprocessor
from pipeline.behavior import BehaviorDetector
from pipeline.resampler import TrajectoryResampler
from pipeline.features import FeatureExtractor
from pipeline.classifier import VesselTypeClassifier, get_season
from pipeline.clusterer import EnhancedBIRCHClusterer
from pipeline.constants import RESAMPLE_INTERVAL_MIN

logger = logging.getLogger(__name__)


class ChineseFishingVesselPipeline:
    """7-step pipeline for classifying Chinese fishing vessel activity types.

    Steps:
    1. AIS preprocessing (Yan et al. 2022)
    2. Behaviour-state detection (speed-based 3-class)
    3. Trajectory resampling (Yan, Yang et al. - 4-minute interval)
    4. Feature vector extraction (paper 12)
    5. Vessel-type classification (rule-based scoring)
    6. Enhanced BIRCH trajectory clustering (Yan, Yang et al.)
    7. Seasonal activity tagging (paper 12)
    """

    def __init__(self) -> None:
        self.preprocessor = AISPreprocessor()
        self.detector = BehaviorDetector()
        self.resampler = TrajectoryResampler(RESAMPLE_INTERVAL_MIN)
        self.extractor = FeatureExtractor()
        self.classifier = VesselTypeClassifier()
        self.clusterer = EnhancedBIRCHClusterer()

    def run(
        self, df_raw: pd.DataFrame
    ) -> tuple[list[dict], dict[str, pd.DataFrame]]:
        """Run the 7-step pipeline.

        Args:
            df_raw: raw AIS DataFrame with columns mmsi, timestamp, lat, lon,
                sog, cog.

        Returns:
            (results, vessel_dfs) where:
            - results is a list of classification dicts, each containing:
              mmsi, vessel_type, confidence, fishing_pct, cluster_id, season,
              n_points, features.
            - vessel_dfs is a mapping of mmsi -> resampled trajectory DataFrame.
        """
        # Step 1: preprocess
        cleaned = self.preprocessor.run(df_raw)
        if len(cleaned) == 0:
            logger.warning('pipeline: no rows after preprocessing')
            return [], {}

        # Step 2: behaviour detection
        labelled = self.detector.detect(cleaned)

        # Steps 3-5 (and the season tag of step 7): per-vessel processing
        vessel_dfs: dict[str, pd.DataFrame] = {}
        results: list[dict] = []

        for mmsi, track in labelled.groupby('mmsi'):
            resampled = self.resampler.resample(track)
            vessel_dfs[mmsi] = resampled

            features = self.extractor.extract(resampled)
            vtype, confidence = self.classifier.classify(features)
            # The season is taken from the timestamp in the middle of the raw track.
            mid_timestamp = track['timestamp'].iloc[len(track) // 2]

            results.append({
                'mmsi': mmsi,
                'vessel_type': vtype,
                'confidence': confidence,
                'fishing_pct': BehaviorDetector.compute_fishing_ratio(resampled),
                'season': get_season(mid_timestamp),
                'n_points': len(resampled),
                'features': features,
            })

        # Step 6: BIRCH clustering; vessels the clusterer left out get -1
        cluster_map = self.clusterer.fit_predict(vessel_dfs)
        for entry in results:
            entry['cluster_id'] = cluster_map.get(entry['mmsi'], -1)

        logger.info(
            'pipeline complete: %d vessels, types=%s',
            len(results),
            {entry['vessel_type'] for entry in results},
        )
        return results, vessel_dfs
# --- prediction/pipeline/preprocessor.py ---
import pandas as pd
from collections import defaultdict

from pipeline.constants import KR_BOUNDS, MAX_SOG_KNOTS, MIN_TRAJ_POINTS


class AISPreprocessor:
    """Delete-Supplement-Update (Yan et al. 2022)"""

    def __init__(self):
        self.stats = defaultdict(int)

    def run(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clean a raw AIS frame.

        Steps: validate columns, drop invalid MMSIs and out-of-area positions,
        de-duplicate per (mmsi, timestamp), repair out-of-range SOG values from
        their valid neighbours, and finally drop vessels with fewer than
        ``MIN_TRAJ_POINTS`` remaining rows.

        Raises:
            ValueError: if a required column is missing.
        """
        original = len(df)

        required = ['mmsi', 'timestamp', 'lat', 'lon', 'sog', 'cog']
        missing = [c for c in required if c not in df.columns]
        if missing:
            raise ValueError(f"필수 컬럼 누락: {missing}")

        df = df.copy()
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        valid_mmsi = df['mmsi'].astype(str).str.match(r'^\d{9}$')
        df = df[valid_mmsi]
        self.stats['invalid_mmsi'] += original - len(df)

        df = df[(df['lat'].between(-90, 90)) & (df['lon'].between(-180, 180))]

        df = df[
            df['lat'].between(KR_BOUNDS['lat_min'], KR_BOUNDS['lat_max']) &
            df['lon'].between(KR_BOUNDS['lon_min'], KR_BOUNDS['lon_max'])
        ]

        # De-duplicate before smoothing and counting, so duplicates neither
        # distort the rolling window nor let a vessel pass the minimum-points
        # filter with rows that are removed afterwards.
        df = df.sort_values(['mmsi', 'timestamp'])
        df = df.drop_duplicates(subset=['mmsi', 'timestamp'])

        if not df.empty:
            # Mask out-of-range speeds first, so the 3-point rolling mean used
            # to replace them is built from valid neighbours only and is not
            # contaminated by the outlier itself.
            valid_sog = df['sog'].where(df['sog'].between(0, MAX_SOG_KNOTS))
            neighbour_mean = valid_sog.groupby(df['mmsi']).transform(
                lambda s: s.rolling(3, center=True, min_periods=1).mean()
            )
            df = df.assign(sog=valid_sog.fillna(neighbour_mean))
        # Rows without any valid neighbour are still NaN and are dropped here.
        df = df[(df['sog'] >= 0) & (df['sog'] <= MAX_SOG_KNOTS)]

        counts = df.groupby('mmsi').size()
        valid_mmsi_list = counts[counts >= MIN_TRAJ_POINTS].index
        df = df[df['mmsi'].isin(valid_mmsi_list)]

        self.stats['final_records'] = len(df)
        self.stats['retention_pct'] = round(len(df) / max(original, 1) * 100, 2)
        return df.reset_index(drop=True)


# --- prediction/pipeline/resampler.py ---
import numpy as np
import pandas as pd
from pipeline.constants import RESAMPLE_INTERVAL_MIN
from pipeline.behavior import BehaviorDetector


class TrajectoryResampler:
    """
    Interpolates irregular AIS reception intervals onto a uniform time grid.
    Purpose: normalise the input vectors for BIRCH clustering.
    Method: time-weighted linear interpolation of latitude, longitude, SOG and COG.
    Interval: 4 minutes by default (Shepperson et al. 2017)
    """

    def __init__(self, interval_min: int = RESAMPLE_INTERVAL_MIN):
        self.interval = pd.Timedelta(minutes=interval_min)

    @staticmethod
    def _unwrap_degrees(cog: pd.Series) -> pd.Series:
        """Remove the 360° discontinuity from a heading series.

        Linear interpolation between 359° and 1° would otherwise pass through
        180°. Consecutive known headings are assumed to be joined by the
        shorter arc; missing values stay missing.
        """
        values = pd.to_numeric(cog, errors='coerce').to_numpy(dtype=float, copy=True)
        known = ~np.isnan(values)
        if known.sum() >= 2:
            values[known] = np.degrees(np.unwrap(np.radians(values[known])))
        return pd.Series(values, index=cog.index)

    def resample(self, df_vessel: pd.DataFrame) -> pd.DataFrame:
        """Return the trajectory sampled every ``self.interval`` from its first fix.

        Frames with fewer than two distinct timestamps are returned sorted but
        otherwise unchanged. Columns other than lat/lon/sog/cog are not
        interpolated; ``state`` is recomputed from the interpolated SOG.
        """
        df_vessel = df_vessel.sort_values('timestamp').copy()
        if len(df_vessel) < 2:
            return df_vessel

        # reindex() refuses an index with duplicate labels.
        df_vessel = df_vessel.drop_duplicates(subset='timestamp')
        if len(df_vessel) < 2:
            return df_vessel

        t_start = df_vessel['timestamp'].iloc[0]
        t_end = df_vessel['timestamp'].iloc[-1]
        new_times = pd.date_range(t_start, t_end, freq=self.interval)

        has_cog = 'cog' in df_vessel.columns
        if has_cog:
            df_vessel['cog'] = self._unwrap_degrees(df_vessel['cog'])

        df_vessel = df_vessel.set_index('timestamp')
        df_vessel = df_vessel.reindex(df_vessel.index.union(new_times))
        for col in ['lat', 'lon', 'sog', 'cog']:
            if col in df_vessel.columns:
                df_vessel[col] = df_vessel[col].interpolate(method='time')
        if has_cog:
            # Fold the unwrapped heading back into [0, 360).
            df_vessel['cog'] = df_vessel['cog'] % 360.0

        df_vessel = df_vessel.loc[new_times].rename_axis('timestamp').reset_index()
        df_vessel['state'] = df_vessel['sog'].apply(BehaviorDetector.classify_point)
        return df_vessel
def get_last_run() -> dict:
    """Return a shallow copy of the latest analysis-cycle summary.

    A new dict is returned so that assigning keys on the result does not
    alter the module-level ``_last_run`` record.
    """
    snapshot = dict(_last_run)
    return snapshot
"""5분 주기 분석 사이클 — 인메모리 캐시 기반.""" + import re as _re + from cache.vessel_store import vessel_store + from db import snpdb, kcgdb + from pipeline.orchestrator import ChineseFishingVesselPipeline + from algorithms.location import classify_zone + from algorithms.fishing_pattern import compute_ucaf_score, compute_ucft_score + from algorithms.dark_vessel import is_dark_vessel + from algorithms.spoofing import compute_spoofing_score, count_speed_jumps, compute_bd09_offset + from algorithms.risk import compute_vessel_risk_score + from fleet_tracker import fleet_tracker + from models.result import AnalysisResult + + start = time.time() + _last_run['timestamp'] = datetime.now(timezone.utc).isoformat() + _last_run['error'] = None + + try: + # 1. 증분 로드 + stale 제거 + if vessel_store.last_bucket is None: + logger.warning('last_bucket is None, skipping incremental fetch (initial load not complete)') + df_new = None + else: + df_new = snpdb.fetch_incremental(vessel_store.last_bucket) + if df_new is not None and len(df_new) > 0: + vessel_store.merge_incremental(df_new) + vessel_store.evict_stale(settings.CACHE_WINDOW_HOURS) + + # 정적정보 / 허가어선 주기적 갱신 + vessel_store.refresh_static_info() + vessel_store.refresh_permit_registry() + + # 2. 분석 대상 선별 (SOG/COG 계산 포함) + df_targets = vessel_store.select_analysis_targets() + if len(df_targets) == 0: + logger.info('no analysis targets, skipping cycle') + _last_run['vessel_count'] = 0 + return + + # 3. 7단계 파이프라인 실행 + pipeline = ChineseFishingVesselPipeline() + classifications, vessel_dfs = pipeline.run(df_targets) + + if not classifications: + logger.info('no vessels classified, skipping') + _last_run['vessel_count'] = 0 + return + + # 4. 
등록 선단 기반 fleet 분석 + _gear_re = _re.compile(r'^.+_(?=\S*\d)\S+(?:[_ ]\S*)*[_ ]*$|^\d+$|^.+%$') + with kcgdb.get_conn() as kcg_conn: + fleet_tracker.load_registry(kcg_conn) + + all_ais = [] + for mmsi, df in vessel_dfs.items(): + if len(df) > 0: + last = df.iloc[-1] + all_ais.append({ + 'mmsi': mmsi, + 'name': vessel_store.get_vessel_info(mmsi).get('name', ''), + 'lat': float(last['lat']), + 'lon': float(last['lon']), + }) + + fleet_tracker.match_ais_to_registry(all_ais, kcg_conn) + + gear_signals = [v for v in all_ais if _gear_re.match(v.get('name', ''))] + fleet_tracker.track_gear_identity(gear_signals, kcg_conn) + + fleet_roles = fleet_tracker.build_fleet_clusters(vessel_dfs) + + fleet_tracker.save_snapshot(vessel_dfs, kcg_conn) + + gear_groups = [] + + # 4.5 그룹 폴리곤 생성 + 저장 + try: + from algorithms.polygon_builder import detect_gear_groups, build_all_group_snapshots + + company_vessels = fleet_tracker.get_company_vessels(vessel_dfs) + gear_groups = detect_gear_groups(vessel_store) + group_snapshots = build_all_group_snapshots( + vessel_store, company_vessels, + fleet_tracker._companies, + ) + saved = kcgdb.save_group_snapshots(group_snapshots) + cleaned = kcgdb.cleanup_group_snapshots(days=7) + logger.info('group polygons: %d saved, %d cleaned, %d gear groups', + saved, cleaned, len(gear_groups)) + except Exception as e: + logger.warning('group polygon generation failed: %s', e) + + # 4.7 어구 연관성 분석 (멀티모델 패턴 추적) + try: + from algorithms.gear_correlation import run_gear_correlation + from algorithms.gear_parent_inference import run_gear_parent_inference + + corr_result = run_gear_correlation( + vessel_store=vessel_store, + gear_groups=gear_groups, + conn=kcg_conn, + ) + logger.info( + 'gear correlation: %d scores updated, %d raw metrics, %d models', + corr_result['updated'], corr_result['raw_inserted'], + corr_result['models'], + ) + + inference_result = run_gear_parent_inference( + vessel_store=vessel_store, + gear_groups=gear_groups, + conn=kcg_conn, + ) + 
logger.info( + 'gear parent inference: %d groups, %d direct-match, %d candidates, %d promoted, %d review, %d skipped', + inference_result['groups'], + inference_result.get('direct_matched', 0), + inference_result['candidates'], + inference_result['promoted'], + inference_result['review_required'], + inference_result['skipped'], + ) + except Exception as e: + logger.warning('gear correlation failed: %s', e) + + # 5. 선박별 추가 알고리즘 → AnalysisResult 생성 + results = [] + for c in classifications: + mmsi = c['mmsi'] + df_v = vessel_dfs.get(mmsi) + if df_v is None or len(df_v) == 0: + continue + + last_row = df_v.iloc[-1] + ts = last_row.get('timestamp') + + zone_info = classify_zone(last_row['lat'], last_row['lon']) + + gear_map = {'TRAWL': 'OT', 'PURSE': 'PS', 'LONGLINE': 'GN', 'TRAP': 'TRAP'} + gear = gear_map.get(c['vessel_type'], 'OT') + ucaf = compute_ucaf_score(df_v, gear) + ucft = compute_ucft_score(df_v) + + dark, gap_min = is_dark_vessel(df_v) + + spoof_score = compute_spoofing_score(df_v) + speed_jumps = count_speed_jumps(df_v) + bd09_offset = compute_bd09_offset(last_row['lat'], last_row['lon']) + + fleet_info = fleet_roles.get(mmsi, {}) + + is_permitted = vessel_store.is_permitted(mmsi) + risk_score, risk_level = compute_vessel_risk_score( + mmsi, df_v, zone_info, is_permitted=is_permitted, + ) + + activity = 'UNKNOWN' + if 'state' in df_v.columns and len(df_v) > 0: + activity = df_v['state'].mode().iloc[0] + + results.append(AnalysisResult( + mmsi=mmsi, + timestamp=ts, + vessel_type=c['vessel_type'], + confidence=c['confidence'], + fishing_pct=c['fishing_pct'], + cluster_id=fleet_info.get('cluster_id', -1), + season=c['season'], + zone=zone_info.get('zone', 'EEZ_OR_BEYOND'), + dist_to_baseline_nm=zone_info.get('dist_from_baseline_nm', 999.0), + activity_state=activity, + ucaf_score=ucaf, + ucft_score=ucft, + is_dark=dark, + gap_duration_min=gap_min, + spoofing_score=spoof_score, + bd09_offset_m=bd09_offset, + speed_jump_count=speed_jumps, + 
cluster_size=fleet_info.get('cluster_size', 0), + is_leader=fleet_info.get('is_leader', False), + fleet_role=fleet_info.get('fleet_role', 'NOISE'), + risk_score=risk_score, + risk_level=risk_level, + features=c.get('features', {}), + )) + + # ── 5.5 경량 분석 — 파이프라인 미통과 412* 선박 ── + from algorithms.risk import compute_lightweight_risk_score + + pipeline_mmsis = {c['mmsi'] for c in classifications} + lightweight_mmsis = vessel_store.get_chinese_mmsis() - pipeline_mmsis + + if lightweight_mmsis: + now = datetime.now(timezone.utc) + all_positions = vessel_store.get_all_latest_positions() + lw_count = 0 + for mmsi in lightweight_mmsis: + pos = all_positions.get(mmsi) + if pos is None or pos.get('lat') is None: + continue + lat, lon = pos['lat'], pos['lon'] + sog = pos.get('sog', 0) or 0 + cog = pos.get('cog', 0) or 0 + ts = pos.get('timestamp', now) + + zone_info = classify_zone(lat, lon) + if sog <= 1.0: + state = 'STATIONARY' + elif sog <= 5.0: + state = 'FISHING' + else: + state = 'SAILING' + + is_permitted = vessel_store.is_permitted(mmsi) + risk_score, risk_level = compute_lightweight_risk_score( + zone_info, sog, is_permitted=is_permitted, + ) + + # BD-09 오프셋은 중국 선박이므로 제외 (412* = 중국) + results.append(AnalysisResult( + mmsi=mmsi, + timestamp=ts, + vessel_type='UNKNOWN', + confidence=0.0, + fishing_pct=0.0, + zone=zone_info.get('zone', 'EEZ_OR_BEYOND'), + dist_to_baseline_nm=zone_info.get('dist_from_baseline_nm', 999.0), + activity_state=state, + ucaf_score=0.0, + ucft_score=0.0, + is_dark=False, + gap_duration_min=0, + spoofing_score=0.0, + bd09_offset_m=0.0, + speed_jump_count=0, + cluster_id=-1, + cluster_size=0, + is_leader=False, + fleet_role='NONE', + risk_score=risk_score, + risk_level=risk_level, + is_transship_suspect=False, + transship_pair_mmsi='', + transship_duration_min=0, + )) + lw_count += 1 + logger.info('lightweight analysis: %d vessels', lw_count) + + # 6. 
환적 의심 탐지 (pair_history 모듈 레벨로 사이클 간 유지) + from algorithms.transshipment import detect_transshipment + + results_map = {r.mmsi: r for r in results} + transship_pairs = detect_transshipment(df_targets, _transship_pair_history) + for mmsi_a, mmsi_b, dur in transship_pairs: + if mmsi_a in results_map: + results_map[mmsi_a].is_transship_suspect = True + results_map[mmsi_a].transship_pair_mmsi = mmsi_b + results_map[mmsi_a].transship_duration_min = dur + if mmsi_b in results_map: + results_map[mmsi_b].is_transship_suspect = True + results_map[mmsi_b].transship_pair_mmsi = mmsi_a + results_map[mmsi_b].transship_duration_min = dur + + # 7. 결과 저장 + upserted = kcgdb.upsert_results(results) + kcgdb.cleanup_old(hours=48) + + # 8. Redis에 분석 컨텍스트 캐싱 (채팅용) + try: + from chat.cache import cache_analysis_context + + results_map = {r.mmsi: r for r in results} + risk_dist = {} + zone_dist = {} + dark_count = 0 + spoofing_count = 0 + transship_count = 0 + top_risk_list = [] + + for r in results: + risk_dist[r.risk_level] = risk_dist.get(r.risk_level, 0) + 1 + zone_dist[r.zone] = zone_dist.get(r.zone, 0) + 1 + if r.is_dark: + dark_count += 1 + if r.spoofing_score > 0.5: + spoofing_count += 1 + if r.is_transship_suspect: + transship_count += 1 + top_risk_list.append({ + 'mmsi': r.mmsi, + 'name': vessel_store.get_vessel_info(r.mmsi).get('name', r.mmsi), + 'risk_score': r.risk_score, + 'risk_level': r.risk_level, + 'zone': r.zone, + 'is_dark': r.is_dark, + 'is_transship': r.is_transship_suspect, + 'activity_state': r.activity_state, + }) + + top_risk_list.sort(key=lambda x: x['risk_score'], reverse=True) + + cache_analysis_context({ + 'vessel_stats': vessel_store.stats(), + 'risk_distribution': {**risk_dist, **zone_dist}, + 'dark_count': dark_count, + 'spoofing_count': spoofing_count, + 'transship_count': transship_count, + 'top_risk_vessels': top_risk_list[:10], + 'polygon_summary': kcgdb.fetch_polygon_summary(), + }) + except Exception as e: + logger.warning('failed to cache analysis 
def parse_jsx(path: Path) -> list[list]:
    """Extract and parse the ``const D = [...]`` array from a JSX file.

    The array literal is located with a regex, then copied character by
    character up to its matching closing bracket. Double-quoted string
    literals are tracked so that brackets or commas inside strings are
    never treated as structure. Trailing commas before ``]`` or ``}``
    (legal in JavaScript, illegal in JSON) are dropped on the way.

    Args:
        path: Location of the UTF-8 encoded JSX file.

    Returns:
        The decoded array.

    Raises:
        ValueError: If no ``const D = [`` declaration exists, if the array
            is never closed, or if the extracted text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    text = path.read_text(encoding='utf-8')

    m = re.search(r'const\s+D\s*=\s*\[', text)
    if not m:
        raise ValueError('D 배열을 찾을 수 없습니다')

    start = m.end() - 1  # index of the opening '['
    out: list[str] = []
    depth = 0
    in_string = False
    escaped = False
    closed = False

    for ch in text[start:]:
        if in_string:
            # Inside a string everything is copied verbatim; only an
            # unescaped double quote ends the literal.
            out.append(ch)
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == '[':
            depth += 1
        elif ch in ']}':
            # Drop a structural trailing comma (plus the whitespace that
            # follows it) directly before this closer.
            tail = len(out)
            while tail and out[tail - 1].isspace():
                tail -= 1
            if tail and out[tail - 1] == ',':
                del out[tail - 1:]
            if ch == ']':
                depth -= 1
                if depth == 0:
                    out.append(ch)
                    closed = True
                    break
        out.append(ch)

    if not closed:
        raise ValueError('D 배열의 닫는 ] 를 찾을 수 없습니다')

    return json.loads(''.join(out))
if __name__ == '__main__':
    # Script entry point: parse the JSX registry and load it into the DB.
    if not JSX_PATH.exists():
        print(f'파일을 찾을 수 없습니다: {JSX_PATH}', file=sys.stderr)
        sys.exit(1)

    # DB password: taken from the KCGDB_PASSWORD environment variable, or
    # typed interactively when it is unset or empty. No credential is kept
    # in the source file.
    import getpass
    import os
    password = os.environ.get('KCGDB_PASSWORD') or getpass.getpass('KCGDB_PASSWORD: ')

    print(f'JSX 파싱: {JSX_PATH}')
    data = parse_jsx(JSX_PATH)
    print(f'파싱 완료: {len(data)}개 회사')

    print('DB 적재 시작...')
    load_to_db(data, password)
class GearParentEpisodeTest(unittest.TestCase):
    """Unit tests for the episode-continuity helpers in gear_parent_episode."""

    def test_continuity_score_prefers_member_overlap_and_near_center(self):
        """Two shared members and nearby centres give a usable continuity score."""
        current = GroupEpisodeInput(
            group_key='ZHEDAIYU02394',
            normalized_parent_name='ZHEDAIYU02394',
            sub_cluster_id=1,
            member_mmsis=['100', '200', '300'],
            member_count=3,
            center_lat=35.0,
            center_lon=129.0,
        )
        # Shares members '100' and '200' with `current`; centre is offset slightly.
        previous = EpisodeState(
            episode_id='ep-prev',
            lineage_key='ZHEDAIYU02394',
            group_key='ZHEDAIYU02394',
            normalized_parent_name='ZHEDAIYU02394',
            current_sub_cluster_id=0,
            member_mmsis=['100', '200', '400'],
            member_count=3,
            center_lat=35.02,
            center_lon=129.01,
            last_snapshot_time=datetime.now(timezone.utc),
            status='ACTIVE',
        )
        score, overlap_count, distance_nm = continuity_score(current, previous)
        self.assertGreaterEqual(overlap_count, 2)
        self.assertGreater(score, 0.45)
        self.assertLess(distance_nm, 12.0)

    def test_build_episode_plan_creates_merge_episode(self):
        """A group covering two previous episodes is planned as MERGE_NEW."""
        now = datetime.now(timezone.utc)
        current = GroupEpisodeInput(
            group_key='JINSHI',
            normalized_parent_name='JINSHI',
            sub_cluster_id=0,
            member_mmsis=['a', 'b', 'c', 'd'],
            member_count=4,
            center_lat=35.0,
            center_lon=129.0,
        )
        # The two previous episodes each hold half of the current members.
        previous_a = EpisodeState(
            episode_id='ep-a',
            lineage_key='JINSHI',
            group_key='JINSHI',
            normalized_parent_name='JINSHI',
            current_sub_cluster_id=1,
            member_mmsis=['a', 'b'],
            member_count=2,
            center_lat=35.0,
            center_lon=129.0,
            last_snapshot_time=now - timedelta(minutes=5),
            status='ACTIVE',
        )
        previous_b = EpisodeState(
            episode_id='ep-b',
            lineage_key='JINSHI',
            group_key='JINSHI',
            normalized_parent_name='JINSHI',
            current_sub_cluster_id=2,
            member_mmsis=['c', 'd'],
            member_count=2,
            center_lat=35.01,
            center_lon=129.01,
            last_snapshot_time=now - timedelta(minutes=5),
            status='ACTIVE',
        )
        plan = build_episode_plan([current], {'JINSHI': [previous_a, previous_b]})
        assignment = plan.assignments[current.key]
        self.assertEqual(assignment.continuity_source, 'MERGE_NEW')
        self.assertEqual(set(assignment.merged_from_episode_ids), {'ep-a', 'ep-b'})
        # Both superseded episodes must point at the newly created one.
        self.assertEqual(plan.merged_episode_targets['ep-a'], assignment.episode_id)
        self.assertEqual(plan.merged_episode_targets['ep-b'], assignment.episode_id)

    def test_build_episode_plan_marks_split_continue_and_split_new(self):
        """One previous episode split into two groups yields both split sources."""
        now = datetime.now(timezone.utc)
        previous = EpisodeState(
            episode_id='ep-prev',
            lineage_key='A01859',
            group_key='A01859',
            normalized_parent_name='A01859',
            current_sub_cluster_id=0,
            member_mmsis=['a', 'b', 'c', 'd'],
            member_count=4,
            center_lat=35.0,
            center_lon=129.0,
            last_snapshot_time=now - timedelta(minutes=5),
            status='ACTIVE',
        )
        current_a = GroupEpisodeInput(
            group_key='A01859',
            normalized_parent_name='A01859',
            sub_cluster_id=1,
            member_mmsis=['a', 'b', 'c'],
            member_count=3,
            center_lat=35.0,
            center_lon=129.0,
        )
        current_b = GroupEpisodeInput(
            group_key='A01859',
            normalized_parent_name='A01859',
            sub_cluster_id=2,
            member_mmsis=['c', 'd'],
            member_count=2,
            center_lat=35.02,
            center_lon=129.02,
        )
        plan = build_episode_plan([current_a, current_b], {'A01859': [previous]})
        # Only the set of sources is checked, not which group gets which one.
        sources = {plan.assignments[current_a.key].continuity_source, plan.assignments[current_b.key].continuity_source}
        self.assertIn('SPLIT_CONTINUE', sources)
        self.assertIn('SPLIT_NEW', sources)

    def test_compute_prior_bonus_components_caps_total_bonus(self):
        """All three prior bonuses are positive and their total stays <= 0.20."""
        observed_at = datetime.now(timezone.utc)
        bonuses = compute_prior_bonus_components(
            observed_at=observed_at,
            normalized_parent_name='JINSHI',
            episode_id='ep-1',
            candidate_mmsi='412333326',
            episode_prior_stats={
                ('ep-1', '412333326'): {
                    'seen_count': 12,
                    'top1_count': 5,
                    'avg_score': 0.88,
                    'last_seen_at': observed_at - timedelta(hours=1),
                },
            },
            lineage_prior_stats={
                ('JINSHI', '412333326'): {
                    'seen_count': 24,
                    'top1_count': 6,
                    'top3_count': 10,
                    'avg_score': 0.82,
                    'last_seen_at': observed_at - timedelta(hours=3),
                },
            },
            label_prior_stats={
                ('JINSHI', '412333326'): {
                    'session_count': 4,
                    'last_labeled_at': observed_at - timedelta(days=1),
                },
            },
        )
        self.assertGreater(bonuses['episodePriorBonus'], 0.0)
        self.assertGreater(bonuses['lineagePriorBonus'], 0.0)
        self.assertGreater(bonuses['labelPriorBonus'], 0.0)
        self.assertLessEqual(bonuses['priorBonusTotal'], 0.20)
class GearParentInferenceRuleTest(unittest.TestCase):
    """Rule-level tests for the helpers imported from gear_parent_inference."""

    def _candidate(self, *, mmsi='123456789', score=0.8, sources=None):
        """Build a CandidateScore fixture; only mmsi, score and sources vary.

        ``sources`` defaults to ``['CORRELATION']`` and is written both to
        ``candidate_source`` (comma-joined) and to ``evidence['sources']``.
        """
        return CandidateScore(
            mmsi=mmsi,
            name='TEST',
            vessel_id=1,
            target_type='VESSEL',
            candidate_source=','.join(sources or ['CORRELATION']),
            base_corr_score=0.7,
            name_match_score=0.1,
            track_similarity_score=0.8,
            visit_score_6h=0.4,
            proximity_score_6h=0.3,
            activity_sync_score_6h=0.2,
            stability_score=0.9,
            registry_bonus=0.05,
            episode_prior_bonus=0.0,
            lineage_prior_bonus=0.0,
            label_prior_bonus=0.0,
            final_score=score,
            streak_count=6,
            model_id=1,
            model_name='default',
            evidence={'sources': sources or ['CORRELATION']},
        )

    def test_normalize_parent_name_removes_space_symbols(self):
        """Spaces, '_', '-' and '%' are stripped from the parent name."""
        self.assertEqual(normalize_parent_name(' A_B-C% 12 '), 'ABC12')

    def test_trackable_parent_name_requires_length_four_after_normalize(self):
        """Names that normalize to fewer than four characters are not trackable."""
        self.assertFalse(is_trackable_parent_name('A-1%'))
        self.assertFalse(is_trackable_parent_name('ZSY'))
        self.assertFalse(is_trackable_parent_name('991'))
        self.assertTrue(is_trackable_parent_name(' AB_12 '))

    def test_name_match_score_prefers_raw_exact(self):
        """Identical raw names score 1.0."""
        self.assertEqual(_name_match_score('LUWENYU 53265', 'LUWENYU 53265', None), 1.0)

    def test_name_match_score_supports_compact_exact_and_prefix(self):
        """Pin the graded scores 0.8 / 0.5 / 1.0 / 0.3 for weaker name matches."""
        registry = RegistryVessel(
            vessel_id=1,
            mmsi='412327765',
            name_cn='LUWENYU53265',
            name_en='LUWENYU 53265',
        )
        # Equal once spaces are ignored.
        self.assertEqual(_name_match_score('LUWENYU 53265', 'LUWENYU53265', None), 0.8)
        # Parent name is a prefix of the candidate name.
        self.assertEqual(_name_match_score('LUWENYU 532', 'LUWENYU53265', None), 0.5)
        # Candidate name differs, but the registry entry carries the exact name.
        self.assertEqual(_name_match_score('LUWENYU 53265', 'DIFFERENT', registry), 1.0)
        # Same alphabetic prefix, different number.
        self.assertEqual(_name_match_score('ZHEDAIYU02433', 'ZHEDAIYU06178', None), 0.3)

    def test_name_match_score_does_not_use_candidate_registry_self_match(self):
        """A registry entry matching only the candidate's own name earns nothing."""
        registry = RegistryVessel(
            vessel_id=1,
            mmsi='412413545',
            name_cn='ZHEXIANGYU55005',
            name_en='ZHEXIANGYU55005',
        )
        self.assertEqual(_name_match_score('JINSHI', 'ZHEXIANGYU55005', registry), 0.0)

    def test_direct_parent_member_prefers_parent_member_then_parent_mmsi(self):
        """The parent is resolved from an isParent member, else from parent_mmsi."""
        all_positions = {'412420673': {'name': 'ZHEDAIYU02433'}}
        from_members = _direct_parent_member(
            {
                'parent_name': 'ZHEDAIYU02433',
                'members': [
                    {'mmsi': '412420673', 'name': 'ZHEDAIYU02433', 'isParent': True},
                    {'mmsi': '24330082', 'name': 'ZHEDAIYU02433_82_99_', 'isParent': False},
                ],
            },
            all_positions,
        )
        self.assertEqual(from_members['mmsi'], '412420673')

        # No members: fall back to parent_mmsi; the name comes from all_positions.
        from_parent_mmsi = _direct_parent_member(
            {
                'parent_name': 'ZHEDAIYU02433',
                'parent_mmsi': '412420673',
                'members': [],
            },
            all_positions,
        )
        self.assertEqual(from_parent_mmsi['mmsi'], '412420673')
        self.assertEqual(from_parent_mmsi['name'], 'ZHEDAIYU02433')

    def test_direct_parent_stable_cycles_reuses_same_parent(self):
        """Same direct parent increments stable_cycles; a new parent restarts at 1."""
        existing = {
            'selected_parent_mmsi': '412420673',
            'stable_cycles': 4,
            'evidence_summary': {'directParentMmsi': '412420673'},
        }
        self.assertEqual(_direct_parent_stable_cycles(existing, '412420673'), 5)
        self.assertEqual(_direct_parent_stable_cycles(existing, '412000000'), 1)

    def test_china_prefix_bonus_requires_threshold(self):
        """412/413 MMSIs get 0.15 at score >= 0.30; lower scores and other prefixes get 0."""
        self.assertEqual(_china_mmsi_prefix_bonus('412327765', 0.30), 0.15)
        self.assertEqual(_china_mmsi_prefix_bonus('413987654', 0.65), 0.15)
        self.assertEqual(_china_mmsi_prefix_bonus('412327765', 0.29), 0.0)
        self.assertEqual(_china_mmsi_prefix_bonus('440123456', 0.75), 0.0)

    def test_apply_final_score_bonus_adds_bonus_after_weighted_score(self):
        """Returns (pre-bonus score, bonus, final score) as floats: 0.66 + 0.15 = 0.81."""
        pre_bonus_score, china_bonus, final_score = _apply_final_score_bonus('412333326', 0.66)
        self.assertIsInstance(pre_bonus_score, float)
        self.assertIsInstance(china_bonus, float)
        self.assertIsInstance(final_score, float)
        self.assertEqual(pre_bonus_score, 0.66)
        self.assertEqual(china_bonus, 0.15)
        self.assertEqual(final_score, 0.81)

    def test_top_candidate_stable_cycles_resets_on_candidate_change(self):
        """Same top candidate increments the counter; a different one restarts at 1."""
        existing = {
            'stable_cycles': 5,
            'evidence_summary': {'topCandidateMmsi': '111111111'},
        }
        self.assertEqual(_top_candidate_stable_cycles(existing, self._candidate(mmsi='111111111')), 6)
        self.assertEqual(_top_candidate_stable_cycles(existing, self._candidate(mmsi='222222222')), 1)

    def test_select_status_requires_recent_stability_and_correlation_for_auto(self):
        """Auto-promotion needs a CORRELATION source and three stable cycles."""
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['CORRELATION']), margin=0.2, stable_cycles=3),
            (_AUTO_PROMOTED_STATUS, 'AUTO_PROMOTION'),
        )
        # Same score and margin, but the source is not CORRELATION.
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['PREVIOUS_SELECTION']), margin=0.2, stable_cycles=3),
            (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW'),
        )
        # Same candidate, but only two stable cycles.
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['CORRELATION']), margin=0.2, stable_cycles=2),
            (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW'),
        )

    def test_select_status_marks_candidate_gaps_explicitly(self):
        """No candidate and a 0.45-score candidate each map to their own status."""
        self.assertEqual(_select_status(None, margin=0.0, stable_cycles=0), (_NO_CANDIDATE_STATUS, 'AUTO_NO_CANDIDATE'))
        self.assertEqual(
            _select_status(self._candidate(score=0.45, sources=['CORRELATION']), margin=0.1, stable_cycles=1),
            (_UNRESOLVED_STATUS, 'AUTO_SCORE'),
        )

    def test_build_candidate_scores_applies_active_exclusions_before_scoring(self):
        """An MMSI in excluded_candidate_mmsis never appears in the result."""
        # Minimal stand-in for the vessel store: it exposes only an empty `_tracks`.
        class FakeStore:
            _tracks = {}

        candidates = _build_candidate_scores(
            vessel_store=FakeStore(),
            observed_at=datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc),
            group={'parent_name': 'AB1234', 'sub_cluster_id': 1},
            episode_assignment=types.SimpleNamespace(
                episode_id='ep-test',
                continuity_source='NEW',
                continuity_score=0.0,
            ),
            default_model_id=1,
            default_model_name='default',
            score_rows=[
                {
                    'target_mmsi': '412111111',
                    'target_type': 'VESSEL',
                    'target_name': 'AB1234',
                    'current_score': 0.8,
                    'streak_count': 4,
                },
                {
                    'target_mmsi': '440222222',
                    'target_type': 'VESSEL',
                    'target_name': 'AB1234',
                    'current_score': 0.7,
                    'streak_count': 3,
                },
            ],
            raw_metrics={},
            center_track=[],
            all_positions={},
            registry_by_mmsi={},
            registry_by_name={},
            existing=None,
            excluded_candidate_mmsis={'412111111'},
            episode_prior_stats={},
            lineage_prior_stats={},
            label_prior_stats={},
        )
        # The higher-scoring row is excluded, so only the second MMSI remains.
        self.assertEqual([candidate.mmsi for candidate in candidates], ['440222222'])

    def test_track_coverage_metrics_penalize_short_track_support(self):
        """A one-point track gets coverage factor 0; a ten-point track scores higher."""
        now = datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc)
        center_track = [
            {'timestamp': now - timedelta(hours=5), 'lat': 35.0, 'lon': 129.0},
            {'timestamp': now - timedelta(hours=1), 'lat': 35.1, 'lon': 129.1},
        ]
        short_track = [
            {'timestamp': now - timedelta(minutes=10), 'lat': 35.1, 'lon': 129.1, 'sog': 0.5},
        ]
        # Ten points at 10-minute spacing, starting 90 minutes before `now`.
        long_track = [
            {'timestamp': now - timedelta(minutes=90) + timedelta(minutes=10 * idx), 'lat': 35.0, 'lon': 129.0 + (0.01 * idx), 'sog': 0.5}
            for idx in range(10)
        ]

        short_metrics = _build_track_coverage_metrics(center_track, short_track, 35.05, 129.05)
        long_metrics = _build_track_coverage_metrics(center_track, long_track, 35.05, 129.05)

        self.assertEqual(short_metrics['trackPointCount'], 1)
        self.assertEqual(short_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['coverageFactor'], short_metrics['coverageFactor'])

    def test_label_tracking_row_tracks_rank_and_match_flags(self):
        """Pin selected positions of the row built for a labelled parent ranked second.

        NOTE(review): the meaning of each row index is defined by
        ``_label_tracking_row`` itself, which is not visible here — confirm
        the column order there before changing these assertions.
        """
        top_candidate = self._candidate(mmsi='412333326', score=0.81, sources=['CORRELATION'])
        top_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.66},
        }
        # The labelled parent is the second, lower-scoring candidate.
        labeled_candidate = self._candidate(mmsi='440123456', score=0.62, sources=['CORRELATION'])
        labeled_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.62},
        }

        row = _label_tracking_row(
            observed_at='2026-04-03T00:00:00Z',
            label_session={
                'id': 10,
                'label_parent_mmsi': '440123456',
                'label_parent_name': 'TARGET',
            },
            auto_status='REVIEW_REQUIRED',
            top_candidate=top_candidate,
            margin=0.19,
            candidates=[top_candidate, labeled_candidate],
        )
        self.assertEqual(row[0], 10)
        self.assertEqual(row[8], 2)
        self.assertTrue(row[9])
        self.assertEqual(row[10], 2)
        self.assertEqual(row[11], 0.62)
        self.assertEqual(row[12], 0.62)
        self.assertFalse(row[14])
        self.assertTrue(row[15])
test_initial_window_start_anchors_to_safe_bucket(self): + safe_bucket = datetime(2026, 4, 2, 15, 0, 0) + self.assertEqual(compute_initial_window_start(24, safe_bucket), datetime(2026, 4, 1, 15, 0, 0)) + + def test_merge_incremental_prefers_newer_overlap_rows(self): + store = VesselStore() + store._tracks = { + '412000001': pd.DataFrame([ + { + 'mmsi': '412000001', + 'timestamp': pd.Timestamp('2026-04-02T00:01:00Z'), + 'time_bucket': datetime(2026, 4, 2, 9, 0, 0), + 'lat': 30.0, + 'lon': 120.0, + 'raw_sog': 1.0, + }, + { + 'mmsi': '412000001', + 'timestamp': pd.Timestamp('2026-04-02T00:02:00Z'), + 'time_bucket': datetime(2026, 4, 2, 9, 0, 0), + 'lat': 30.1, + 'lon': 120.1, + 'raw_sog': 1.0, + }, + ]) + } + df_new = pd.DataFrame([ + { + 'mmsi': '412000001', + 'timestamp': pd.Timestamp('2026-04-02T00:02:00Z'), + 'time_bucket': datetime(2026, 4, 2, 9, 0, 0), + 'lat': 30.2, + 'lon': 120.2, + 'raw_sog': 2.0, + }, + { + 'mmsi': '412000001', + 'timestamp': pd.Timestamp('2026-04-02T00:03:00Z'), + 'time_bucket': datetime(2026, 4, 2, 9, 5, 0), + 'lat': 30.3, + 'lon': 120.3, + 'raw_sog': 2.0, + }, + ]) + + store.merge_incremental(df_new) + + merged = store._tracks['412000001'] + self.assertEqual(len(merged), 3) + replacement = merged.loc[merged['timestamp'] == pd.Timestamp('2026-04-02T00:02:00Z')].iloc[0] + self.assertEqual(float(replacement['lat']), 30.2) + self.assertEqual(float(replacement['lon']), 120.2) + + +if __name__ == '__main__': + unittest.main() diff --git a/prediction/time_bucket.py b/prediction/time_bucket.py new file mode 100644 index 0000000..2cc741d --- /dev/null +++ b/prediction/time_bucket.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from datetime import datetime, timedelta, timezone +from zoneinfo import ZoneInfo + +from config import settings + +_KST = ZoneInfo('Asia/Seoul') +_BUCKET_MINUTES = 5 + + +def normalize_bucket_kst(bucket: datetime) -> datetime: + if bucket.tzinfo is None: + return bucket + return 
bucket.astimezone(_KST).replace(tzinfo=None) + + +def floor_bucket_kst(value: datetime, bucket_minutes: int = _BUCKET_MINUTES) -> datetime: + if value.tzinfo is None: + localized = value.replace(tzinfo=_KST) + else: + localized = value.astimezone(_KST) + floored_minute = (localized.minute // bucket_minutes) * bucket_minutes + return localized.replace(minute=floored_minute, second=0, microsecond=0) + + +def compute_safe_bucket(now: datetime | None = None) -> datetime: + current = now or datetime.now(timezone.utc) + if current.tzinfo is None: + current = current.replace(tzinfo=timezone.utc) + safe_point = current.astimezone(_KST) - timedelta(minutes=settings.SNPDB_SAFE_DELAY_MIN) + return floor_bucket_kst(safe_point).replace(tzinfo=None) + + +def compute_initial_window_start(hours: int, safe_bucket: datetime | None = None) -> datetime: + anchor = normalize_bucket_kst(safe_bucket or compute_safe_bucket()) + return anchor - timedelta(hours=hours) + + +def compute_incremental_window_start(last_bucket: datetime) -> datetime: + normalized = normalize_bucket_kst(last_bucket) + return normalized - timedelta(minutes=settings.SNPDB_BACKFILL_BUCKETS * _BUCKET_MINUTES)