kcg-monitoring/prediction/pipeline/classifier.py
htlee a68dfb21b2 feat: Python 어선 분류기 + 배포 설정 + 백엔드 모니터링 프록시
- prediction/: FastAPI 7단계 분류 파이프라인 + 6개 탐지 알고리즘
  - snpdb 궤적 조회 → 인메모리 캐시(13K척) → 분류 → kcgdb 저장
  - APScheduler 5분 주기, Python 3.9 호환
  - 버그 수정: @property last_bucket, SQL INTERVAL 바인딩, rollback, None 가드
  - 보안: DB 비밀번호 하드코딩 제거 → env 환경변수 필수
- deploy/kcg-prediction.service: systemd 서비스 (redis-211, 포트 8001)
- deploy.yml: prediction CI/CD 배포 단계 추가 (192.168.1.18:32023)
- backend: PredictionProxyController (health/status/trigger 프록시)
- backend: AppProperties predictionBaseUrl + AuthFilter 인증 예외

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 12:10:21 +09:00

101 lines
3.4 KiB
Python
Raw Blame 히스토리

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from typing import Dict, Tuple
class VesselTypeClassifier:
    """Rule-based scoring classifier for fishing vessel types.

    Each vessel type has a profile mapping feature names to an inclusive
    ``(lo, hi)`` range. For every profile feature present in the input:

    * a value inside the range scores 1.0 at the range centre, falling
      linearly to 0.0 at either edge;
    * a value outside the range incurs a penalty proportional to its
      distance from the nearest edge relative to the range width, capped
      at 0.5.

    A type's score is the mean over the features that were present. The
    confidence is the best score divided by the sum of all positive scores.

    Profiles (speed ranges are the ``sog_fishing_mean`` bounds below):
        TRAWL    - trawling, 2.5-4.5 kt, high COG variation
        PURSE    - purse-seine, 3-5 kt, circular COG pattern
        LONGLINE - longline, 0.5-2.5 kt, low COG variation, long fishing runs
        TRAP     - trap/pot, 0-2 kt, many stationary events, short range
    """

    PROFILES: Dict[str, Dict[str, Tuple[float, float]]] = {
        'TRAWL': {
            'sog_fishing_mean': (2.5, 4.5),
            'cog_change_mean': (0.15, 9.9),
            'fishing_pct': (0.3, 0.7),
            'fishing_run_mean': (5, 50),
            'stationary_events': (0, 5),
        },
        'PURSE': {
            'sog_fishing_mean': (3.0, 5.0),
            'cog_circularity': (0.2, 1.0),
            'fishing_pct': (0.1, 0.5),
            'fishing_run_mean': (3, 30),
            'stationary_events': (0, 3),
        },
        'LONGLINE': {
            'sog_fishing_mean': (0.5, 2.5),
            'cog_change_mean': (0.0, 0.15),
            'fishing_pct': (0.4, 0.9),
            'fishing_run_mean': (20, 999),
            'stationary_events': (0, 10),
        },
        'TRAP': {
            'sog_fishing_mean': (0.0, 2.0),
            'stationary_pct': (0.2, 0.8),
            'stationary_events': (5, 999),
            'fishing_run_mean': (1, 10),
            'total_distance_km': (0, 100),
        },
    }

    @staticmethod
    def _feature_score(val: float, lo: float, hi: float) -> float:
        """Score one feature value against an inclusive ``(lo, hi)`` range.

        Returns a value in [0, 1] when ``val`` is inside the range and a
        value in [-0.5, 0) when it is outside.
        """
        width = hi - lo
        if lo <= val <= hi:
            mid = (lo + hi) / 2
            # A degenerate range (lo == hi) would divide by zero; use 1.
            half_width = width / 2 if width > 0 else 1
            return max(0.0, 1 - abs(val - mid) / half_width)
        overshoot = min(abs(val - lo), abs(val - hi))
        # The epsilon keeps the division defined for a zero-width range.
        return -min(0.5, overshoot / (width + 1e-9))

    def classify(self, features: Dict) -> Tuple[str, float]:
        """Classify a vessel from its feature dict.

        Args:
            features: Mapping of feature name to numeric value. Features
                that are absent, ``None`` or NaN are treated as missing
                and ignored.

        Returns:
            ``(vessel_type, confidence)`` where confidence is in [0, 1].
            ``('UNKNOWN', 0.0)`` is returned when ``features`` is empty or
            contains no usable value for any profile feature.
        """
        if not features:
            return 'UNKNOWN', 0.0

        scores: Dict[str, float] = {}
        any_matched = False
        for vtype, profile in self.PROFILES.items():
            score_sum = 0.0
            matched = 0
            for feat_name, (lo, hi) in profile.items():
                val = features.get(feat_name)
                # NaN is the only value that differs from itself. Without
                # this guard a NaN fails the range test and is charged the
                # maximum penalty instead of being ignored as missing.
                if val is None or val != val:
                    continue
                matched += 1
                score_sum += self._feature_score(val, lo, hi)
            scores[vtype] = score_sum / matched if matched else 0.0
            any_matched = any_matched or matched > 0

        # With nothing to score, every type ties at 0.0 and max() would
        # just return the first profile; report UNKNOWN instead.
        if not any_matched:
            return 'UNKNOWN', 0.0

        best_type = max(scores, key=scores.get)
        positive_total = sum(max(v, 0.0) for v in scores.values())
        confidence = (
            scores[best_type] / positive_total if positive_total > 0 else 0.0
        )
        return best_type, round(confidence, 3)
def get_season(ts: pd.Timestamp) -> str:
    """Map a timestamp to its Northern-Hemisphere meteorological season.

    Months 3-5 are 'SPRING', 6-8 'SUMMER', 9-11 'FALL'; every other month
    value yields 'WINTER'.

    Reference: paper 12 seasonal activity analysis (Chinese EEZ).
    Chinese fishing ban period: Yellow Sea / East China Sea May-Sep,
    South China Sea May-Aug.
    """
    season_by_month = {
        3: 'SPRING', 4: 'SPRING', 5: 'SPRING',
        6: 'SUMMER', 7: 'SUMMER', 8: 'SUMMER',
        9: 'FALL', 10: 'FALL', 11: 'FALL',
    }
    # Anything not listed (December, January, February) is winter.
    return season_by_month.get(ts.month, 'WINTER')