kcg-ai-monitoring/prediction/output/violation_classifier.py

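"""
Violation-type labelling — tags analysis results with violation_categories[].

Each vessel_analysis_results row is checked against five specific violation
categories, plus a RISK_BEHAVIOR fallback for high-risk rows that match none
of them. The violation_categories TEXT[] column is updated for rows that have
an id and at least one matching category.
"""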
import logging
from psycopg2.extras import execute_batch

from config import qualified_table
from db.kcgdb import get_conn

logger = logging.getLogger(__name__)

VAR_TABLE = qualified_table('vessel_analysis_results')


def classify_violations(result: dict) -> list[str]:
    """Return the violation labels that apply to a single analysis result.

    Rules are evaluated in a fixed order and every matching label is appended:

    * ``EEZ_VIOLATION``     - ``zone_code`` is one of the restricted zones and
      ``permit_status`` is ``NONE``, ``EXPIRED`` or ``REVOKED``.
    * ``DARK_VESSEL``       - ``is_dark`` is truthy and ``gap_duration_min``
      is strictly greater than 30.
    * ``MMSI_TAMPERING``    - ``spoofing_score`` is strictly greater than 0.6.
    * ``ILLEGAL_TRANSSHIP`` - ``transship_suspect`` is truthy.
    * ``ILLEGAL_GEAR``      - ``gear_judgment`` is one of ``NO_PERMIT``,
      ``GEAR_MISMATCH``, ``ZONE_VIOLATION``, ``SEASON_VIOLATION``.
    * ``RISK_BEHAVIOR``     - none of the above matched and ``risk_score``
      is at least 70.

    Missing keys and falsy stored values (``None``, ``''``, ``0``) are treated
    as the neutral default for that field.

    Args:
        result: Analysis result mapping; only the keys named above are read.

    Returns:
        The matching labels in rule order; an empty list when nothing matches.
    """
    restricted_zones = (
        'NLL', 'SPECIAL_FISHING_1', 'SPECIAL_FISHING_2',
        'SPECIAL_FISHING_3', 'SPECIAL_FISHING_4', 'EEZ_KR',
    )
    invalid_permits = ('NONE', 'EXPIRED', 'REVOKED')
    gear_offences = (
        'NO_PERMIT', 'GEAR_MISMATCH', 'ZONE_VIOLATION', 'SEASON_VIOLATION',
    )

    def read(key, fallback):
        # A stored None / '' / 0 collapses to the same fallback as a missing key.
        return result.get(key, fallback) or fallback

    labels = []

    # EEZ intrusion: inside a restricted zone without a valid permit.
    in_restricted_zone = read('zone_code', '') in restricted_zones
    if in_restricted_zone and read('permit_status', 'UNKNOWN') in invalid_permits:
        labels.append('EEZ_VIOLATION')

    # Dark vessel: flagged dark with a reporting gap longer than 30 minutes.
    if result.get('is_dark', False) and read('gap_duration_min', 0) > 30:
        labels.append('DARK_VESSEL')

    # MMSI tampering: spoofing score above the 0.6 threshold.
    if read('spoofing_score', 0) > 0.6:
        labels.append('MMSI_TAMPERING')

    # Illegal transshipment.
    if result.get('transship_suspect', False):
        labels.append('ILLEGAL_TRANSSHIP')

    # Illegal gear (only when a gear_judgment value is present).
    if read('gear_judgment', '') in gear_offences:
        labels.append('ILLEGAL_GEAR')

    # Risky behaviour: high risk score with no other violation recorded.
    if not labels and read('risk_score', 0) >= 70:
        labels.append('RISK_BEHAVIOR')

    return labels


def run_violation_classifier(analysis_results: list[dict]) -> dict:
    """
    Label analysis results with violation categories and persist them.

    Each result is classified with ``classify_violations``. Results that have
    a truthy ``id`` and at least one violation are written to the
    ``violation_categories`` column of ``VAR_TABLE`` in one batched UPDATE.
    Results with no violations, or without an ``id``, are not written.

    Args:
        analysis_results: Analysis result dicts. Each is read for ``id`` and
            for the fields consumed by ``classify_violations``.

    Returns:
        ``{'classified': int, 'violations_found': int}`` where ``classified``
        is the number of rows queued for update and ``violations_found`` is
        the total number of violation labels across those rows.
    """
    updates = []
    violations_found = 0

    for result in analysis_results:
        violations = classify_violations(result)
        result_id = result.get('id')
        # Only rows that can be addressed by id and actually have labels are
        # written; everything else is skipped.
        if result_id and violations:
            updates.append((violations, result_id))
            violations_found += len(violations)

    if updates:
        with get_conn() as conn:
            # Close the cursor deterministically instead of leaving it open
            # until garbage collection.
            with conn.cursor() as cur:
                execute_batch(
                    cur,
                    f"UPDATE {VAR_TABLE} SET violation_categories = %s WHERE id = %s",
                    updates,
                )
            conn.commit()

    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info(
        'violation_classifier: classified=%d, violations=%d',
        len(updates),
        violations_found,
    )
    return {'classified': len(updates), 'violations_found': violations_found}