kcg-ai-monitoring/prediction/tests/test_gear_parent_inference.py
htlee e2fc355b2c feat: S2 prediction 분석 엔진 모노레포 이식
iran prediction 47개 Python 파일을 prediction/ 디렉토리로 복제:
- algorithms/ 14개 분석 알고리즘 (어구추론, 다크베셀, 스푸핑, 환적, 위험도 등)
- pipeline/ 7단계 분류 파이프라인
- cache/vessel_store (24h 슬라이딩 윈도우)
- db/ 어댑터 (snpdb 원본조회, kcgdb 결과저장)
- chat/ AI 채팅 (Ollama, 후순위)
- data/ 정적 데이터 (기선, 특정어업수역 GeoJSON)

config.py를 kcgaidb로 재구성 (DB명, 사용자, 비밀번호)
DB 연결 검증 완료 (kcgaidb 37개 테이블 접근 확인)
Makefile에 dev-prediction / dev-all 타겟 추가
CLAUDE.md에 prediction 섹션 추가

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 12:56:51 +09:00

280 lines
11 KiB
Python

import unittest
import sys
import types
from datetime import datetime, timedelta, timezone
# Stand-in for the third-party ``pydantic_settings`` package. It is registered
# before the project import below so that import can succeed without the real
# package -- presumably the project's config module imports it; verify.
stub = types.ModuleType('pydantic_settings')


class BaseSettings:
    """Lightweight replacement for ``pydantic_settings.BaseSettings``.

    Every upper-case class attribute is treated as a setting and copied onto
    the instance. A keyword argument with the same name takes precedence over
    the class-level default. Settings declared on parent classes are honoured
    as well, with a subclass value overriding the inherited one. Keyword
    arguments that match no upper-case class attribute are ignored.
    """

    def __init__(self, **kwargs):
        defaults = {}
        # Walk the MRO base-first so a subclass's value replaces whatever it
        # inherited; scanning only the concrete class would drop inherited
        # settings and their keyword overrides.
        for klass in reversed(type(self).__mro__):
            defaults.update(
                (name, value)
                for name, value in vars(klass).items()
                if name.isupper()
            )
        for name, default in defaults.items():
            setattr(self, name, kwargs.get(name, default))


stub.BaseSettings = BaseSettings
# setdefault leaves an entry already present in sys.modules untouched, so the
# stub is only used when no ``pydantic_settings`` module was loaded earlier.
sys.modules.setdefault('pydantic_settings', stub)
from algorithms.gear_parent_inference import (
RegistryVessel,
CandidateScore,
_AUTO_PROMOTED_STATUS,
_apply_final_score_bonus,
_build_track_coverage_metrics,
_build_candidate_scores,
_china_mmsi_prefix_bonus,
_direct_parent_member,
_direct_parent_stable_cycles,
_label_tracking_row,
_NO_CANDIDATE_STATUS,
_REVIEW_REQUIRED_STATUS,
_UNRESOLVED_STATUS,
_name_match_score,
_select_status,
_top_candidate_stable_cycles,
is_trackable_parent_name,
normalize_parent_name,
)
class GearParentInferenceRuleTest(unittest.TestCase):
    """Rule-level checks for helpers imported from algorithms.gear_parent_inference."""

    def _candidate(self, *, mmsi='123456789', score=0.8, sources=None):
        """Return a CandidateScore fixture.

        Only the MMSI, the final score and the source list are configurable;
        every other field is a fixed placeholder. ``sources`` falls back to
        ``['CORRELATION']`` when it is omitted or empty.
        """
        source_list = sources or ['CORRELATION']
        fields = {
            'mmsi': mmsi,
            'name': 'TEST',
            'vessel_id': 1,
            'target_type': 'VESSEL',
            'candidate_source': ','.join(source_list),
            'base_corr_score': 0.7,
            'name_match_score': 0.1,
            'track_similarity_score': 0.8,
            'visit_score_6h': 0.4,
            'proximity_score_6h': 0.3,
            'activity_sync_score_6h': 0.2,
            'stability_score': 0.9,
            'registry_bonus': 0.05,
            'episode_prior_bonus': 0.0,
            'lineage_prior_bonus': 0.0,
            'label_prior_bonus': 0.0,
            'final_score': score,
            'streak_count': 6,
            'model_id': 1,
            'model_name': 'default',
            'evidence': {'sources': source_list},
        }
        return CandidateScore(**fields)

    def test_normalize_parent_name_removes_space_symbols(self):
        # Spaces, '_', '-' and '%' are expected to disappear from the name.
        normalized = normalize_parent_name(' A_B-C% 12 ')
        self.assertEqual(normalized, 'ABC12')

    def test_trackable_parent_name_requires_length_four_after_normalize(self):
        # Names shorter than four characters once normalized are rejected.
        for raw_name in ('A-1%', 'ZSY', '991'):
            self.assertFalse(is_trackable_parent_name(raw_name))
        self.assertTrue(is_trackable_parent_name(' AB_12 '))

    def test_name_match_score_prefers_raw_exact(self):
        # Identical raw strings yield the maximum score.
        result = _name_match_score('LUWENYU 53265', 'LUWENYU 53265', None)
        self.assertEqual(result, 1.0)

    def test_name_match_score_supports_compact_exact_and_prefix(self):
        registry = RegistryVessel(
            vessel_id=1,
            mmsi='412327765',
            name_cn='LUWENYU53265',
            name_en='LUWENYU 53265',
        )
        # (parent name, candidate name, registry entry, expected score)
        cases = (
            ('LUWENYU 53265', 'LUWENYU53265', None, 0.8),
            ('LUWENYU 532', 'LUWENYU53265', None, 0.5),
            ('LUWENYU 53265', 'DIFFERENT', registry, 1.0),
            ('ZHEDAIYU02433', 'ZHEDAIYU06178', None, 0.3),
        )
        for parent_name, candidate_name, registry_entry, expected in cases:
            self.assertEqual(
                _name_match_score(parent_name, candidate_name, registry_entry),
                expected,
            )

    def test_name_match_score_does_not_use_candidate_registry_self_match(self):
        # The registry names equal the candidate name but not the parent name,
        # so no score may be awarded.
        registry = RegistryVessel(
            vessel_id=1,
            mmsi='412413545',
            name_cn='ZHEXIANGYU55005',
            name_en='ZHEXIANGYU55005',
        )
        result = _name_match_score('JINSHI', 'ZHEXIANGYU55005', registry)
        self.assertEqual(result, 0.0)

    def test_direct_parent_member_prefers_parent_member_then_parent_mmsi(self):
        all_positions = {'412420673': {'name': 'ZHEDAIYU02433'}}

        # Case 1: the group lists a member flagged as the parent.
        group_with_members = {
            'parent_name': 'ZHEDAIYU02433',
            'members': [
                {'mmsi': '412420673', 'name': 'ZHEDAIYU02433', 'isParent': True},
                {'mmsi': '24330082', 'name': 'ZHEDAIYU02433_82_99_', 'isParent': False},
            ],
        }
        from_members = _direct_parent_member(group_with_members, all_positions)
        self.assertEqual(from_members['mmsi'], '412420673')

        # Case 2: no members, only an explicit parent_mmsi on the group.
        group_with_parent_mmsi = {
            'parent_name': 'ZHEDAIYU02433',
            'parent_mmsi': '412420673',
            'members': [],
        }
        from_parent_mmsi = _direct_parent_member(group_with_parent_mmsi, all_positions)
        self.assertEqual(from_parent_mmsi['mmsi'], '412420673')
        self.assertEqual(from_parent_mmsi['name'], 'ZHEDAIYU02433')

    def test_direct_parent_stable_cycles_reuses_same_parent(self):
        # Same parent MMSI -> counter goes from 4 to 5; a different one -> 1.
        existing = {
            'selected_parent_mmsi': '412420673',
            'stable_cycles': 4,
            'evidence_summary': {'directParentMmsi': '412420673'},
        }
        for parent_mmsi, expected_cycles in (('412420673', 5), ('412000000', 1)):
            self.assertEqual(
                _direct_parent_stable_cycles(existing, parent_mmsi),
                expected_cycles,
            )

    def test_china_prefix_bonus_requires_threshold(self):
        # (mmsi, score passed in, expected bonus)
        cases = (
            ('412327765', 0.30, 0.15),
            ('413987654', 0.65, 0.15),
            ('412327765', 0.29, 0.0),
            ('440123456', 0.75, 0.0),
        )
        for mmsi, input_score, expected_bonus in cases:
            self.assertEqual(_china_mmsi_prefix_bonus(mmsi, input_score), expected_bonus)

    def test_apply_final_score_bonus_adds_bonus_after_weighted_score(self):
        result = _apply_final_score_bonus('412333326', 0.66)
        pre_bonus_score, china_bonus, final_score = result
        # All three components must come back as floats ...
        for component in (pre_bonus_score, china_bonus, final_score):
            self.assertIsInstance(component, float)
        # ... and the final score is the input score plus the bonus.
        self.assertEqual(pre_bonus_score, 0.66)
        self.assertEqual(china_bonus, 0.15)
        self.assertEqual(final_score, 0.81)

    def test_top_candidate_stable_cycles_resets_on_candidate_change(self):
        # Same top candidate -> counter goes from 5 to 6; a new one -> 1.
        existing = {
            'stable_cycles': 5,
            'evidence_summary': {'topCandidateMmsi': '111111111'},
        }
        for mmsi, expected_cycles in (('111111111', 6), ('222222222', 1)):
            candidate = self._candidate(mmsi=mmsi)
            self.assertEqual(
                _top_candidate_stable_cycles(existing, candidate),
                expected_cycles,
            )

    def test_select_status_requires_recent_stability_and_correlation_for_auto(self):
        # With score 0.8 and margin 0.2, only the CORRELATION-sourced candidate
        # with 3 stable cycles is auto-promoted; the others need review.
        cases = (
            (['CORRELATION'], 3, (_AUTO_PROMOTED_STATUS, 'AUTO_PROMOTION')),
            (['PREVIOUS_SELECTION'], 3, (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW')),
            (['CORRELATION'], 2, (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW')),
        )
        for sources, cycles, expected in cases:
            candidate = self._candidate(score=0.8, sources=sources)
            self.assertEqual(
                _select_status(candidate, margin=0.2, stable_cycles=cycles),
                expected,
            )

    def test_select_status_marks_candidate_gaps_explicitly(self):
        # No candidate at all gets its own status.
        no_candidate = _select_status(None, margin=0.0, stable_cycles=0)
        self.assertEqual(no_candidate, (_NO_CANDIDATE_STATUS, 'AUTO_NO_CANDIDATE'))

        # A candidate scoring 0.45 stays unresolved.
        weak_candidate = self._candidate(score=0.45, sources=['CORRELATION'])
        self.assertEqual(
            _select_status(weak_candidate, margin=0.1, stable_cycles=1),
            (_UNRESOLVED_STATUS, 'AUTO_SCORE'),
        )

    def test_build_candidate_scores_applies_active_exclusions_before_scoring(self):
        class FakeStore:
            # Vessel store stand-in exposing only an empty ``_tracks`` mapping.
            _tracks = {}

        episode = types.SimpleNamespace(
            episode_id='ep-test',
            continuity_source='NEW',
            continuity_score=0.0,
        )
        excluded_row = {
            'target_mmsi': '412111111',
            'target_type': 'VESSEL',
            'target_name': 'AB1234',
            'current_score': 0.8,
            'streak_count': 4,
        }
        kept_row = {
            'target_mmsi': '440222222',
            'target_type': 'VESSEL',
            'target_name': 'AB1234',
            'current_score': 0.7,
            'streak_count': 3,
        }
        candidates = _build_candidate_scores(
            vessel_store=FakeStore(),
            observed_at=datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc),
            group={'parent_name': 'AB1234', 'sub_cluster_id': 1},
            episode_assignment=episode,
            default_model_id=1,
            default_model_name='default',
            score_rows=[excluded_row, kept_row],
            raw_metrics={},
            center_track=[],
            all_positions={},
            registry_by_mmsi={},
            registry_by_name={},
            existing=None,
            excluded_candidate_mmsis={'412111111'},
            episode_prior_stats={},
            lineage_prior_stats={},
            label_prior_stats={},
        )
        # Only the non-excluded MMSI may survive.
        surviving_mmsis = [entry.mmsi for entry in candidates]
        self.assertEqual(surviving_mmsis, ['440222222'])

    def test_track_coverage_metrics_penalize_short_track_support(self):
        now = datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc)
        center_track = [
            {'timestamp': now - timedelta(hours=5), 'lat': 35.0, 'lon': 129.0},
            {'timestamp': now - timedelta(hours=1), 'lat': 35.1, 'lon': 129.1},
        ]
        # A single point ten minutes old.
        short_track = [
            {'timestamp': now - timedelta(minutes=10), 'lat': 35.1, 'lon': 129.1, 'sog': 0.5},
        ]
        # Ten points, ten minutes apart, starting ninety minutes back.
        long_track_start = now - timedelta(minutes=90)
        long_track = []
        for idx in range(10):
            long_track.append({
                'timestamp': long_track_start + timedelta(minutes=10 * idx),
                'lat': 35.0,
                'lon': 129.0 + (0.01 * idx),
                'sog': 0.5,
            })

        short_metrics = _build_track_coverage_metrics(center_track, short_track, 35.05, 129.05)
        long_metrics = _build_track_coverage_metrics(center_track, long_track, 35.05, 129.05)

        self.assertEqual(short_metrics['trackPointCount'], 1)
        self.assertEqual(short_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['coverageFactor'], short_metrics['coverageFactor'])

    def test_label_tracking_row_tracks_rank_and_match_flags(self):
        top_candidate = self._candidate(mmsi='412333326', score=0.81, sources=['CORRELATION'])
        top_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.66},
        }
        # The labelled parent is the second-ranked candidate, not the top one.
        labeled_candidate = self._candidate(mmsi='440123456', score=0.62, sources=['CORRELATION'])
        labeled_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.62},
        }
        label_session = {
            'id': 10,
            'label_parent_mmsi': '440123456',
            'label_parent_name': 'TARGET',
        }
        row = _label_tracking_row(
            observed_at='2026-04-03T00:00:00Z',
            label_session=label_session,
            auto_status='REVIEW_REQUIRED',
            top_candidate=top_candidate,
            margin=0.19,
            candidates=[top_candidate, labeled_candidate],
        )
        # The row is positional; column meanings are defined by
        # _label_tracking_row, which is not visible from this file.
        self.assertEqual(row[0], 10)
        self.assertEqual(row[8], 2)
        self.assertTrue(row[9])
        self.assertEqual(row[10], 2)
        self.assertEqual(row[11], 0.62)
        self.assertEqual(row[12], 0.62)
        self.assertFalse(row[14])
        self.assertTrue(row[15])
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()