iran prediction 47개 Python 파일을 prediction/ 디렉토리로 복제: - algorithms/ 14개 분석 알고리즘 (어구추론, 다크베셀, 스푸핑, 환적, 위험도 등) - pipeline/ 7단계 분류 파이프라인 - cache/vessel_store (24h 슬라이딩 윈도우) - db/ 어댑터 (snpdb 원본조회, kcgdb 결과저장) - chat/ AI 채팅 (Ollama, 후순위) - data/ 정적 데이터 (기선, 특정어업수역 GeoJSON) config.py를 kcgaidb로 재구성 (DB명, 사용자, 비밀번호) DB 연결 검증 완료 (kcgaidb 37개 테이블 접근 확인) Makefile에 dev-prediction / dev-all 타겟 추가 CLAUDE.md에 prediction 섹션 추가 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
280 lines
11 KiB
Python
280 lines
11 KiB
Python
import unittest
|
|
import sys
|
|
import types
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
# Minimal stand-in for the ``pydantic_settings`` package.
# NOTE(review): presumably this lets the module under test be imported without
# the real dependency installed -- confirm against the project's config module.
stub = types.ModuleType('pydantic_settings')


class BaseSettings:
    """Lightweight replacement for ``pydantic_settings.BaseSettings``.

    Every upper-case class attribute becomes an instance attribute. A keyword
    argument of the same name overrides the class-level default. Attributes
    inherited from intermediate settings classes are handled as well, so
    ``Child(INHERITED_SETTING=...)`` is honoured rather than silently dropped.
    Keyword arguments that match no upper-case class attribute are ignored.
    """

    def __init__(self, **kwargs):
        # Walk the MRO from the most generic class to the most specific one so
        # that a default redefined in a subclass wins over the inherited one.
        for klass in reversed(type(self).__mro__):
            for name, value in vars(klass).items():
                if name.isupper():
                    setattr(self, name, kwargs.get(name, value))


stub.BaseSettings = BaseSettings
# setdefault leaves an already-registered ``pydantic_settings`` module in place.
sys.modules.setdefault('pydantic_settings', stub)
|
|
|
|
from algorithms.gear_parent_inference import (
|
|
RegistryVessel,
|
|
CandidateScore,
|
|
_AUTO_PROMOTED_STATUS,
|
|
_apply_final_score_bonus,
|
|
_build_track_coverage_metrics,
|
|
_build_candidate_scores,
|
|
_china_mmsi_prefix_bonus,
|
|
_direct_parent_member,
|
|
_direct_parent_stable_cycles,
|
|
_label_tracking_row,
|
|
_NO_CANDIDATE_STATUS,
|
|
_REVIEW_REQUIRED_STATUS,
|
|
_UNRESOLVED_STATUS,
|
|
_name_match_score,
|
|
_select_status,
|
|
_top_candidate_stable_cycles,
|
|
is_trackable_parent_name,
|
|
normalize_parent_name,
|
|
)
|
|
|
|
|
|
class GearParentInferenceRuleTest(unittest.TestCase):
    """Rule-level tests for helpers imported from ``algorithms.gear_parent_inference``."""

    def _candidate(self, *, mmsi='123456789', score=0.8, sources=None):
        """Return a ``CandidateScore`` fixture.

        Only ``mmsi``, the final ``score`` and the evidence ``sources``
        (``['CORRELATION']`` when omitted) vary; all other fields are fixed.
        """
        source_list = sources or ['CORRELATION']
        fixed_fields = {
            'name': 'TEST',
            'vessel_id': 1,
            'target_type': 'VESSEL',
            'base_corr_score': 0.7,
            'name_match_score': 0.1,
            'track_similarity_score': 0.8,
            'visit_score_6h': 0.4,
            'proximity_score_6h': 0.3,
            'activity_sync_score_6h': 0.2,
            'stability_score': 0.9,
            'registry_bonus': 0.05,
            'episode_prior_bonus': 0.0,
            'lineage_prior_bonus': 0.0,
            'label_prior_bonus': 0.0,
            'streak_count': 6,
            'model_id': 1,
            'model_name': 'default',
        }
        return CandidateScore(
            mmsi=mmsi,
            candidate_source=','.join(source_list),
            final_score=score,
            evidence={'sources': source_list},
            **fixed_fields,
        )

    def test_normalize_parent_name_removes_space_symbols(self):
        """Spaces and the symbols ``_``, ``-``, ``%`` are stripped from the name."""
        normalized = normalize_parent_name(' A_B-C% 12 ')
        self.assertEqual(normalized, 'ABC12')

    def test_trackable_parent_name_requires_length_four_after_normalize(self):
        """Three rejected sample names and one accepted sample name."""
        for rejected_name in ('A-1%', 'ZSY', '991'):
            self.assertFalse(is_trackable_parent_name(rejected_name))
        self.assertTrue(is_trackable_parent_name(' AB_12 '))

    def test_name_match_score_prefers_raw_exact(self):
        """Two identical raw names score 1.0."""
        score = _name_match_score('LUWENYU 53265', 'LUWENYU 53265', None)
        self.assertEqual(score, 1.0)

    def test_name_match_score_supports_compact_exact_and_prefix(self):
        """Expected scores for four name pairs, one of them with a registry entry."""
        registry_entry = RegistryVessel(
            vessel_id=1,
            mmsi='412327765',
            name_cn='LUWENYU53265',
            name_en='LUWENYU 53265',
        )
        # (first name, second name, registry argument, expected score)
        cases = (
            ('LUWENYU 53265', 'LUWENYU53265', None, 0.8),
            ('LUWENYU 532', 'LUWENYU53265', None, 0.5),
            ('LUWENYU 53265', 'DIFFERENT', registry_entry, 1.0),
            ('ZHEDAIYU02433', 'ZHEDAIYU06178', None, 0.3),
        )
        for first_name, second_name, registry_arg, expected in cases:
            self.assertEqual(
                _name_match_score(first_name, second_name, registry_arg),
                expected,
            )

    def test_name_match_score_does_not_use_candidate_registry_self_match(self):
        """A registry entry that only matches the second name yields 0.0."""
        registry_entry = RegistryVessel(
            vessel_id=1,
            mmsi='412413545',
            name_cn='ZHEXIANGYU55005',
            name_en='ZHEXIANGYU55005',
        )
        score = _name_match_score('JINSHI', 'ZHEXIANGYU55005', registry_entry)
        self.assertEqual(score, 0.0)

    def test_direct_parent_member_prefers_parent_member_then_parent_mmsi(self):
        """The parent is resolved from ``members`` or, failing that, ``parent_mmsi``."""
        positions = {'412420673': {'name': 'ZHEDAIYU02433'}}

        group_with_members = {
            'parent_name': 'ZHEDAIYU02433',
            'members': [
                {'mmsi': '412420673', 'name': 'ZHEDAIYU02433', 'isParent': True},
                {'mmsi': '24330082', 'name': 'ZHEDAIYU02433_82_99_', 'isParent': False},
            ],
        }
        via_members = _direct_parent_member(group_with_members, positions)
        self.assertEqual(via_members['mmsi'], '412420673')

        group_with_parent_mmsi = {
            'parent_name': 'ZHEDAIYU02433',
            'parent_mmsi': '412420673',
            'members': [],
        }
        via_parent_mmsi = _direct_parent_member(group_with_parent_mmsi, positions)
        self.assertEqual(via_parent_mmsi['mmsi'], '412420673')
        self.assertEqual(via_parent_mmsi['name'], 'ZHEDAIYU02433')

    def test_direct_parent_stable_cycles_reuses_same_parent(self):
        """Same parent: 4 -> 5 cycles; a different parent gives 1."""
        previous_state = {
            'selected_parent_mmsi': '412420673',
            'stable_cycles': 4,
            'evidence_summary': {'directParentMmsi': '412420673'},
        }
        same_parent = _direct_parent_stable_cycles(previous_state, '412420673')
        self.assertEqual(same_parent, 5)
        other_parent = _direct_parent_stable_cycles(previous_state, '412000000')
        self.assertEqual(other_parent, 1)

    def test_china_prefix_bonus_requires_threshold(self):
        """Expected bonus for four (mmsi, score) sample pairs."""
        # (mmsi, score argument, expected bonus)
        cases = (
            ('412327765', 0.30, 0.15),
            ('413987654', 0.65, 0.15),
            ('412327765', 0.29, 0.0),
            ('440123456', 0.75, 0.0),
        )
        for mmsi, score_arg, expected_bonus in cases:
            self.assertEqual(_china_mmsi_prefix_bonus(mmsi, score_arg), expected_bonus)

    def test_apply_final_score_bonus_adds_bonus_after_weighted_score(self):
        """The helper returns three floats: pre-bonus score, bonus and final score."""
        outcome = _apply_final_score_bonus('412333326', 0.66)
        pre_bonus_score, china_bonus, final_score = outcome
        for component in (pre_bonus_score, china_bonus, final_score):
            self.assertIsInstance(component, float)
        self.assertEqual(pre_bonus_score, 0.66)
        self.assertEqual(china_bonus, 0.15)
        self.assertEqual(final_score, 0.81)

    def test_top_candidate_stable_cycles_resets_on_candidate_change(self):
        """Same top candidate: 5 -> 6 cycles; a different one gives 1."""
        previous_state = {
            'stable_cycles': 5,
            'evidence_summary': {'topCandidateMmsi': '111111111'},
        }
        # (candidate mmsi, expected stable-cycle count)
        for mmsi, expected_cycles in (('111111111', 6), ('222222222', 1)):
            cycles = _top_candidate_stable_cycles(previous_state, self._candidate(mmsi=mmsi))
            self.assertEqual(cycles, expected_cycles)

    def test_select_status_requires_recent_stability_and_correlation_for_auto(self):
        """Expected status pairs for three source / stable-cycle combinations."""
        # (evidence sources, stable cycles, expected (status, reason) pair)
        scenarios = (
            (['CORRELATION'], 3, (_AUTO_PROMOTED_STATUS, 'AUTO_PROMOTION')),
            (['PREVIOUS_SELECTION'], 3, (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW')),
            (['CORRELATION'], 2, (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW')),
        )
        for sources, cycles, expected in scenarios:
            candidate = self._candidate(score=0.8, sources=sources)
            self.assertEqual(
                _select_status(candidate, margin=0.2, stable_cycles=cycles),
                expected,
            )

    def test_select_status_marks_candidate_gaps_explicitly(self):
        """Expected status pairs for no candidate and for a 0.45-score candidate."""
        without_candidate = _select_status(None, margin=0.0, stable_cycles=0)
        self.assertEqual(without_candidate, (_NO_CANDIDATE_STATUS, 'AUTO_NO_CANDIDATE'))

        weak_candidate = self._candidate(score=0.45, sources=['CORRELATION'])
        self.assertEqual(
            _select_status(weak_candidate, margin=0.1, stable_cycles=1),
            (_UNRESOLVED_STATUS, 'AUTO_SCORE'),
        )

    def test_build_candidate_scores_applies_active_exclusions_before_scoring(self):
        """An MMSI in ``excluded_candidate_mmsis`` is absent from the result."""

        class FakeStore:
            _tracks = {}

        def correlation_row(mmsi, score, streak):
            # Both rows share the target type and name; only these three vary.
            return {
                'target_mmsi': mmsi,
                'target_type': 'VESSEL',
                'target_name': 'AB1234',
                'current_score': score,
                'streak_count': streak,
            }

        episode = types.SimpleNamespace(
            episode_id='ep-test',
            continuity_source='NEW',
            continuity_score=0.0,
        )
        rows = [
            correlation_row('412111111', 0.8, 4),
            correlation_row('440222222', 0.7, 3),
        ]

        scored = _build_candidate_scores(
            vessel_store=FakeStore(),
            observed_at=datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc),
            group={'parent_name': 'AB1234', 'sub_cluster_id': 1},
            episode_assignment=episode,
            default_model_id=1,
            default_model_name='default',
            score_rows=rows,
            raw_metrics={},
            center_track=[],
            all_positions={},
            registry_by_mmsi={},
            registry_by_name={},
            existing=None,
            excluded_candidate_mmsis={'412111111'},
            episode_prior_stats={},
            lineage_prior_stats={},
            label_prior_stats={},
        )

        remaining_mmsis = [entry.mmsi for entry in scored]
        self.assertEqual(remaining_mmsis, ['440222222'])

    def test_track_coverage_metrics_penalize_short_track_support(self):
        """A one-point track gets coverage factor 0.0; a ten-point track scores higher."""
        now = datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc)
        center_track = [
            {'timestamp': now - timedelta(hours=5), 'lat': 35.0, 'lon': 129.0},
            {'timestamp': now - timedelta(hours=1), 'lat': 35.1, 'lon': 129.1},
        ]
        short_track = [
            {'timestamp': now - timedelta(minutes=10), 'lat': 35.1, 'lon': 129.1, 'sog': 0.5},
        ]
        # Ten points spaced ten minutes apart, starting ninety minutes before ``now``.
        long_track = []
        for idx in range(10):
            long_track.append({
                'timestamp': now - timedelta(minutes=90) + timedelta(minutes=10 * idx),
                'lat': 35.0,
                'lon': 129.0 + (0.01 * idx),
                'sog': 0.5,
            })

        short_metrics = _build_track_coverage_metrics(center_track, short_track, 35.05, 129.05)
        long_metrics = _build_track_coverage_metrics(center_track, long_track, 35.05, 129.05)

        self.assertEqual(short_metrics['trackPointCount'], 1)
        self.assertEqual(short_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['coverageFactor'], short_metrics['coverageFactor'])

    def test_label_tracking_row_tracks_rank_and_match_flags(self):
        """Checks selected positional fields of the row built for a labelled session."""

        def with_breakdown(candidate, pre_bonus):
            # Replace the fixture evidence with one carrying a score breakdown.
            candidate.evidence = {
                'sources': ['CORRELATION'],
                'scoreBreakdown': {'preBonusScore': pre_bonus},
            }
            return candidate

        top_candidate = with_breakdown(
            self._candidate(mmsi='412333326', score=0.81, sources=['CORRELATION']),
            0.66,
        )
        labeled_candidate = with_breakdown(
            self._candidate(mmsi='440123456', score=0.62, sources=['CORRELATION']),
            0.62,
        )
        session = {
            'id': 10,
            'label_parent_mmsi': '440123456',
            'label_parent_name': 'TARGET',
        }

        tracking_row = _label_tracking_row(
            observed_at='2026-04-03T00:00:00Z',
            label_session=session,
            auto_status='REVIEW_REQUIRED',
            top_candidate=top_candidate,
            margin=0.19,
            candidates=[top_candidate, labeled_candidate],
        )

        self.assertEqual(tracking_row[0], 10)
        self.assertEqual(tracking_row[8], 2)
        self.assertTrue(tracking_row[9])
        self.assertEqual(tracking_row[10], 2)
        self.assertEqual(tracking_row[11], 0.62)
        self.assertEqual(tracking_row[12], 0.62)
        self.assertFalse(tracking_row[14])
        self.assertTrue(tracking_row[15])
|
|
|
|
|
|
# Run the suite only when this file is executed as a script, not on import.
if __name__ == '__main__':
    unittest.main()
|