diff --git a/prediction/fleet_tracker.py b/prediction/fleet_tracker.py index 7c49e29..a81d3e9 100644 --- a/prediction/fleet_tracker.py +++ b/prediction/fleet_tracker.py @@ -23,13 +23,40 @@ GEAR_IDENTITY_LOG = qualified_table('gear_identity_log') GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') FLEET_TRACKING_SNAPSHOT = qualified_table('fleet_tracking_snapshot') +# 선박명 정규화: 공백/구두점 제거, 선박번호 suffix 제거, upper() 통일. +# AIS 이름과 fishery_permit_cn 이름 간 suffix/공백 차이로 NAME_EXACT 매칭률 8.7% → 정규화 기반 매칭으로 회복. +_NAME_STRIP_SUFFIX = re.compile( + r'(?:' + r'[\s_]*(?:NO\.?|#|#)?[\s_]*\d+\s*(?:호|號|号)?' # 123 / No.123 / #123 / 123호 / 12号 + r'|[\s_]*(?:호|號|号)' # 말미 호/號/号 단독 + r')\s*$', + re.IGNORECASE, +) +_NAME_STRIP_CHARS = re.compile(r'[\s\-_./,()\[\]·•\u3000]+') + + +def _normalize_vessel_name(name: Optional[str]) -> str: + """선박명을 매칭용으로 정규화. + + 1. upper() + strip + 2. 말미 선박번호 패턴 제거 (호/號/号, No.N, #N, 공백숫자) + 3. 남은 공백/구두점 제거 + """ + if not name: + return '' + s = name.strip().upper() + s = _NAME_STRIP_SUFFIX.sub('', s) + s = _NAME_STRIP_CHARS.sub('', s) + return s + class FleetTracker: def __init__(self) -> None: self._companies: dict[int, dict] = {} # id → {name_cn, name_en} self._vessels: dict[int, dict] = {} # id → {permit_no, name_cn, ...} - self._name_cn_map: dict[str, int] = {} # name_cn → vessel_id - self._name_en_map: dict[str, int] = {} # name_en(lowercase) → vessel_id + self._name_cn_map: dict[str, int] = {} # name_cn → vessel_id (정확일치) + self._name_en_map: dict[str, int] = {} # name_en(lowercase) → vessel_id (정확일치) + self._name_fuzzy_map: dict[str, list[int]] = {} # 정규화 이름 → [vessel_id, ...] self._mmsi_to_vid: dict[str, int] = {} # mmsi → vessel_id (매칭된 것만) self._gear_active: dict[str, dict] = {} # mmsi → {name, parent_mmsi, ...} self._last_registry_load: float = 0.0 @@ -54,6 +81,7 @@ class FleetTracker: self._vessels = {} self._name_cn_map = {} self._name_en_map = {} + self._name_fuzzy_map = {} self._mmsi_to_vid = {} for r in cur.fetchall(): @@ -73,8 +101,14 @@ class FleetTracker: self._vessels[vid] = v if r[3]: self._name_cn_map[r[3]] = vid + key_cn = _normalize_vessel_name(r[3]) + if key_cn: + self._name_fuzzy_map.setdefault(key_cn, []).append(vid) if r[4]: self._name_en_map[r[4].lower().strip()] = vid + key_en = _normalize_vessel_name(r[4]) + if key_en: + self._name_fuzzy_map.setdefault(key_en, []).append(vid) if r[9]: self._mmsi_to_vid[r[9]] = vid @@ -165,7 +199,8 @@ class FleetTracker: ais_vessels: [{mmsi, name, lat, lon, sog, cog}, ...] """ cur = conn.cursor() - matched = 0 + matched_exact = 0 + matched_fuzzy = 0 for v in ais_vessels: mmsi = v.get('mmsi', '') @@ -185,22 +220,47 @@ class FleetTracker: vid: Optional[int] = self._name_cn_map.get(name) if not vid: vid = self._name_en_map.get(name.lower().strip()) + method = 'NAME_EXACT' + confidence = 0.95 + + # NAME_FUZZY 매칭 (정규화 후 lookup) + if not vid: + key = _normalize_vessel_name(name) + if key: + candidates = self._name_fuzzy_map.get(key, []) + # 이미 다른 MMSI에 할당된 vid 제외 → 동명이 중복 매칭 방지 + unassigned = [ + c for c in candidates + if not self._vessels.get(c, {}).get('mmsi') + or self._vessels[c].get('mmsi') == mmsi + ] + if len(unassigned) == 1: + vid = unassigned[0] + method = 'NAME_FUZZY' + confidence = 0.80 if vid: cur.execute( f"""UPDATE {FLEET_VESSELS} - SET mmsi = %s, match_confidence = 0.95, match_method = 'NAME_EXACT', + SET mmsi = %s, match_confidence = %s, match_method = %s, last_seen_at = NOW(), updated_at = NOW() WHERE id = %s AND (mmsi IS NULL OR mmsi = %s)""", - (mmsi, vid, mmsi), + (mmsi, confidence, method, vid, mmsi), ) self._mmsi_to_vid[mmsi] = vid - matched += 1 + self._vessels[vid]['mmsi'] = mmsi + if method == 'NAME_FUZZY': + matched_fuzzy += 1 + else: + matched_exact += 1 conn.commit() cur.close() - if matched > 0: - logger.info('AIS→registry matched: %d vessels', matched) + if matched_exact or matched_fuzzy: + logger.info( + 'AIS→registry matched: exact=%d, fuzzy=%d', + matched_exact, matched_fuzzy, + ) def track_gear_identity(self, gear_signals: list[dict], conn) -> None: """어구/어망 정체성 추적. @@ -242,13 +302,19 @@ class FleetTracker: if not is_trackable_parent_name(effective_parent_name): continue - # 모선 매칭 + # 모선 매칭 (EXACT → FUZZY 순) parent_mmsi: Optional[str] = None parent_vid: Optional[int] = None if parent_name: vid = self._name_cn_map.get(parent_name) if not vid: vid = self._name_en_map.get(parent_name.lower()) + if not vid: + key = _normalize_vessel_name(parent_name) + if key: + candidates = self._name_fuzzy_map.get(key, []) + if len(candidates) == 1: + vid = candidates[0] if vid: parent_vid = vid parent_mmsi = self._vessels[vid].get('mmsi')