From 7dd46f20784d7838e0a727645f983fea6a3b45fa Mon Sep 17 00:00:00 2001 From: htlee Date: Sat, 4 Apr 2026 00:42:31 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=EC=96=B4=EA=B5=AC=20=EB=AA=A8=EC=84=A0?= =?UTF-8?q?=20=EC=B6=94=EB=A1=A0(Gear=20Parent=20Inference)=20=EC=8B=9C?= =?UTF-8?q?=EC=8A=A4=ED=85=9C=20=EC=9D=B4=EC=8B=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex Lab 환경(iran-airstrike-replay-codex)에서 검증 완료된 어구 모선 자동 추론 + 검토 워크플로우 전체를 이식. ## Python (prediction/) - gear_parent_inference(1,428줄): 다층 점수 모델 (correlation + name + track + prior bonus) - gear_parent_episode(631줄): Episode 연속성 (Jaccard + 공간거리) - gear_name_rules: 모선 이름 정규화 + 4자 미만 필터 - scheduler: 추론 호출 단계 추가 (4.8) - fleet_tracker/kcgdb: SQL qualified_table() 동적화 - gear_correlation: timestamp 필드 추가 ## DB (database/migration/ 012~015) - 후보 스냅샷, resolution, episode, 라벨 세션, 제외 관리 테이블 9개 + VIEW 2개 ## Backend (Java) - 12개 DTO/Controller (ParentInferenceWorkflowController 등) - GroupPolygonService: parent_resolution LEFT JOIN + 15개 API 메서드 ## Frontend - ParentReviewPanel: 모선 검토 대시보드 - vesselAnalysis: 10개 신규 API 함수 + 6개 타입 Co-Authored-By: Claude Opus 4.6 (1M context) --- ...GlobalParentCandidateExclusionRequest.java | 12 + .../GroupParentCandidateExclusionRequest.java | 13 + .../domain/fleet/GroupParentInferenceDto.java | 26 + .../GroupParentInferenceReviewRequest.java | 13 + .../fleet/GroupParentLabelSessionRequest.java | 13 + .../domain/fleet/GroupPolygonController.java | 57 + .../mda/kcg/domain/fleet/GroupPolygonDto.java | 1 + .../kcg/domain/fleet/GroupPolygonService.java | 1491 ++++++++++++++++- .../fleet/ParentCandidateExclusionDto.java | 28 + .../fleet/ParentInferenceCandidateDto.java | 30 + .../fleet/ParentInferenceSummaryDto.java | 22 + .../ParentInferenceWorkflowController.java | 95 ++ .../domain/fleet/ParentLabelSessionDto.java | 31 + .../fleet/ParentLabelTrackingCycleDto.java | 31 + .../fleet/ParentWorkflowActionRequest.java | 11 + 
.../migration/012_gear_parent_inference.sql | 176 ++ .../013_short_parent_name_cleanup.sql | 23 + .../014_gear_parent_workflow_v2_phase1.sql | 125 ++ .../015_gear_parent_episode_tracking.sql | 111 ++ docs/GEAR-PARENT-INFERENCE-ALGORITHM-SPEC.md | 514 ++++++ docs/GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md | 677 ++++++++ ...EAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md | 706 ++++++++ docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2.md | 693 ++++++++ docs/RELEASE-NOTES.md | 12 + .../components/korea/ParentReviewPanel.tsx | 1391 +++++++++++++++ .../korea/parentInferenceConstants.ts | 15 + .../korea/parentReviewCandidateColors.ts | 13 + .../korea/useReplayCenterPanelLayout.ts | 69 + frontend/src/hooks/useGroupPolygons.ts | 2 +- frontend/src/services/vesselAnalysis.ts | 387 ++++- prediction/algorithms/gear_correlation.py | 17 +- prediction/algorithms/gear_name_rules.py | 19 + prediction/algorithms/gear_parent_episode.py | 631 +++++++ .../algorithms/gear_parent_inference.py | 1428 ++++++++++++++++ prediction/config.py | 14 + prediction/db/kcgdb.py | 17 +- prediction/fleet_tracker.py | 42 +- prediction/main.py | 10 +- prediction/scheduler.py | 21 + prediction/tests/test_gear_parent_episode.py | 177 ++ .../tests/test_gear_parent_inference.py | 279 +++ prediction/tests/test_time_bucket.py | 90 + 42 files changed, 9429 insertions(+), 104 deletions(-) create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/GlobalParentCandidateExclusionRequest.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentCandidateExclusionRequest.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceDto.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceReviewRequest.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentLabelSessionRequest.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentCandidateExclusionDto.java create mode 100644 
backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceCandidateDto.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceSummaryDto.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceWorkflowController.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelSessionDto.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelTrackingCycleDto.java create mode 100644 backend/src/main/java/gc/mda/kcg/domain/fleet/ParentWorkflowActionRequest.java create mode 100644 database/migration/012_gear_parent_inference.sql create mode 100644 database/migration/013_short_parent_name_cleanup.sql create mode 100644 database/migration/014_gear_parent_workflow_v2_phase1.sql create mode 100644 database/migration/015_gear_parent_episode_tracking.sql create mode 100644 docs/GEAR-PARENT-INFERENCE-ALGORITHM-SPEC.md create mode 100644 docs/GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md create mode 100644 docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md create mode 100644 docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2.md create mode 100644 frontend/src/components/korea/ParentReviewPanel.tsx create mode 100644 frontend/src/components/korea/parentInferenceConstants.ts create mode 100644 frontend/src/components/korea/parentReviewCandidateColors.ts create mode 100644 frontend/src/components/korea/useReplayCenterPanelLayout.ts create mode 100644 prediction/algorithms/gear_name_rules.py create mode 100644 prediction/algorithms/gear_parent_episode.py create mode 100644 prediction/algorithms/gear_parent_inference.py create mode 100644 prediction/tests/test_gear_parent_episode.py create mode 100644 prediction/tests/test_gear_parent_inference.py create mode 100644 prediction/tests/test_time_bucket.py diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GlobalParentCandidateExclusionRequest.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GlobalParentCandidateExclusionRequest.java new 
file mode 100644 index 0000000..ac1ecda --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GlobalParentCandidateExclusionRequest.java @@ -0,0 +1,12 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class GlobalParentCandidateExclusionRequest { + private String candidateMmsi; + private String actor; + private String comment; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentCandidateExclusionRequest.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentCandidateExclusionRequest.java new file mode 100644 index 0000000..61e0cc8 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentCandidateExclusionRequest.java @@ -0,0 +1,13 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class GroupParentCandidateExclusionRequest { + private String candidateMmsi; + private Integer durationDays; + private String actor; + private String comment; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceDto.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceDto.java new file mode 100644 index 0000000..13251b1 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceDto.java @@ -0,0 +1,26 @@ +package gc.mda.kcg.domain.fleet; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +import java.util.List; +import java.util.Map; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class GroupParentInferenceDto { + private String groupType; + private String groupKey; + private String groupLabel; + private int subClusterId; + private String snapshotTime; + private String zoneName; + private Integer memberCount; + private String resolution; + private Integer candidateCount; + private ParentInferenceSummaryDto parentInference; + private List<ParentInferenceCandidateDto> candidates; + 
private Map<String, Object> evidenceSummary; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceReviewRequest.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceReviewRequest.java new file mode 100644 index 0000000..66db64a --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentInferenceReviewRequest.java @@ -0,0 +1,13 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class GroupParentInferenceReviewRequest { + private String action; + private String selectedParentMmsi; + private String actor; + private String comment; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentLabelSessionRequest.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentLabelSessionRequest.java new file mode 100644 index 0000000..866c9a2 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupParentLabelSessionRequest.java @@ -0,0 +1,13 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class GroupParentLabelSessionRequest { + private String selectedParentMmsi; + private Integer durationDays; + private String actor; + private String comment; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonController.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonController.java index 5786155..08729ab 100644 --- a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonController.java +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonController.java @@ -63,4 +63,61 @@ public class GroupPolygonController { "items", correlations )); } + + @GetMapping("/parent-inference/review") + public ResponseEntity<Map<String, Object>> getParentInferenceReview( + @RequestParam(defaultValue = "REVIEW_REQUIRED") String status, + @RequestParam(defaultValue = "100") int limit) { + List<GroupParentInferenceDto> items = groupPolygonService.getParentInferenceReview(status, limit); + return 
ResponseEntity.ok(Map.of( + "count", items.size(), + "items", items + )); + } + + @GetMapping("/{groupKey}/parent-inference") + public ResponseEntity<Map<String, Object>> getGroupParentInference(@PathVariable String groupKey) { + List<GroupParentInferenceDto> items = groupPolygonService.getGroupParentInference(groupKey); + return ResponseEntity.ok(Map.of( + "groupKey", groupKey, + "count", items.size(), + "items", items + )); + } + + @PostMapping("/{groupKey}/parent-inference/{subClusterId}/review") + public ResponseEntity<?> reviewGroupParentInference( + @PathVariable String groupKey, + @PathVariable int subClusterId, + @RequestBody GroupParentInferenceReviewRequest request) { + try { + return ResponseEntity.ok(groupPolygonService.reviewParentInference(groupKey, subClusterId, request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } + + @PostMapping("/{groupKey}/parent-inference/{subClusterId}/label-sessions") + public ResponseEntity<?> createGroupParentLabelSession( + @PathVariable String groupKey, + @PathVariable int subClusterId, + @RequestBody GroupParentLabelSessionRequest request) { + try { + return ResponseEntity.ok(groupPolygonService.createGroupParentLabelSession(groupKey, subClusterId, request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } + + @PostMapping("/{groupKey}/parent-inference/{subClusterId}/candidate-exclusions") + public ResponseEntity<?> createGroupCandidateExclusion( + @PathVariable String groupKey, + @PathVariable int subClusterId, + @RequestBody GroupParentCandidateExclusionRequest request) { + try { + return ResponseEntity.ok(groupPolygonService.createGroupCandidateExclusion(groupKey, subClusterId, request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } } diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonDto.java 
b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonDto.java index 1bfaf4d..ae3c03b 100644 --- a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonDto.java +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonDto.java @@ -26,4 +26,5 @@ public class GroupPolygonDto { private List<Map<String, Object>> members; private String color; private String resolution; + private ParentInferenceSummaryDto parentInference; } diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonService.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonService.java index 4cb954e..db6b9bb 100644 --- a/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonService.java +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonService.java @@ -5,13 +5,18 @@ import com.fasterxml.jackson.databind.ObjectMapper; import gc.mda.kcg.config.CacheConfig; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; import org.springframework.cache.Cache; import org.springframework.cache.CacheManager; import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.StringUtils; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -24,42 +29,87 @@ public class GroupPolygonService { private final CacheManager cacheManager; private final ObjectMapper objectMapper; + @Value("${DB_SCHEMA:kcg}") + private String dbSchema; + private static final long CACHE_TTL_MS = 5 * 60_000L; // 5분 private volatile long lastCacheTime = 0; - private static final String LATEST_GROUPS_SQL = """ - SELECT group_type, group_key, group_label, sub_cluster_id, snapshot_time, - ST_AsGeoJSON(polygon) AS polygon_geojson, - ST_Y(center_point) AS center_lat, ST_X(center_point) AS 
center_lon, - area_sq_nm, member_count, zone_id, zone_name, members, color, resolution - FROM kcg.group_polygon_snapshots - WHERE snapshot_time = (SELECT MAX(snapshot_time) FROM kcg.group_polygon_snapshots WHERE resolution = '1h') - AND resolution = '1h' - ORDER BY group_type, member_count DESC + private static final String LATEST_GROUPS_SQL_TEMPLATE = """ + SELECT g.group_type, g.group_key, g.group_label, g.sub_cluster_id, g.snapshot_time, + ST_AsGeoJSON(g.polygon) AS polygon_geojson, + ST_Y(g.center_point) AS center_lat, ST_X(g.center_point) AS center_lon, + g.area_sq_nm, g.member_count, g.zone_id, g.zone_name, g.members, g.color, g.resolution, + r.normalized_parent_name, + r.status AS parent_inference_status, + r.selected_parent_mmsi, + r.selected_parent_name, + r.confidence AS parent_inference_confidence, + r.decision_source, + r.top_score, + r.score_margin, + r.stable_cycles, + r.evidence_summary + FROM %s g + LEFT JOIN %s r + ON r.group_key = g.group_key + AND r.sub_cluster_id = g.sub_cluster_id + AND r.last_evaluated_at >= g.snapshot_time + WHERE g.snapshot_time = (SELECT MAX(snapshot_time) FROM %s WHERE resolution = '1h') + AND g.resolution = '1h' + ORDER BY g.group_type, g.member_count DESC """; - private static final String GROUP_DETAIL_SQL = """ - SELECT group_type, group_key, group_label, sub_cluster_id, snapshot_time, - ST_AsGeoJSON(polygon) AS polygon_geojson, - ST_Y(center_point) AS center_lat, ST_X(center_point) AS center_lon, - area_sq_nm, member_count, zone_id, zone_name, members, color, resolution - FROM kcg.group_polygon_snapshots - WHERE group_key = ? 
- ORDER BY snapshot_time DESC + private static final String GROUP_DETAIL_SQL_TEMPLATE = """ + SELECT g.group_type, g.group_key, g.group_label, g.sub_cluster_id, g.snapshot_time, + ST_AsGeoJSON(g.polygon) AS polygon_geojson, + ST_Y(g.center_point) AS center_lat, ST_X(g.center_point) AS center_lon, + g.area_sq_nm, g.member_count, g.zone_id, g.zone_name, g.members, g.color, g.resolution, + r.normalized_parent_name, + r.status AS parent_inference_status, + r.selected_parent_mmsi, + r.selected_parent_name, + r.confidence AS parent_inference_confidence, + r.decision_source, + r.top_score, + r.score_margin, + r.stable_cycles, + r.evidence_summary + FROM %s g + LEFT JOIN %s r + ON r.group_key = g.group_key + AND r.sub_cluster_id = g.sub_cluster_id + AND r.last_evaluated_at >= g.snapshot_time + WHERE g.group_key = ? + ORDER BY g.snapshot_time DESC LIMIT 1 """; - private static final String GROUP_HISTORY_SQL = """ - SELECT group_type, group_key, group_label, sub_cluster_id, snapshot_time, - ST_AsGeoJSON(polygon) AS polygon_geojson, - ST_Y(center_point) AS center_lat, ST_X(center_point) AS center_lon, - area_sq_nm, member_count, zone_id, zone_name, members, color, resolution - FROM kcg.group_polygon_snapshots - WHERE group_key = ? AND snapshot_time > NOW() - CAST(? 
|| ' hours' AS INTERVAL) - ORDER BY snapshot_time DESC + private static final String GROUP_HISTORY_SQL_TEMPLATE = """ + SELECT g.group_type, g.group_key, g.group_label, g.sub_cluster_id, g.snapshot_time, + ST_AsGeoJSON(g.polygon) AS polygon_geojson, + ST_Y(g.center_point) AS center_lat, ST_X(g.center_point) AS center_lon, + g.area_sq_nm, g.member_count, g.zone_id, g.zone_name, g.members, g.color, g.resolution, + r.normalized_parent_name, + r.status AS parent_inference_status, + r.selected_parent_mmsi, + r.selected_parent_name, + r.confidence AS parent_inference_confidence, + r.decision_source, + r.top_score, + r.score_margin, + r.stable_cycles, + r.evidence_summary + FROM %s g + LEFT JOIN %s r + ON r.group_key = g.group_key + AND r.sub_cluster_id = g.sub_cluster_id + AND r.last_evaluated_at >= g.snapshot_time + WHERE g.group_key = ? AND g.snapshot_time > NOW() - CAST(? || ' hours' AS INTERVAL) + ORDER BY g.snapshot_time DESC """; - private static final String GROUP_CORRELATIONS_SQL = """ + private static final String GROUP_CORRELATIONS_SQL_TEMPLATE = """ WITH best_scores AS ( SELECT DISTINCT ON (m.id, s.sub_cluster_id, s.target_mmsi) s.target_mmsi, s.target_type, s.target_name, @@ -67,8 +117,8 @@ public class GroupPolygonService { s.freeze_state, s.shadow_bonus_total, s.sub_cluster_id, m.id AS model_id, m.name AS model_name, m.is_default - FROM kcg.gear_correlation_scores s - JOIN kcg.correlation_param_models m ON s.model_id = m.id + FROM %s s + JOIN %s m ON s.model_id = m.id WHERE s.group_key = ? AND s.current_score >= ? AND m.is_active = TRUE ORDER BY m.id, s.sub_cluster_id, s.target_mmsi, s.current_score DESC ) @@ -77,28 +127,301 @@ public class GroupPolygonService { FROM best_scores bs LEFT JOIN LATERAL ( SELECT proximity_ratio, visit_score, heading_coherence - FROM kcg.gear_correlation_raw_metrics + FROM %s WHERE group_key = ? 
AND target_mmsi = bs.target_mmsi ORDER BY observed_at DESC LIMIT 1 ) r ON TRUE ORDER BY bs.model_id, bs.current_score DESC """; - private static final String GEAR_STATS_SQL = """ + private static final String GEAR_STATS_SQL_TEMPLATE = """ SELECT COUNT(*) AS gear_groups, COALESCE(SUM(member_count), 0) AS gear_count - FROM kcg.group_polygon_snapshots - WHERE snapshot_time = (SELECT MAX(snapshot_time) FROM kcg.group_polygon_snapshots WHERE resolution = '1h') + FROM %s + WHERE snapshot_time = (SELECT MAX(snapshot_time) FROM %s WHERE resolution = '1h') AND group_type IN ('GEAR_IN_ZONE', 'GEAR_OUT_ZONE') AND resolution = '1h' """; - /** - * 어구 그룹 집계 통계 (최신 스냅샷 기준). - */ + private static final String PARENT_INFERENCE_REVIEW_SQL_TEMPLATE = """ + WITH latest_groups AS ( + SELECT DISTINCT ON (g.group_key, g.sub_cluster_id) + g.group_type, g.group_key, g.group_label, g.sub_cluster_id, + g.snapshot_time, g.zone_name, g.member_count, g.resolution + FROM %s g + WHERE g.snapshot_time = (SELECT MAX(snapshot_time) FROM %s WHERE resolution = '1h') + AND g.resolution = '1h' + AND g.group_type IN ('GEAR_IN_ZONE', 'GEAR_OUT_ZONE') + ORDER BY g.group_key, g.sub_cluster_id, g.snapshot_time DESC + ), + latest_candidate_snapshot AS ( + SELECT c.group_key, c.sub_cluster_id, MAX(c.observed_at) AS observed_at + FROM %s c + JOIN latest_groups lg + ON lg.group_key = c.group_key + AND lg.sub_cluster_id = c.sub_cluster_id + AND c.observed_at >= lg.snapshot_time + GROUP BY c.group_key, c.sub_cluster_id + ), + candidate_counts AS ( + SELECT c.group_key, c.sub_cluster_id, COUNT(*) AS candidate_count + FROM %s c + JOIN latest_candidate_snapshot l + ON l.group_key = c.group_key + AND l.sub_cluster_id = c.sub_cluster_id + AND l.observed_at = c.observed_at + GROUP BY c.group_key, c.sub_cluster_id + ) + SELECT lg.group_type, lg.group_key, lg.group_label, lg.sub_cluster_id, + lg.snapshot_time, lg.zone_name, lg.member_count, lg.resolution, + COALESCE(cc.candidate_count, 0) AS candidate_count, + 
r.normalized_parent_name, + r.status AS parent_inference_status, + r.selected_parent_mmsi, + r.selected_parent_name, + r.confidence AS parent_inference_confidence, + r.decision_source, + r.top_score, + r.score_margin, + r.stable_cycles, + r.evidence_summary + FROM latest_groups lg + JOIN %s r + ON r.group_key = lg.group_key + AND r.sub_cluster_id = lg.sub_cluster_id + AND r.last_evaluated_at >= lg.snapshot_time + LEFT JOIN candidate_counts cc + ON cc.group_key = lg.group_key + AND cc.sub_cluster_id = lg.sub_cluster_id + WHERE r.status <> 'SKIPPED_SHORT_NAME' + AND (? IS NULL OR r.status = ?) + ORDER BY CASE WHEN r.status = 'REVIEW_REQUIRED' THEN 0 ELSE 1 END, + r.top_score DESC NULLS LAST, + lg.member_count DESC, + lg.group_key ASC, + lg.sub_cluster_id ASC + LIMIT ? + """; + + private static final String PARENT_INFERENCE_DETAIL_SQL_TEMPLATE = """ + WITH latest_groups AS ( + SELECT DISTINCT ON (g.group_key, g.sub_cluster_id) + g.group_type, g.group_key, g.group_label, g.sub_cluster_id, + g.snapshot_time, g.zone_name, g.member_count, g.resolution + FROM %s g + WHERE g.snapshot_time = (SELECT MAX(snapshot_time) FROM %s WHERE resolution = '1h') + AND g.resolution = '1h' + AND g.group_type IN ('GEAR_IN_ZONE', 'GEAR_OUT_ZONE') + AND g.group_key = ? + ORDER BY g.group_key, g.sub_cluster_id, g.snapshot_time DESC + ), + latest_candidate_snapshot AS ( + SELECT c.group_key, c.sub_cluster_id, MAX(c.observed_at) AS observed_at + FROM %s c + JOIN latest_groups lg + ON lg.group_key = c.group_key + AND lg.sub_cluster_id = c.sub_cluster_id + AND c.observed_at >= lg.snapshot_time + WHERE c.group_key = ? 
+ GROUP BY c.group_key, c.sub_cluster_id + ), + candidate_counts AS ( + SELECT c.group_key, c.sub_cluster_id, COUNT(*) AS candidate_count + FROM %s c + JOIN latest_candidate_snapshot l + ON l.group_key = c.group_key + AND l.sub_cluster_id = c.sub_cluster_id + AND l.observed_at = c.observed_at + GROUP BY c.group_key, c.sub_cluster_id + ) + SELECT lg.group_type, lg.group_key, lg.group_label, lg.sub_cluster_id, + lg.snapshot_time, lg.zone_name, lg.member_count, lg.resolution, + COALESCE(cc.candidate_count, 0) AS candidate_count, + r.normalized_parent_name, + r.status AS parent_inference_status, + r.selected_parent_mmsi, + r.selected_parent_name, + r.confidence AS parent_inference_confidence, + r.decision_source, + r.top_score, + r.score_margin, + r.stable_cycles, + r.evidence_summary + FROM latest_groups lg + LEFT JOIN %s r + ON r.group_key = lg.group_key + AND r.sub_cluster_id = lg.sub_cluster_id + AND r.last_evaluated_at >= lg.snapshot_time + LEFT JOIN candidate_counts cc + ON cc.group_key = lg.group_key + AND cc.sub_cluster_id = lg.sub_cluster_id + ORDER BY lg.sub_cluster_id ASC + """; + + private static final String PARENT_INFERENCE_CANDIDATES_SQL_TEMPLATE = """ + WITH latest_groups AS ( + SELECT DISTINCT ON (g.group_key, g.sub_cluster_id) + g.group_key, g.sub_cluster_id, g.snapshot_time + FROM %s g + WHERE g.snapshot_time = (SELECT MAX(snapshot_time) FROM %s WHERE resolution = '1h') + AND g.resolution = '1h' + AND g.group_type IN ('GEAR_IN_ZONE', 'GEAR_OUT_ZONE') + AND g.group_key = ? 
+ ORDER BY g.group_key, g.sub_cluster_id, g.snapshot_time DESC + ), + latest_candidate_snapshot AS ( + SELECT c.group_key, c.sub_cluster_id, MAX(c.observed_at) AS observed_at + FROM %s c + JOIN latest_groups lg + ON lg.group_key = c.group_key + AND lg.sub_cluster_id = c.sub_cluster_id + AND c.observed_at >= lg.snapshot_time + GROUP BY c.group_key, c.sub_cluster_id + ) + SELECT c.group_key, c.sub_cluster_id, c.candidate_mmsi, c.candidate_name, + c.candidate_vessel_id, c.rank, c.candidate_source, + c.final_score, c.base_corr_score, c.name_match_score, + c.track_similarity_score, c.visit_score_6h, c.proximity_score_6h, + c.activity_sync_score_6h, c.stability_score, c.registry_bonus, + c.margin_from_top, c.evidence + FROM %s c + JOIN latest_candidate_snapshot l + ON l.group_key = c.group_key + AND l.sub_cluster_id = c.sub_cluster_id + AND l.observed_at = c.observed_at + WHERE c.group_key = ? + AND c.rank <= 5 + ORDER BY c.sub_cluster_id ASC, c.rank ASC + """; + + private static final String CURRENT_RESOLUTION_SQL_TEMPLATE = """ + SELECT group_key, sub_cluster_id, status, selected_parent_mmsi, selected_parent_name, + selected_vessel_id, confidence, top_score, second_score, score_margin, + stable_cycles, approved_by, approved_at, manual_comment, evidence_summary, last_evaluated_at + FROM %s + WHERE group_key = ? AND sub_cluster_id = ? + """; + + private static final String REVIEW_LOG_INSERT_SQL_TEMPLATE = """ + INSERT INTO %s ( + group_key, sub_cluster_id, action, selected_parent_mmsi, actor, comment, payload + ) VALUES (?, ?, ?, ?, ?, ?, ?::jsonb) + """; + + private static final String GLOBAL_WORKFLOW_GROUP_KEY = "__GLOBAL__"; + + private static final String LATEST_CANDIDATE_LOOKUP_SQL_TEMPLATE = """ + WITH latest_candidate_snapshot AS ( + SELECT MAX(observed_at) AS observed_at + FROM %s + WHERE group_key = ? AND sub_cluster_id = ? 
+ ) + SELECT c.candidate_mmsi, c.candidate_name, c.candidate_vessel_id, c.final_score + FROM %s c + JOIN latest_candidate_snapshot l ON l.observed_at = c.observed_at + WHERE c.group_key = ? AND c.sub_cluster_id = ? + AND c.candidate_mmsi = ? + LIMIT 1 + """; + + private String table(String name) { + return dbSchema + "." + name; + } + + private String latestGroupsSql() { + String groupPolygonSnapshots = table("group_polygon_snapshots"); + return LATEST_GROUPS_SQL_TEMPLATE.formatted( + groupPolygonSnapshots, + table("gear_group_parent_resolution"), + groupPolygonSnapshots + ); + } + + private String groupDetailSql() { + return GROUP_DETAIL_SQL_TEMPLATE.formatted( + table("group_polygon_snapshots"), + table("gear_group_parent_candidate_snapshots"), + table("gear_group_parent_candidate_snapshots"), + table("gear_group_parent_resolution") + ); + } + + private String groupHistorySql() { + return GROUP_HISTORY_SQL_TEMPLATE.formatted( + table("group_polygon_snapshots"), + table("gear_group_parent_resolution") + ); + } + + private String groupCorrelationsSql() { + return GROUP_CORRELATIONS_SQL_TEMPLATE.formatted( + table("gear_correlation_scores"), + table("correlation_param_models"), + table("gear_correlation_raw_metrics") + ); + } + + private String gearStatsSql() { + String groupPolygonSnapshots = table("group_polygon_snapshots"); + return GEAR_STATS_SQL_TEMPLATE.formatted(groupPolygonSnapshots, groupPolygonSnapshots); + } + + private String parentInferenceReviewSql() { + String groupPolygonSnapshots = table("group_polygon_snapshots"); + String candidateSnapshots = table("gear_group_parent_candidate_snapshots"); + return PARENT_INFERENCE_REVIEW_SQL_TEMPLATE.formatted( + groupPolygonSnapshots, + groupPolygonSnapshots, + candidateSnapshots, + candidateSnapshots, + table("gear_group_parent_resolution") + ); + } + + private String parentInferenceDetailSql() { + return PARENT_INFERENCE_DETAIL_SQL_TEMPLATE.formatted( + table("group_polygon_snapshots"), + 
table("group_polygon_snapshots"), + table("gear_group_parent_candidate_snapshots"), + table("gear_group_parent_candidate_snapshots"), + table("gear_group_parent_resolution") + ); + } + + private String parentInferenceCandidatesSql() { + return PARENT_INFERENCE_CANDIDATES_SQL_TEMPLATE.formatted( + table("group_polygon_snapshots"), + table("group_polygon_snapshots"), + table("gear_group_parent_candidate_snapshots"), + table("gear_group_parent_candidate_snapshots") + ); + } + + private String currentResolutionSql() { + return CURRENT_RESOLUTION_SQL_TEMPLATE.formatted(table("gear_group_parent_resolution")); + } + + private String reviewLogInsertSql() { + return REVIEW_LOG_INSERT_SQL_TEMPLATE.formatted(table("gear_group_parent_review_log")); + } + + private String latestCandidateLookupSql() { + String candidateSnapshots = table("gear_group_parent_candidate_snapshots"); + return LATEST_CANDIDATE_LOOKUP_SQL_TEMPLATE.formatted(candidateSnapshots, candidateSnapshots); + } + + private String normalizeParentName(String value) { + if (!StringUtils.hasText(value)) { + return null; + } + return value + .toUpperCase() + .replaceAll("[\\s_\\-%]+", ""); + } + public Map getGearStats() { try { - return jdbcTemplate.queryForObject(GEAR_STATS_SQL, (rs, rowNum) -> Map.of( + return jdbcTemplate.queryForObject(gearStatsSql(), (rs, rowNum) -> Map.of( "gearGroups", rs.getInt("gear_groups"), "gearCount", rs.getInt("gear_count") )); @@ -108,13 +431,10 @@ public class GroupPolygonService { } } - /** - * 특정 어구 그룹의 연관성 점수 (멀티모델). 
- */ public List> getGroupCorrelations(String groupKey, double minScore) { try { - return jdbcTemplate.query(GROUP_CORRELATIONS_SQL, (rs, rowNum) -> { - Map row = new java.util.LinkedHashMap<>(); + return jdbcTemplate.query(groupCorrelationsSql(), (rs, rowNum) -> { + Map row = new LinkedHashMap<>(); row.put("targetMmsi", rs.getString("target_mmsi")); row.put("targetType", rs.getString("target_type")); row.put("targetName", rs.getString("target_name")); @@ -138,9 +458,6 @@ public class GroupPolygonService { } } - /** - * 최신 스냅샷의 전체 그룹 폴리곤 목록 (5분 캐시). - */ @SuppressWarnings("unchecked") public List getLatestGroups() { Cache cache = cacheManager.getCache(CacheConfig.GROUP_POLYGONS); @@ -153,7 +470,7 @@ public class GroupPolygonService { } } - List results = jdbcTemplate.query(LATEST_GROUPS_SQL, this::mapRow); + List results = jdbcTemplate.query(latestGroupsSql(), this::mapGroupRow); if (cache != null) { cache.put("data", results); @@ -162,42 +479,890 @@ public class GroupPolygonService { return results; } - /** - * 특정 그룹의 최신 상세 정보. - */ public GroupPolygonDto getGroupDetail(String groupKey) { - List results = jdbcTemplate.query(GROUP_DETAIL_SQL, this::mapRow, groupKey); + List results = jdbcTemplate.query(groupDetailSql(), this::mapGroupRow, groupKey); return results.isEmpty() ? null : results.get(0); } - /** - * 특정 그룹의 시간별 히스토리. - * sub_cluster_id 포함하여 raw 반환 — 프론트에서 서브클러스터별 독립 center trail 구성. 
- */ public List getGroupHistory(String groupKey, int hours) { - return jdbcTemplate.query(GROUP_HISTORY_SQL, this::mapRow, groupKey, String.valueOf(hours)); + return jdbcTemplate.query(groupHistorySql(), this::mapGroupRow, groupKey, String.valueOf(hours)); } - private GroupPolygonDto mapRow(ResultSet rs, int rowNum) throws SQLException { - Object polygonObj = null; - String polygonJson = rs.getString("polygon_geojson"); - if (polygonJson != null) { - try { - polygonObj = objectMapper.readValue(polygonJson, new TypeReference>() {}); - } catch (Exception e) { - log.warn("Failed to parse polygon GeoJSON: {}", e.getMessage()); - } + public List getParentInferenceReview(String status, int limit) { + return jdbcTemplate.query( + parentInferenceReviewSql(), + (rs, rowNum) -> mapParentInferenceRow(rs, List.of()), + status, + status, + limit + ); + } + + public List getGroupParentInference(String groupKey) { + Map> candidateMap = loadParentInferenceCandidates(groupKey); + return jdbcTemplate.query( + parentInferenceDetailSql(), + (rs, rowNum) -> { + String key = parentInferenceKey(rs.getString("group_key"), rs.getInt("sub_cluster_id")); + return mapParentInferenceRow(rs, candidateMap.getOrDefault(key, List.of())); + }, + groupKey, + groupKey + ); + } + + @Transactional + public Map reviewParentInference(String groupKey, int subClusterId, GroupParentInferenceReviewRequest request) { + String action = StringUtils.hasText(request.getAction()) ? request.getAction().trim().toUpperCase() : ""; + String actor = request.getActor() == null ? "" : request.getActor().trim(); + String selectedParentMmsi = StringUtils.hasText(request.getSelectedParentMmsi()) + ? request.getSelectedParentMmsi().trim() + : null; + String comment = StringUtils.hasText(request.getComment()) ? 
request.getComment().trim() : null; + + if (!StringUtils.hasText(actor)) { + throw new IllegalArgumentException("actor is required"); + } + if (!List.of("CONFIRM", "REJECT", "RESET").contains(action)) { + throw new IllegalArgumentException("action must be one of CONFIRM, REJECT, RESET"); + } + if ("CONFIRM".equals(action) && !StringUtils.hasText(selectedParentMmsi)) { + throw new IllegalArgumentException("selectedParentMmsi is required for CONFIRM"); } - List> members = List.of(); - String membersJson = rs.getString("members"); - if (membersJson != null) { - try { - members = objectMapper.readValue(membersJson, new TypeReference<>() {}); - } catch (Exception e) { - log.warn("Failed to parse members JSON: {}", e.getMessage()); + Map current = loadCurrentResolution(groupKey, subClusterId); + if (current == null) { + throw new IllegalArgumentException("parent inference not found"); + } + + Map payload = new LinkedHashMap<>(); + payload.put("previousStatus", current.get("status")); + payload.put("previousSelectedParentMmsi", current.get("selectedParentMmsi")); + + switch (action) { + case "CONFIRM" -> { + Map candidate = loadLatestCandidate(groupKey, subClusterId, selectedParentMmsi); + jdbcTemplate.update( + """ + UPDATE %s + SET status = 'MANUAL_CONFIRMED', + selected_parent_mmsi = ?, + selected_parent_name = ?, + selected_vessel_id = ?, + confidence = COALESCE(?, confidence), + decision_source = 'MANUAL', + last_promoted_at = NOW(), + approved_by = ?, + approved_at = NOW(), + manual_comment = ?, + rejected_candidate_mmsi = NULL, + rejected_at = NULL, + updated_at = NOW() + WHERE group_key = ? AND sub_cluster_id = ? 
+ """.formatted(table("gear_group_parent_resolution")), + selectedParentMmsi, + candidate.get("candidateName"), + candidate.get("candidateVesselId"), + candidate.get("finalScore"), + actor, + comment, + groupKey, + subClusterId + ); + payload.put("confirmedCandidate", candidate); + } + case "REJECT" -> jdbcTemplate.update( + """ + UPDATE %s + SET status = 'REVIEW_REQUIRED', + selected_parent_mmsi = NULL, + selected_parent_name = NULL, + selected_vessel_id = NULL, + confidence = NULL, + decision_source = 'MANUAL_REJECT', + approved_by = NULL, + approved_at = NULL, + manual_comment = ?, + rejected_candidate_mmsi = COALESCE(?, selected_parent_mmsi), + rejected_at = NOW(), + updated_at = NOW() + WHERE group_key = ? AND sub_cluster_id = ? + """.formatted(table("gear_group_parent_resolution")), + comment, + selectedParentMmsi, + groupKey, + subClusterId + ); + case "RESET" -> jdbcTemplate.update( + """ + UPDATE %s + SET status = 'UNRESOLVED', + selected_parent_mmsi = NULL, + selected_parent_name = NULL, + selected_vessel_id = NULL, + confidence = NULL, + decision_source = 'RESET', + approved_by = NULL, + approved_at = NULL, + manual_comment = ?, + rejected_candidate_mmsi = NULL, + rejected_at = NULL, + last_promoted_at = NULL, + updated_at = NOW() + WHERE group_key = ? AND sub_cluster_id = ? 
+ """.formatted(table("gear_group_parent_resolution")), + comment, + groupKey, + subClusterId + ); + default -> throw new IllegalArgumentException("unsupported action"); + } + + jdbcTemplate.update( + reviewLogInsertSql(), + groupKey, + subClusterId, + action, + selectedParentMmsi, + actor, + comment, + toJson(payload) + ); + evictGroupPolygonCache(); + + List updated = getGroupParentInference(groupKey); + GroupParentInferenceDto target = updated.stream() + .filter(item -> item.getSubClusterId() == subClusterId) + .findFirst() + .orElse(null); + Map response = new LinkedHashMap<>(); + response.put("groupKey", groupKey); + response.put("subClusterId", subClusterId); + response.put("action", action); + response.put("item", target); + return response; + } + + public List getCandidateExclusions( + String scopeType, + String groupKey, + Integer subClusterId, + String candidateMmsi, + boolean activeOnly, + int limit + ) { + String normalizedScopeType = normalizeOptionalUpper(scopeType); + String normalizedGroupKey = normalizeOptionalText(groupKey); + String normalizedCandidateMmsi = normalizeOptionalText(candidateMmsi); + int safeLimit = normalizeLimit(limit, 200); + List params = new ArrayList<>(); + StringBuilder sql = new StringBuilder(""" + SELECT id, scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_from, active_until, released_at, released_by, + actor, comment, metadata, + (released_at IS NULL + AND active_from <= NOW() + AND (active_until IS NULL OR active_until > NOW())) AS active + FROM %s + WHERE 1 = 1 + """.formatted(table("gear_parent_candidate_exclusions"))); + if (normalizedScopeType != null) { + sql.append(" AND scope_type = ?"); + params.add(normalizedScopeType); + } + if (normalizedGroupKey != null) { + sql.append(" AND group_key = ?"); + params.add(normalizedGroupKey); + } + if (subClusterId != null) { + sql.append(" AND sub_cluster_id = ?"); + params.add(subClusterId); + } + if (normalizedCandidateMmsi != null) 
{ + sql.append(" AND candidate_mmsi = ?"); + params.add(normalizedCandidateMmsi); + } + if (activeOnly) { + sql.append(""" + AND released_at IS NULL + AND active_from <= NOW() + AND (active_until IS NULL OR active_until > NOW()) + """); + } + sql.append(" ORDER BY active DESC, active_from DESC, id DESC LIMIT ?"); + params.add(safeLimit); + return jdbcTemplate.query(sql.toString(), this::mapCandidateExclusionRow, params.toArray()); + } + + public List getLabelSessions( + String groupKey, + Integer subClusterId, + String status, + boolean activeOnly, + int limit + ) { + expireLabelSessions(); + String normalizedGroupKey = normalizeOptionalText(groupKey); + String normalizedStatus = normalizeOptionalUpper(status); + int safeLimit = normalizeLimit(limit, 200); + List params = new ArrayList<>(); + StringBuilder sql = new StringBuilder(""" + SELECT id, group_key, sub_cluster_id, label_parent_mmsi, label_parent_name, + label_parent_vessel_id, duration_days, status, active_from, active_until, + actor, comment, anchor_snapshot_time, + ST_Y(anchor_center_point) AS anchor_center_lat, + ST_X(anchor_center_point) AS anchor_center_lon, + jsonb_array_length(anchor_member_mmsis) AS anchor_member_count, + metadata, + (status = 'ACTIVE' + AND active_from <= NOW() + AND active_until > NOW()) AS active + FROM %s + WHERE 1 = 1 + """.formatted(table("gear_parent_label_sessions"))); + if (normalizedGroupKey != null) { + sql.append(" AND group_key = ?"); + params.add(normalizedGroupKey); + } + if (subClusterId != null) { + sql.append(" AND sub_cluster_id = ?"); + params.add(subClusterId); + } + if (normalizedStatus != null) { + sql.append(" AND status = ?"); + params.add(normalizedStatus); + } + if (activeOnly) { + sql.append(""" + AND status = 'ACTIVE' + AND active_from <= NOW() + AND active_until > NOW() + """); + } + sql.append(" ORDER BY active DESC, active_from DESC, id DESC LIMIT ?"); + params.add(safeLimit); + return jdbcTemplate.query(sql.toString(), this::mapLabelSessionRow, 
params.toArray()); + } + + public List getLabelSessionTracking(long labelSessionId, int limit) { + expireLabelSessions(); + int safeLimit = normalizeLimit(limit, 500); + return jdbcTemplate.query( + """ + SELECT id, label_session_id, observed_at, candidate_snapshot_observed_at, + auto_status, top_candidate_mmsi, top_candidate_name, top_candidate_score, + top_candidate_margin, candidate_count, labeled_candidate_present, + labeled_candidate_rank, labeled_candidate_score, + labeled_candidate_pre_bonus_score, labeled_candidate_margin_from_top, + matched_top1, matched_top3, evidence_summary + FROM %s + WHERE label_session_id = ? + ORDER BY observed_at DESC, id DESC + LIMIT ? + """.formatted(table("gear_parent_label_tracking_cycles")), + this::mapLabelTrackingCycleRow, + labelSessionId, + safeLimit + ); + } + + @Transactional + public Map createGroupCandidateExclusion( + String groupKey, + int subClusterId, + GroupParentCandidateExclusionRequest request + ) { + String candidateMmsi = normalizeRequiredText(request.getCandidateMmsi(), "candidateMmsi"); + String actor = normalizeRequiredText(request.getActor(), "actor"); + String comment = normalizeOptionalText(request.getComment()); + int durationDays = validateDurationDays(request.getDurationDays(), "durationDays"); + Map anchor = loadLatestGroupAnchor(groupKey, subClusterId); + if (anchor == null) { + throw new IllegalArgumentException("group not found"); + } + + ParentCandidateExclusionDto existing = loadActiveCandidateExclusion("GROUP", groupKey, subClusterId, candidateMmsi); + if (existing != null) { + return Map.of( + "groupKey", groupKey, + "subClusterId", subClusterId, + "action", "EXCLUDE_GROUP", + "item", existing + ); + } + + Map candidate = loadLatestCandidate(groupKey, subClusterId, candidateMmsi); + Map metadata = new LinkedHashMap<>(); + metadata.put("candidateName", candidate.get("candidateName")); + metadata.put("candidateVesselId", candidate.get("candidateVesselId")); + metadata.put("candidateScore", 
candidate.get("finalScore")); + metadata.put("anchorSnapshotTime", anchor.get("snapshotTime")); + metadata.put("anchorMemberCount", anchor.get("memberCount")); + + ParentCandidateExclusionDto created = jdbcTemplate.queryForObject( + """ + INSERT INTO %s ( + scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_until, actor, comment, metadata + ) VALUES ( + 'GROUP', ?, ?, ?, 'GROUP_WRONG_PARENT', + ?, NOW() + (? * INTERVAL '1 day'), ?, ?, ?::jsonb + ) + RETURNING id, scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_from, active_until, released_at, released_by, + actor, comment, metadata, TRUE AS active + """.formatted(table("gear_parent_candidate_exclusions")), + this::mapCandidateExclusionRow, + groupKey, + subClusterId, + candidateMmsi, + durationDays, + durationDays, + actor, + comment, + toJson(metadata) + ); + + jdbcTemplate.update( + reviewLogInsertSql(), + groupKey, + subClusterId, + "EXCLUDE_GROUP", + candidateMmsi, + actor, + comment, + toJson(reviewPayload( + "durationDays", durationDays, + "candidateName", candidate.get("candidateName"), + "candidateVesselId", candidate.get("candidateVesselId") + )) + ); + return Map.of( + "groupKey", groupKey, + "subClusterId", subClusterId, + "action", "EXCLUDE_GROUP", + "item", created + ); + } + + @Transactional + public Map createGlobalCandidateExclusion(GlobalParentCandidateExclusionRequest request) { + String candidateMmsi = normalizeRequiredText(request.getCandidateMmsi(), "candidateMmsi"); + String actor = normalizeRequiredText(request.getActor(), "actor"); + String comment = normalizeOptionalText(request.getComment()); + + ParentCandidateExclusionDto existing = loadActiveCandidateExclusion("GLOBAL", null, null, candidateMmsi); + if (existing != null) { + return Map.of( + "action", "EXCLUDE_GLOBAL", + "item", existing + ); + } + + ParentCandidateExclusionDto created = jdbcTemplate.queryForObject( + """ + INSERT INTO %s ( + 
scope_type, candidate_mmsi, reason_type, actor, comment, metadata + ) VALUES ( + 'GLOBAL', ?, 'GLOBAL_NOT_PARENT_TARGET', ?, ?, '{}'::jsonb + ) + RETURNING id, scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_from, active_until, released_at, released_by, + actor, comment, metadata, TRUE AS active + """.formatted(table("gear_parent_candidate_exclusions")), + this::mapCandidateExclusionRow, + candidateMmsi, + actor, + comment + ); + + jdbcTemplate.update( + reviewLogInsertSql(), + GLOBAL_WORKFLOW_GROUP_KEY, + 0, + "EXCLUDE_GLOBAL", + candidateMmsi, + actor, + comment, + toJson(Map.of("candidateMmsi", candidateMmsi)) + ); + return Map.of("action", "EXCLUDE_GLOBAL", "item", created); + } + + @Transactional + public Map releaseCandidateExclusion(long exclusionId, ParentWorkflowActionRequest request) { + String actor = normalizeRequiredText(request.getActor(), "actor"); + String comment = normalizeOptionalText(request.getComment()); + ParentCandidateExclusionDto current = loadCandidateExclusionById(exclusionId); + if (current == null) { + throw new IllegalArgumentException("candidate exclusion not found"); + } + + jdbcTemplate.update( + """ + UPDATE %s + SET released_at = COALESCE(released_at, NOW()), + released_by = ?, + updated_at = NOW() + WHERE id = ? + """.formatted(table("gear_parent_candidate_exclusions")), + actor, + exclusionId + ); + + ParentCandidateExclusionDto updated = loadCandidateExclusionById(exclusionId); + String reviewGroupKey = "GROUP".equals(current.getScopeType()) + ? current.getGroupKey() + : GLOBAL_WORKFLOW_GROUP_KEY; + int reviewSubClusterId = "GROUP".equals(current.getScopeType()) + ? (current.getSubClusterId() == null ? 
0 : current.getSubClusterId()) + : 0; + jdbcTemplate.update( + reviewLogInsertSql(), + reviewGroupKey, + reviewSubClusterId, + "RELEASE_EXCLUSION", + current.getCandidateMmsi(), + actor, + comment, + toJson(reviewPayload( + "exclusionId", exclusionId, + "scopeType", current.getScopeType(), + "groupKey", current.getGroupKey(), + "subClusterId", current.getSubClusterId() + )) + ); + return Map.of("action", "RELEASE_EXCLUSION", "item", updated); + } + + @Transactional + public Map createGroupParentLabelSession( + String groupKey, + int subClusterId, + GroupParentLabelSessionRequest request + ) { + expireLabelSessions(); + String selectedParentMmsi = normalizeRequiredText(request.getSelectedParentMmsi(), "selectedParentMmsi"); + String actor = normalizeRequiredText(request.getActor(), "actor"); + String comment = normalizeOptionalText(request.getComment()); + int durationDays = validateDurationDays(request.getDurationDays(), "durationDays"); + + Map anchor = loadLatestGroupAnchor(groupKey, subClusterId); + if (anchor == null) { + throw new IllegalArgumentException("group not found"); + } + + ParentLabelSessionDto activeSession = loadActiveLabelSession(groupKey, subClusterId); + if (activeSession != null) { + if (selectedParentMmsi.equals(activeSession.getLabelParentMmsi())) { + return Map.of( + "groupKey", groupKey, + "subClusterId", subClusterId, + "action", "LABEL_PARENT", + "item", activeSession + ); + } + throw new IllegalArgumentException("active label session already exists for the group"); + } + + Map candidate = loadLatestCandidate(groupKey, subClusterId, selectedParentMmsi); + List anchorMemberMmsis = extractAnchorMemberMmsis(anchor.get("members")); + Double anchorCenterLat = anchor.get("centerLat") instanceof Number number ? number.doubleValue() : null; + Double anchorCenterLon = anchor.get("centerLon") instanceof Number number ? 
number.doubleValue() : null; + + Map metadata = new LinkedHashMap<>(); + metadata.put("labelSource", "MANUAL"); + metadata.put("candidateScore", candidate.get("finalScore")); + metadata.put("anchorMemberCount", anchor.get("memberCount")); + + ParentLabelSessionDto created = jdbcTemplate.queryForObject( + """ + INSERT INTO %s ( + group_key, sub_cluster_id, label_parent_mmsi, label_parent_name, + label_parent_vessel_id, normalized_parent_name, duration_days, active_until, actor, comment, + anchor_snapshot_time, anchor_center_point, anchor_member_mmsis, metadata + ) VALUES ( + ?, ?, ?, ?, ?, ?, ?, NOW() + (? * INTERVAL '1 day'), ?, ?, + ?::timestamptz, CASE + WHEN ? IS NULL OR ? IS NULL THEN NULL + ELSE ST_SetSRID(ST_MakePoint(?, ?), 4326) + END, + ?::jsonb, ?::jsonb + ) + RETURNING id, group_key, sub_cluster_id, label_parent_mmsi, label_parent_name, + label_parent_vessel_id, duration_days, status, active_from, active_until, + actor, comment, anchor_snapshot_time, + ST_Y(anchor_center_point) AS anchor_center_lat, + ST_X(anchor_center_point) AS anchor_center_lon, + jsonb_array_length(anchor_member_mmsis) AS anchor_member_count, + metadata, TRUE AS active + """.formatted(table("gear_parent_label_sessions")), + this::mapLabelSessionRow, + groupKey, + subClusterId, + selectedParentMmsi, + candidate.get("candidateName"), + candidate.get("candidateVesselId"), + normalizeParentName(groupKey), + durationDays, + durationDays, + actor, + comment, + anchor.get("snapshotTime"), + anchorCenterLon, + anchorCenterLat, + anchorCenterLon, + anchorCenterLat, + toJson(anchorMemberMmsis), + toJson(metadata) + ); + + jdbcTemplate.update( + reviewLogInsertSql(), + groupKey, + subClusterId, + "LABEL_PARENT", + selectedParentMmsi, + actor, + comment, + toJson(reviewPayload( + "durationDays", durationDays, + "labelParentName", candidate.get("candidateName"), + "labelParentVesselId", candidate.get("candidateVesselId") + )) + ); + return Map.of( + "groupKey", groupKey, + "subClusterId", 
subClusterId, + "action", "LABEL_PARENT", + "item", created + ); + } + + @Transactional + public Map cancelLabelSession(long labelSessionId, ParentWorkflowActionRequest request) { + expireLabelSessions(); + String actor = normalizeRequiredText(request.getActor(), "actor"); + String comment = normalizeOptionalText(request.getComment()); + ParentLabelSessionDto current = loadLabelSessionById(labelSessionId); + if (current == null) { + throw new IllegalArgumentException("label session not found"); + } + + jdbcTemplate.update( + """ + UPDATE %s + SET status = CASE WHEN status = 'ACTIVE' THEN 'CANCELLED' ELSE status END, + updated_at = NOW() + WHERE id = ? + """.formatted(table("gear_parent_label_sessions")), + labelSessionId + ); + + ParentLabelSessionDto updated = loadLabelSessionById(labelSessionId); + jdbcTemplate.update( + reviewLogInsertSql(), + current.getGroupKey(), + current.getSubClusterId() == null ? 0 : current.getSubClusterId(), + "CANCEL_LABEL", + current.getLabelParentMmsi(), + actor, + comment, + toJson(reviewPayload("labelSessionId", labelSessionId)) + ); + return Map.of("action", "CANCEL_LABEL", "item", updated); + } + + private void evictGroupPolygonCache() { + Cache cache = cacheManager.getCache(CacheConfig.GROUP_POLYGONS); + if (cache != null) { + cache.evict("data"); + } + lastCacheTime = 0; + } + + private Map> loadParentInferenceCandidates(String groupKey) { + Map> grouped = new LinkedHashMap<>(); + jdbcTemplate.query( + parentInferenceCandidatesSql(), + rs -> { + String key = parentInferenceKey(rs.getString("group_key"), rs.getInt("sub_cluster_id")); + grouped.computeIfAbsent(key, ignored -> new ArrayList<>()).add(mapCandidateRow(rs, 0)); + }, + groupKey, + groupKey + ); + return grouped; + } + + private Map loadCurrentResolution(String groupKey, int subClusterId) { + List> rows = jdbcTemplate.query( + currentResolutionSql(), + (rs, rowNum) -> { + Map row = new LinkedHashMap<>(); + row.put("groupKey", rs.getString("group_key")); + 
row.put("subClusterId", rs.getInt("sub_cluster_id")); + row.put("status", rs.getString("status")); + row.put("selectedParentMmsi", rs.getString("selected_parent_mmsi")); + row.put("selectedParentName", rs.getString("selected_parent_name")); + row.put("selectedVesselId", rs.getObject("selected_vessel_id")); + row.put("confidence", nullableDouble(rs, "confidence")); + row.put("topScore", nullableDouble(rs, "top_score")); + row.put("secondScore", nullableDouble(rs, "second_score")); + row.put("scoreMargin", nullableDouble(rs, "score_margin")); + row.put("stableCycles", nullableInt(rs, "stable_cycles")); + row.put("approvedBy", rs.getString("approved_by")); + row.put("approvedAt", rs.getString("approved_at")); + row.put("manualComment", rs.getString("manual_comment")); + row.put("evidenceSummary", parseJsonObject(rs.getString("evidence_summary"))); + return row; + }, + groupKey, + subClusterId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + private Map loadLatestCandidate(String groupKey, int subClusterId, String candidateMmsi) { + List> rows = jdbcTemplate.query( + latestCandidateLookupSql(), + (rs, rowNum) -> { + Map row = new LinkedHashMap<>(); + row.put("candidateMmsi", rs.getString("candidate_mmsi")); + row.put("candidateName", rs.getString("candidate_name")); + row.put("candidateVesselId", rs.getObject("candidate_vessel_id")); + row.put("finalScore", nullableDouble(rs, "final_score")); + return row; + }, + groupKey, + subClusterId, + groupKey, + subClusterId, + candidateMmsi + ); + if (rows.isEmpty()) { + Map fallback = new LinkedHashMap<>(); + fallback.put("candidateMmsi", candidateMmsi); + fallback.put("candidateName", candidateMmsi); + fallback.put("candidateVesselId", null); + fallback.put("finalScore", null); + return fallback; + } + return rows.get(0); + } + + private ParentCandidateExclusionDto loadActiveCandidateExclusion( + String scopeType, + String groupKey, + Integer subClusterId, + String candidateMmsi + ) { + List rows = 
jdbcTemplate.query( + """ + SELECT id, scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_from, active_until, released_at, released_by, + actor, comment, metadata, + TRUE AS active + FROM %s + WHERE scope_type = ? + AND candidate_mmsi = ? + AND (? IS NULL OR group_key = ?) + AND (? IS NULL OR sub_cluster_id = ?) + AND released_at IS NULL + AND active_from <= NOW() + AND (active_until IS NULL OR active_until > NOW()) + ORDER BY active_from DESC, id DESC + LIMIT 1 + """.formatted(table("gear_parent_candidate_exclusions")), + this::mapCandidateExclusionRow, + scopeType, + candidateMmsi, + groupKey, + groupKey, + subClusterId, + subClusterId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + private ParentCandidateExclusionDto loadCandidateExclusionById(long exclusionId) { + List rows = jdbcTemplate.query( + """ + SELECT id, scope_type, group_key, sub_cluster_id, candidate_mmsi, reason_type, + duration_days, active_from, active_until, released_at, released_by, + actor, comment, metadata, + (released_at IS NULL + AND active_from <= NOW() + AND (active_until IS NULL OR active_until > NOW())) AS active + FROM %s + WHERE id = ? + """.formatted(table("gear_parent_candidate_exclusions")), + this::mapCandidateExclusionRow, + exclusionId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + private ParentLabelSessionDto loadActiveLabelSession(String groupKey, int subClusterId) { + List rows = jdbcTemplate.query( + """ + SELECT id, group_key, sub_cluster_id, label_parent_mmsi, label_parent_name, + label_parent_vessel_id, duration_days, status, active_from, active_until, + actor, comment, anchor_snapshot_time, + ST_Y(anchor_center_point) AS anchor_center_lat, + ST_X(anchor_center_point) AS anchor_center_lon, + jsonb_array_length(anchor_member_mmsis) AS anchor_member_count, + metadata, TRUE AS active + FROM %s + WHERE group_key = ? + AND sub_cluster_id = ? 
+ AND status = 'ACTIVE' + AND active_from <= NOW() + AND active_until > NOW() + ORDER BY active_from DESC, id DESC + LIMIT 1 + """.formatted(table("gear_parent_label_sessions")), + this::mapLabelSessionRow, + groupKey, + subClusterId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + private ParentLabelSessionDto loadLabelSessionById(long labelSessionId) { + List rows = jdbcTemplate.query( + """ + SELECT id, group_key, sub_cluster_id, label_parent_mmsi, label_parent_name, + label_parent_vessel_id, duration_days, status, active_from, active_until, + actor, comment, anchor_snapshot_time, + ST_Y(anchor_center_point) AS anchor_center_lat, + ST_X(anchor_center_point) AS anchor_center_lon, + jsonb_array_length(anchor_member_mmsis) AS anchor_member_count, + metadata, + (status = 'ACTIVE' + AND active_from <= NOW() + AND active_until > NOW()) AS active + FROM %s + WHERE id = ? + """.formatted(table("gear_parent_label_sessions")), + this::mapLabelSessionRow, + labelSessionId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + private void expireLabelSessions() { + jdbcTemplate.update( + """ + UPDATE %s + SET status = 'EXPIRED', + updated_at = NOW() + WHERE status = 'ACTIVE' + AND active_until <= NOW() + """.formatted(table("gear_parent_label_sessions")) + ); + } + + private Map loadLatestGroupAnchor(String groupKey, int subClusterId) { + List> rows = jdbcTemplate.query( + """ + SELECT group_key, sub_cluster_id, snapshot_time, + ST_Y(center_point) AS center_lat, + ST_X(center_point) AS center_lon, + member_count, members + FROM %s + WHERE group_key = ? AND sub_cluster_id = ? 
+ ORDER BY snapshot_time DESC + LIMIT 1 + """.formatted(table("group_polygon_snapshots")), + (rs, rowNum) -> { + Map row = new LinkedHashMap<>(); + row.put("groupKey", rs.getString("group_key")); + row.put("subClusterId", rs.getInt("sub_cluster_id")); + row.put("snapshotTime", rs.getString("snapshot_time")); + row.put("centerLat", nullableDouble(rs, "center_lat")); + row.put("centerLon", nullableDouble(rs, "center_lon")); + row.put("memberCount", nullableInt(rs, "member_count")); + row.put("members", parseJsonValue( + rs.getString("members"), + new TypeReference>>() {}, + List.of() + )); + return row; + }, + groupKey, + subClusterId + ); + return rows.isEmpty() ? null : rows.get(0); + } + + @SuppressWarnings("unchecked") + private List extractAnchorMemberMmsis(Object membersObject) { + if (!(membersObject instanceof List members)) { + return List.of(); + } + List mmsis = new ArrayList<>(); + for (Object member : members) { + if (member instanceof Map map) { + Object mmsi = map.get("mmsi"); + if (mmsi instanceof String value && StringUtils.hasText(value)) { + mmsis.add(value); + } } } + return mmsis; + } + + private String normalizeRequiredText(String value, String fieldName) { + String normalized = normalizeOptionalText(value); + if (!StringUtils.hasText(normalized)) { + throw new IllegalArgumentException(fieldName + " is required"); + } + return normalized; + } + + private String normalizeOptionalText(String value) { + return StringUtils.hasText(value) ? value.trim() : null; + } + + private String normalizeOptionalUpper(String value) { + String normalized = normalizeOptionalText(value); + return normalized == null ? 
null : normalized.toUpperCase(); + } + + private int validateDurationDays(Integer durationDays, String fieldName) { + if (durationDays == null || !List.of(1, 3, 5).contains(durationDays)) { + throw new IllegalArgumentException(fieldName + " must be one of 1, 3, 5"); + } + return durationDays; + } + + private int normalizeLimit(int limit, int maxLimit) { + if (limit <= 0) { + return 100; + } + return Math.min(limit, maxLimit); + } + + private Map reviewPayload(Object... entries) { + Map payload = new LinkedHashMap<>(); + for (int i = 0; i + 1 < entries.length; i += 2) { + Object key = entries[i]; + if (!(key instanceof String fieldName)) { + continue; + } + payload.put(fieldName, entries[i + 1]); + } + return payload; + } + + private GroupPolygonDto mapGroupRow(ResultSet rs, int rowNum) throws SQLException { + Object polygonObj = parseJsonValue(rs.getString("polygon_geojson"), new TypeReference>() {}); + List> members = parseJsonValue( + rs.getString("members"), + new TypeReference>>() {}, + List.of() + ); return GroupPolygonDto.builder() .groupType(rs.getString("group_type")) @@ -215,6 +1380,192 @@ public class GroupPolygonService { .members(members) .color(rs.getString("color")) .resolution(rs.getString("resolution")) + .parentInference(mapParentInferenceSummary(rs)) .build(); } + + private GroupParentInferenceDto mapParentInferenceRow(ResultSet rs, List candidates) throws SQLException { + Map evidenceSummary = parseJsonObject(rs.getString("evidence_summary")); + return GroupParentInferenceDto.builder() + .groupType(rs.getString("group_type")) + .groupKey(rs.getString("group_key")) + .groupLabel(rs.getString("group_label")) + .subClusterId(rs.getInt("sub_cluster_id")) + .snapshotTime(rs.getString("snapshot_time")) + .zoneName(rs.getString("zone_name")) + .memberCount(rs.getInt("member_count")) + .resolution(rs.getString("resolution")) + .candidateCount(rs.getInt("candidate_count")) + .parentInference(mapParentInferenceSummary(rs)) + .candidates(candidates) + 
.evidenceSummary(evidenceSummary) + .build(); + } + + private ParentInferenceCandidateDto mapCandidateRow(ResultSet rs, int rowNum) throws SQLException { + Map evidence = parseJsonObject(rs.getString("evidence")); + Boolean trackAvailable = null; + Object value = evidence.get("trackAvailable"); + if (value instanceof Boolean bool) { + trackAvailable = bool; + } + + return ParentInferenceCandidateDto.builder() + .candidateMmsi(rs.getString("candidate_mmsi")) + .candidateName(rs.getString("candidate_name")) + .candidateVesselId(nullableInt(rs, "candidate_vessel_id")) + .rank(rs.getInt("rank")) + .candidateSource(rs.getString("candidate_source")) + .finalScore(nullableDouble(rs, "final_score")) + .baseCorrScore(nullableDouble(rs, "base_corr_score")) + .nameMatchScore(nullableDouble(rs, "name_match_score")) + .trackSimilarityScore(nullableDouble(rs, "track_similarity_score")) + .visitScore6h(nullableDouble(rs, "visit_score_6h")) + .proximityScore6h(nullableDouble(rs, "proximity_score_6h")) + .activitySyncScore6h(nullableDouble(rs, "activity_sync_score_6h")) + .stabilityScore(nullableDouble(rs, "stability_score")) + .registryBonus(nullableDouble(rs, "registry_bonus")) + .marginFromTop(nullableDouble(rs, "margin_from_top")) + .trackAvailable(trackAvailable) + .evidence(evidence) + .build(); + } + + private ParentInferenceSummaryDto mapParentInferenceSummary(ResultSet rs) throws SQLException { + String status = rs.getString("parent_inference_status"); + if (!StringUtils.hasText(status)) { + return null; + } + Map evidenceSummary = parseJsonObject(rs.getString("evidence_summary")); + Object skipReason = evidenceSummary.get("skipReason"); + Object statusReason = evidenceSummary.get("statusReason"); + return ParentInferenceSummaryDto.builder() + .status(status) + .normalizedParentName(rs.getString("normalized_parent_name")) + .selectedParentMmsi(rs.getString("selected_parent_mmsi")) + .selectedParentName(rs.getString("selected_parent_name")) + .confidence(nullableDouble(rs, 
"parent_inference_confidence")) + .decisionSource(rs.getString("decision_source")) + .topScore(nullableDouble(rs, "top_score")) + .scoreMargin(nullableDouble(rs, "score_margin")) + .stableCycles(nullableInt(rs, "stable_cycles")) + .skipReason(skipReason instanceof String ? (String) skipReason : null) + .statusReason(statusReason instanceof String ? (String) statusReason : null) + .build(); + } + + private ParentCandidateExclusionDto mapCandidateExclusionRow(ResultSet rs, int rowNum) throws SQLException { + return ParentCandidateExclusionDto.builder() + .id(rs.getLong("id")) + .scopeType(rs.getString("scope_type")) + .groupKey(rs.getString("group_key")) + .subClusterId(nullableInt(rs, "sub_cluster_id")) + .candidateMmsi(rs.getString("candidate_mmsi")) + .reasonType(rs.getString("reason_type")) + .durationDays(nullableInt(rs, "duration_days")) + .activeFrom(rs.getString("active_from")) + .activeUntil(rs.getString("active_until")) + .releasedAt(rs.getString("released_at")) + .releasedBy(rs.getString("released_by")) + .actor(rs.getString("actor")) + .comment(rs.getString("comment")) + .active(rs.getBoolean("active")) + .metadata(parseJsonObject(rs.getString("metadata"))) + .build(); + } + + private ParentLabelSessionDto mapLabelSessionRow(ResultSet rs, int rowNum) throws SQLException { + return ParentLabelSessionDto.builder() + .id(rs.getLong("id")) + .groupKey(rs.getString("group_key")) + .subClusterId(nullableInt(rs, "sub_cluster_id")) + .labelParentMmsi(rs.getString("label_parent_mmsi")) + .labelParentName(rs.getString("label_parent_name")) + .labelParentVesselId(nullableInt(rs, "label_parent_vessel_id")) + .durationDays(nullableInt(rs, "duration_days")) + .status(rs.getString("status")) + .activeFrom(rs.getString("active_from")) + .activeUntil(rs.getString("active_until")) + .actor(rs.getString("actor")) + .comment(rs.getString("comment")) + .anchorSnapshotTime(rs.getString("anchor_snapshot_time")) + .anchorCenterLat(nullableDouble(rs, "anchor_center_lat")) + 
.anchorCenterLon(nullableDouble(rs, "anchor_center_lon")) + .anchorMemberCount(nullableInt(rs, "anchor_member_count")) + .active(rs.getBoolean("active")) + .metadata(parseJsonObject(rs.getString("metadata"))) + .build(); + } + + private ParentLabelTrackingCycleDto mapLabelTrackingCycleRow(ResultSet rs, int rowNum) throws SQLException { + return ParentLabelTrackingCycleDto.builder() + .id(rs.getLong("id")) + .labelSessionId(rs.getLong("label_session_id")) + .observedAt(rs.getString("observed_at")) + .candidateSnapshotObservedAt(rs.getString("candidate_snapshot_observed_at")) + .autoStatus(rs.getString("auto_status")) + .topCandidateMmsi(rs.getString("top_candidate_mmsi")) + .topCandidateName(rs.getString("top_candidate_name")) + .topCandidateScore(nullableDouble(rs, "top_candidate_score")) + .topCandidateMargin(nullableDouble(rs, "top_candidate_margin")) + .candidateCount(nullableInt(rs, "candidate_count")) + .labeledCandidatePresent(rs.getBoolean("labeled_candidate_present")) + .labeledCandidateRank(nullableInt(rs, "labeled_candidate_rank")) + .labeledCandidateScore(nullableDouble(rs, "labeled_candidate_score")) + .labeledCandidatePreBonusScore(nullableDouble(rs, "labeled_candidate_pre_bonus_score")) + .labeledCandidateMarginFromTop(nullableDouble(rs, "labeled_candidate_margin_from_top")) + .matchedTop1(rs.getBoolean("matched_top1")) + .matchedTop3(rs.getBoolean("matched_top3")) + .evidenceSummary(parseJsonObject(rs.getString("evidence_summary"))) + .build(); + } + + private Double nullableDouble(ResultSet rs, String column) throws SQLException { + Object value = rs.getObject(column); + if (value == null) { + return null; + } + return ((Number) value).doubleValue(); + } + + private Integer nullableInt(ResultSet rs, String column) throws SQLException { + Object value = rs.getObject(column); + if (value == null) { + return null; + } + return ((Number) value).intValue(); + } + + private Map parseJsonObject(String json) { + return parseJsonValue(json, new 
TypeReference>() {}, Map.of()); + } + + private T parseJsonValue(String json, TypeReference typeReference, T fallback) { + if (json == null) { + return fallback; + } + try { + return objectMapper.readValue(json, typeReference); + } catch (Exception e) { + log.warn("Failed to parse JSON payload: {}", e.getMessage()); + return fallback; + } + } + + private T parseJsonValue(String json, TypeReference typeReference) { + return parseJsonValue(json, typeReference, null); + } + + private String toJson(Object value) { + try { + return objectMapper.writeValueAsString(value); + } catch (Exception e) { + log.warn("Failed to serialize JSON payload: {}", e.getMessage()); + return "{}"; + } + } + + private String parentInferenceKey(String groupKey, int subClusterId) { + return groupKey + "#" + subClusterId; + } } diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentCandidateExclusionDto.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentCandidateExclusionDto.java new file mode 100644 index 0000000..9789d6c --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentCandidateExclusionDto.java @@ -0,0 +1,28 @@ +package gc.mda.kcg.domain.fleet; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +import java.util.Map; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ParentCandidateExclusionDto { + private Long id; + private String scopeType; + private String groupKey; + private Integer subClusterId; + private String candidateMmsi; + private String reasonType; + private Integer durationDays; + private String activeFrom; + private String activeUntil; + private String releasedAt; + private String releasedBy; + private String actor; + private String comment; + private Boolean active; + private Map metadata; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceCandidateDto.java 
b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceCandidateDto.java new file mode 100644 index 0000000..4eea043 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceCandidateDto.java @@ -0,0 +1,30 @@ +package gc.mda.kcg.domain.fleet; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +import java.util.Map; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ParentInferenceCandidateDto { + private String candidateMmsi; + private String candidateName; + private Integer candidateVesselId; + private Integer rank; + private String candidateSource; + private Double finalScore; + private Double baseCorrScore; + private Double nameMatchScore; + private Double trackSimilarityScore; + private Double visitScore6h; + private Double proximityScore6h; + private Double activitySyncScore6h; + private Double stabilityScore; + private Double registryBonus; + private Double marginFromTop; + private Boolean trackAvailable; + private Map evidence; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceSummaryDto.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceSummaryDto.java new file mode 100644 index 0000000..509fa1a --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceSummaryDto.java @@ -0,0 +1,22 @@ +package gc.mda.kcg.domain.fleet; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ParentInferenceSummaryDto { + private String status; + private String normalizedParentName; + private String selectedParentMmsi; + private String selectedParentName; + private Double confidence; + private String decisionSource; + private Double topScore; + private Double scoreMargin; + private Integer stableCycles; + private String skipReason; + private String statusReason; +} diff --git 
a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceWorkflowController.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceWorkflowController.java new file mode 100644 index 0000000..031dcee --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceWorkflowController.java @@ -0,0 +1,95 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.RequiredArgsConstructor; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.*; + +import java.util.List; +import java.util.Map; + +@RestController +@RequestMapping("/api/vessel-analysis/parent-inference") +@RequiredArgsConstructor +public class ParentInferenceWorkflowController { + + private final GroupPolygonService groupPolygonService; + + @GetMapping("/candidate-exclusions") + public ResponseEntity> getCandidateExclusions( + @RequestParam(required = false) String scopeType, + @RequestParam(required = false) String groupKey, + @RequestParam(required = false) Integer subClusterId, + @RequestParam(required = false) String candidateMmsi, + @RequestParam(defaultValue = "true") boolean activeOnly, + @RequestParam(defaultValue = "100") int limit) { + List items = groupPolygonService.getCandidateExclusions( + scopeType, + groupKey, + subClusterId, + candidateMmsi, + activeOnly, + limit + ); + return ResponseEntity.ok(Map.of("count", items.size(), "items", items)); + } + + @PostMapping("/candidate-exclusions/global") + public ResponseEntity createGlobalCandidateExclusion(@RequestBody GlobalParentCandidateExclusionRequest request) { + try { + return ResponseEntity.ok(groupPolygonService.createGlobalCandidateExclusion(request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } + + @PostMapping("/candidate-exclusions/{exclusionId}/release") + public ResponseEntity releaseCandidateExclusion( + @PathVariable long exclusionId, + @RequestBody ParentWorkflowActionRequest 
request) { + try { + return ResponseEntity.ok(groupPolygonService.releaseCandidateExclusion(exclusionId, request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } + + @GetMapping("/label-sessions") + public ResponseEntity> getLabelSessions( + @RequestParam(required = false) String groupKey, + @RequestParam(required = false) Integer subClusterId, + @RequestParam(required = false) String status, + @RequestParam(defaultValue = "true") boolean activeOnly, + @RequestParam(defaultValue = "100") int limit) { + List items = groupPolygonService.getLabelSessions( + groupKey, + subClusterId, + status, + activeOnly, + limit + ); + return ResponseEntity.ok(Map.of("count", items.size(), "items", items)); + } + + @PostMapping("/label-sessions/{labelSessionId}/cancel") + public ResponseEntity cancelLabelSession( + @PathVariable long labelSessionId, + @RequestBody ParentWorkflowActionRequest request) { + try { + return ResponseEntity.ok(groupPolygonService.cancelLabelSession(labelSessionId, request)); + } catch (IllegalArgumentException e) { + return ResponseEntity.badRequest().body(Map.of("error", e.getMessage())); + } + } + + @GetMapping("/label-sessions/{labelSessionId}/tracking") + public ResponseEntity> getLabelSessionTracking( + @PathVariable long labelSessionId, + @RequestParam(defaultValue = "200") int limit) { + List items = groupPolygonService.getLabelSessionTracking(labelSessionId, limit); + return ResponseEntity.ok(Map.of( + "labelSessionId", labelSessionId, + "count", items.size(), + "items", items + )); + } +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelSessionDto.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelSessionDto.java new file mode 100644 index 0000000..0eee8e9 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelSessionDto.java @@ -0,0 +1,31 @@ +package gc.mda.kcg.domain.fleet; + +import 
com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +import java.util.Map; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ParentLabelSessionDto { + private Long id; + private String groupKey; + private Integer subClusterId; + private String labelParentMmsi; + private String labelParentName; + private Integer labelParentVesselId; + private Integer durationDays; + private String status; + private String activeFrom; + private String activeUntil; + private String actor; + private String comment; + private String anchorSnapshotTime; + private Double anchorCenterLat; + private Double anchorCenterLon; + private Integer anchorMemberCount; + private Boolean active; + private Map metadata; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelTrackingCycleDto.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelTrackingCycleDto.java new file mode 100644 index 0000000..c4d0089 --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentLabelTrackingCycleDto.java @@ -0,0 +1,31 @@ +package gc.mda.kcg.domain.fleet; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.Builder; +import lombok.Getter; + +import java.util.Map; + +@Getter +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +public class ParentLabelTrackingCycleDto { + private Long id; + private Long labelSessionId; + private String observedAt; + private String candidateSnapshotObservedAt; + private String autoStatus; + private String topCandidateMmsi; + private String topCandidateName; + private Double topCandidateScore; + private Double topCandidateMargin; + private Integer candidateCount; + private Boolean labeledCandidatePresent; + private Integer labeledCandidateRank; + private Double labeledCandidateScore; + private Double labeledCandidatePreBonusScore; + private Double labeledCandidateMarginFromTop; + private Boolean matchedTop1; + private Boolean matchedTop3; + private Map 
evidenceSummary; +} diff --git a/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentWorkflowActionRequest.java b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentWorkflowActionRequest.java new file mode 100644 index 0000000..45c85ce --- /dev/null +++ b/backend/src/main/java/gc/mda/kcg/domain/fleet/ParentWorkflowActionRequest.java @@ -0,0 +1,11 @@ +package gc.mda.kcg.domain.fleet; + +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +public class ParentWorkflowActionRequest { + private String actor; + private String comment; +} diff --git a/database/migration/012_gear_parent_inference.sql b/database/migration/012_gear_parent_inference.sql new file mode 100644 index 0000000..07dc4ba --- /dev/null +++ b/database/migration/012_gear_parent_inference.sql @@ -0,0 +1,176 @@ +-- 012: 어구 그룹 모선 추론 저장소 + sub_cluster/resolution 스키마 정합성 + +SET search_path TO kcg, public; + +-- ── live lab과 repo 마이그레이션 정합성 맞추기 ───────────────────── + +ALTER TABLE kcg.group_polygon_snapshots + ADD COLUMN IF NOT EXISTS sub_cluster_id SMALLINT NOT NULL DEFAULT 0; + +ALTER TABLE kcg.group_polygon_snapshots + ADD COLUMN IF NOT EXISTS resolution VARCHAR(20) NOT NULL DEFAULT '6h'; + +CREATE INDEX IF NOT EXISTS idx_gps_type_res_time + ON kcg.group_polygon_snapshots(group_type, resolution, snapshot_time DESC); + +CREATE INDEX IF NOT EXISTS idx_gps_key_res_time + ON kcg.group_polygon_snapshots(group_key, resolution, snapshot_time DESC); + +CREATE INDEX IF NOT EXISTS idx_gps_key_sub_time + ON kcg.group_polygon_snapshots(group_key, sub_cluster_id, snapshot_time DESC); + +ALTER TABLE kcg.gear_correlation_raw_metrics + ADD COLUMN IF NOT EXISTS sub_cluster_id SMALLINT NOT NULL DEFAULT 0; + +CREATE INDEX IF NOT EXISTS idx_raw_metrics_group_sub_time + ON kcg.gear_correlation_raw_metrics(group_key, sub_cluster_id, observed_at DESC); + +ALTER TABLE kcg.gear_correlation_scores + ADD COLUMN IF NOT EXISTS sub_cluster_id SMALLINT NOT NULL DEFAULT 0; + +ALTER TABLE kcg.gear_correlation_scores + DROP 
-- (continuation of: ALTER TABLE kcg.gear_correlation_scores DROP ...)
CONSTRAINT IF EXISTS gear_correlation_scores_model_id_group_key_target_mmsi_key;

DROP INDEX IF EXISTS kcg.gear_correlation_scores_model_id_group_key_target_mmsi_key;

-- re-key uniqueness on (model, group, sub-cluster, target); wrapped in a guard
-- so the migration stays safely re-runnable.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE connamespace = 'kcg'::regnamespace
          AND conrelid = 'kcg.gear_correlation_scores'::regclass
          AND conname = 'gear_correlation_scores_unique'
    ) THEN
        ALTER TABLE kcg.gear_correlation_scores
            ADD CONSTRAINT gear_correlation_scores_unique
            UNIQUE (model_id, group_key, sub_cluster_id, target_mmsi);
    END IF;
END;
$$ LANGUAGE plpgsql;

CREATE INDEX IF NOT EXISTS idx_gc_model_group_sub
    ON kcg.gear_correlation_scores(model_id, group_key, sub_cluster_id, current_score DESC);

-- group-level parent inference store ----------------------------------

-- per-cycle scored candidate snapshot (append-only)
CREATE TABLE IF NOT EXISTS kcg.gear_group_parent_candidate_snapshots (
    id BIGSERIAL PRIMARY KEY,
    observed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    group_key VARCHAR(100) NOT NULL,
    sub_cluster_id SMALLINT NOT NULL DEFAULT 0,
    parent_name TEXT NOT NULL,
    candidate_mmsi VARCHAR(20) NOT NULL,
    candidate_name VARCHAR(200),
    candidate_vessel_id INT REFERENCES kcg.fleet_vessels(id) ON DELETE SET NULL,
    rank INT NOT NULL,
    candidate_source VARCHAR(100) NOT NULL,
    model_id INT REFERENCES kcg.correlation_param_models(id) ON DELETE SET NULL,
    model_name VARCHAR(100),
    base_corr_score DOUBLE PRECISION DEFAULT 0,
    name_match_score DOUBLE PRECISION DEFAULT 0,
    track_similarity_score DOUBLE PRECISION DEFAULT 0,
    visit_score_6h DOUBLE PRECISION DEFAULT 0,
    proximity_score_6h DOUBLE PRECISION DEFAULT 0,
    activity_sync_score_6h DOUBLE PRECISION DEFAULT 0,
    stability_score DOUBLE PRECISION DEFAULT 0,
    registry_bonus DOUBLE PRECISION DEFAULT 0,
    final_score DOUBLE PRECISION DEFAULT 0,
    margin_from_top DOUBLE PRECISION DEFAULT 0,
    evidence JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (observed_at, group_key, sub_cluster_id, candidate_mmsi)
);

CREATE INDEX IF NOT EXISTS idx_ggpcs_group_time
    ON kcg.gear_group_parent_candidate_snapshots(group_key, sub_cluster_id, observed_at DESC, rank ASC);

CREATE INDEX IF NOT EXISTS idx_ggpcs_candidate
    ON kcg.gear_group_parent_candidate_snapshots(candidate_mmsi, observed_at DESC);

-- one row per (group, sub-cluster): current inference/resolution state
CREATE TABLE IF NOT EXISTS kcg.gear_group_parent_resolution (
    group_key VARCHAR(100) NOT NULL,
    sub_cluster_id SMALLINT NOT NULL DEFAULT 0,
    parent_name TEXT NOT NULL,
    normalized_parent_name VARCHAR(200),
    status VARCHAR(40) NOT NULL,
    selected_parent_mmsi VARCHAR(20),
    selected_parent_name VARCHAR(200),
    selected_vessel_id INT REFERENCES kcg.fleet_vessels(id) ON DELETE SET NULL,
    confidence DOUBLE PRECISION,
    decision_source VARCHAR(40),
    top_score DOUBLE PRECISION DEFAULT 0,
    second_score DOUBLE PRECISION DEFAULT 0,
    score_margin DOUBLE PRECISION DEFAULT 0,
    stable_cycles INT DEFAULT 0,
    last_evaluated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_promoted_at TIMESTAMPTZ,
    approved_by VARCHAR(100),
    approved_at TIMESTAMPTZ,
    manual_comment TEXT,
    rejected_candidate_mmsi VARCHAR(20),
    rejected_at TIMESTAMPTZ,
    evidence_summary JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (group_key, sub_cluster_id)
);

CREATE INDEX IF NOT EXISTS idx_ggpr_status
    ON kcg.gear_group_parent_resolution(status, last_evaluated_at DESC);

CREATE INDEX IF NOT EXISTS idx_ggpr_parent
    ON kcg.gear_group_parent_resolution(selected_parent_mmsi);

-- append-only audit log of human review actions
CREATE TABLE IF NOT EXISTS kcg.gear_group_parent_review_log (
    id BIGSERIAL PRIMARY KEY,
    group_key VARCHAR(100) NOT NULL,
    sub_cluster_id SMALLINT NOT NULL DEFAULT 0,
    action VARCHAR(20) NOT NULL,
    selected_parent_mmsi VARCHAR(20),
    actor VARCHAR(100) NOT NULL,
    comment TEXT,
    payload JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX IF NOT EXISTS idx_ggprl_group_time
    ON kcg.gear_group_parent_review_log(group_key, sub_cluster_id, created_at DESC);

-- realign sequences in copied-schema environments ---------------------

SELECT setval(
    pg_get_serial_sequence('kcg.fleet_companies', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.fleet_companies), 1),
    TRUE
);

SELECT setval(
    pg_get_serial_sequence('kcg.fleet_vessels', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.fleet_vessels), 1),
    TRUE
);

SELECT setval(
    pg_get_serial_sequence('kcg.gear_identity_log', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.gear_identity_log), 1),
    TRUE
);

SELECT setval(
    pg_get_serial_sequence('kcg.fleet_tracking_snapshot', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.fleet_tracking_snapshot), 1),
    TRUE
);

SELECT setval(
    pg_get_serial_sequence('kcg.group_polygon_snapshots', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.group_polygon_snapshots), 1),
    TRUE
);

SELECT setval(
    pg_get_serial_sequence('kcg.gear_correlation_scores', 'id'),
    COALESCE((SELECT MAX(id) FROM kcg.gear_correlation_scores), 1),
    TRUE
);

-- ── File: database/migration/013_short_parent_name_cleanup.sql ───────
-- 013: purge rows whose normalized parent name is shorter than 4 characters

SET search_path TO kcg, public;

-- NOTE(review): in a standard-conforming string '[ _%\\-]' also puts a literal
-- backslash in the character class; migration 015 normalizes with
-- '[[:space:]_%-]' instead -- confirm the difference is intended.
DELETE FROM kcg.gear_group_parent_candidate_snapshots
WHERE LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM kcg.gear_group_parent_review_log
WHERE LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM kcg.gear_group_parent_resolution
WHERE LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM kcg.gear_correlation_raw_metrics
WHERE LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM kcg.gear_correlation_scores
WHERE LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM
-- (statement continues in the next chunk: kcg.group_polygon_snapshots ...)
-- (continuation of: DELETE FROM ...)
kcg.group_polygon_snapshots
WHERE group_type IN ('GEAR_IN_ZONE', 'GEAR_OUT_ZONE')
  AND LENGTH(REGEXP_REPLACE(UPPER(group_key), '[ _%\\-]', '', 'g')) < 4;

DELETE FROM kcg.gear_identity_log
WHERE LENGTH(REGEXP_REPLACE(UPPER(COALESCE(parent_name, name)), '[ _%\\-]', '', 'g')) < 4;

-- ── File: database/migration/014_gear_parent_workflow_v2_phase1.sql ──
-- 014: gear parent review workflow v2, phase 1

SET search_path TO kcg, public;

-- group/global candidate exclusions -----------------------------------

CREATE TABLE IF NOT EXISTS kcg.gear_parent_candidate_exclusions (
    id BIGSERIAL PRIMARY KEY,
    scope_type VARCHAR(16) NOT NULL,
    group_key VARCHAR(100),
    sub_cluster_id SMALLINT,
    candidate_mmsi VARCHAR(20) NOT NULL,
    reason_type VARCHAR(32) NOT NULL,
    duration_days INT,
    active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    active_until TIMESTAMPTZ,
    released_at TIMESTAMPTZ,
    released_by VARCHAR(100),
    actor VARCHAR(100) NOT NULL,
    comment TEXT,
    metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT chk_gpce_scope CHECK (scope_type IN ('GROUP', 'GLOBAL')),
    CONSTRAINT chk_gpce_reason CHECK (reason_type IN ('GROUP_WRONG_PARENT', 'GLOBAL_NOT_PARENT_TARGET')),
    -- GROUP scope is time-boxed (1/3/5 days, explicit end); GLOBAL is open-ended
    CONSTRAINT chk_gpce_group_scope CHECK (
        (scope_type = 'GROUP' AND group_key IS NOT NULL AND sub_cluster_id IS NOT NULL AND duration_days IN (1, 3, 5) AND active_until IS NOT NULL)
        OR
        (scope_type = 'GLOBAL' AND duration_days IS NULL)
    )
);

CREATE INDEX IF NOT EXISTS idx_gpce_scope_mmsi_active
    ON kcg.gear_parent_candidate_exclusions(scope_type, candidate_mmsi, active_from DESC)
    WHERE released_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_gpce_group_active
    ON kcg.gear_parent_candidate_exclusions(group_key, sub_cluster_id, active_from DESC)
    WHERE released_at IS NULL;

CREATE INDEX IF NOT EXISTS idx_gpce_active_until
    ON kcg.gear_parent_candidate_exclusions(active_until);

-- time-boxed ground-truth label sessions ------------------------------

CREATE TABLE IF NOT EXISTS kcg.gear_parent_label_sessions (
    id BIGSERIAL PRIMARY KEY,
    group_key VARCHAR(100) NOT NULL,
    sub_cluster_id SMALLINT NOT NULL,
    label_parent_mmsi VARCHAR(20) NOT NULL,
    label_parent_name VARCHAR(200),
    label_parent_vessel_id INT REFERENCES kcg.fleet_vessels(id) ON DELETE SET NULL,
    duration_days INT NOT NULL,
    active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    active_until TIMESTAMPTZ NOT NULL,
    status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE',
    actor VARCHAR(100) NOT NULL,
    comment TEXT,
    anchor_snapshot_time TIMESTAMPTZ,
    anchor_center_point geometry(Point, 4326),
    anchor_member_mmsis JSONB NOT NULL DEFAULT '[]'::jsonb,
    metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT chk_gpls_duration CHECK (duration_days IN (1, 3, 5)),
    CONSTRAINT chk_gpls_status CHECK (status IN ('ACTIVE', 'EXPIRED', 'CANCELLED'))
);

CREATE INDEX IF NOT EXISTS idx_gpls_group_active
    ON kcg.gear_parent_label_sessions(group_key, sub_cluster_id, active_from DESC)
    WHERE status = 'ACTIVE';

CREATE INDEX IF NOT EXISTS idx_gpls_mmsi_active
    ON kcg.gear_parent_label_sessions(label_parent_mmsi, active_from DESC)
    WHERE status = 'ACTIVE';

CREATE INDEX IF NOT EXISTS idx_gpls_active_until
    ON kcg.gear_parent_label_sessions(active_until);

-- per-cycle auto-inference records while a label session is active ----

CREATE TABLE IF NOT EXISTS kcg.gear_parent_label_tracking_cycles (
    id BIGSERIAL PRIMARY KEY,
    label_session_id BIGINT NOT NULL REFERENCES kcg.gear_parent_label_sessions(id) ON DELETE CASCADE,
    observed_at TIMESTAMPTZ NOT NULL,
    candidate_snapshot_observed_at TIMESTAMPTZ,
    auto_status VARCHAR(40),
    top_candidate_mmsi VARCHAR(20),
    top_candidate_name VARCHAR(200),
    top_candidate_score DOUBLE PRECISION,
    top_candidate_margin DOUBLE PRECISION,
    candidate_count INT NOT NULL DEFAULT 0,
    labeled_candidate_present BOOLEAN NOT NULL DEFAULT FALSE,
    labeled_candidate_rank INT,
    labeled_candidate_score DOUBLE PRECISION,
    labeled_candidate_pre_bonus_score DOUBLE PRECISION,
    labeled_candidate_margin_from_top DOUBLE PRECISION,
    matched_top1 BOOLEAN NOT NULL DEFAULT FALSE,
    matched_top3 BOOLEAN NOT NULL DEFAULT FALSE,
    evidence_summary JSONB NOT NULL DEFAULT '{}'::jsonb,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT uq_gpltc_session_observed UNIQUE (label_session_id, observed_at)
);

CREATE INDEX IF NOT EXISTS idx_gpltc_session_observed
    ON kcg.gear_parent_label_tracking_cycles(label_session_id, observed_at DESC);

CREATE INDEX IF NOT EXISTS idx_gpltc_top_candidate
    ON kcg.gear_parent_label_tracking_cycles(top_candidate_mmsi);

-- active views --------------------------------------------------------

CREATE OR REPLACE VIEW kcg.vw_active_gear_parent_candidate_exclusions AS
SELECT *
FROM kcg.gear_parent_candidate_exclusions
WHERE released_at IS NULL
  AND active_from <= NOW()
  AND (active_until IS NULL OR active_until > NOW());

CREATE OR REPLACE VIEW kcg.vw_active_gear_parent_label_sessions AS
SELECT *
FROM kcg.gear_parent_label_sessions
WHERE status = 'ACTIVE'
  AND active_from <= NOW()
  AND active_until > NOW();

-- ── File: database/migration/015_gear_parent_episode_tracking.sql ────
-- 015: gear parent inference episode continuity + prior bonus

SET search_path TO kcg, public;

ALTER TABLE kcg.gear_group_parent_candidate_snapshots
    ADD COLUMN IF NOT EXISTS normalized_parent_name VARCHAR(200);

ALTER TABLE kcg.gear_group_parent_candidate_snapshots
-- (statement continues in the next chunk: ADD COLUMN IF NOT EXISTS episode_id ...)
-- (continuation of: ALTER TABLE kcg.gear_group_parent_candidate_snapshots ...)
    ADD COLUMN IF NOT EXISTS episode_id VARCHAR(64);

ALTER TABLE kcg.gear_group_parent_candidate_snapshots
    ADD COLUMN IF NOT EXISTS episode_prior_bonus DOUBLE PRECISION NOT NULL DEFAULT 0;

ALTER TABLE kcg.gear_group_parent_candidate_snapshots
    ADD COLUMN IF NOT EXISTS lineage_prior_bonus DOUBLE PRECISION NOT NULL DEFAULT 0;

ALTER TABLE kcg.gear_group_parent_candidate_snapshots
    ADD COLUMN IF NOT EXISTS label_prior_bonus DOUBLE PRECISION NOT NULL DEFAULT 0;

-- backfill: lineage key = upper-cased parent name with whitespace/_/%/- stripped
UPDATE kcg.gear_group_parent_candidate_snapshots
   SET normalized_parent_name = regexp_replace(upper(COALESCE(parent_name, '')), '[[:space:]_%-]+', '', 'g')
 WHERE normalized_parent_name IS NULL;

CREATE INDEX IF NOT EXISTS idx_ggpcs_episode_time
    ON kcg.gear_group_parent_candidate_snapshots(episode_id, observed_at DESC);

CREATE INDEX IF NOT EXISTS idx_ggpcs_lineage_time
    ON kcg.gear_group_parent_candidate_snapshots(normalized_parent_name, observed_at DESC);

ALTER TABLE kcg.gear_group_parent_resolution
    ADD COLUMN IF NOT EXISTS episode_id VARCHAR(64);

ALTER TABLE kcg.gear_group_parent_resolution
    ADD COLUMN IF NOT EXISTS continuity_source VARCHAR(32);

ALTER TABLE kcg.gear_group_parent_resolution
    ADD COLUMN IF NOT EXISTS continuity_score DOUBLE PRECISION;

ALTER TABLE kcg.gear_group_parent_resolution
    ADD COLUMN IF NOT EXISTS prior_bonus_total DOUBLE PRECISION NOT NULL DEFAULT 0;

CREATE INDEX IF NOT EXISTS idx_ggpr_episode
    ON kcg.gear_group_parent_resolution(episode_id);

ALTER TABLE kcg.gear_parent_label_sessions
    ADD COLUMN IF NOT EXISTS normalized_parent_name VARCHAR(200);

-- NOTE(review): label sessions derive the lineage key from group_key while
-- candidate snapshots derive it from parent_name -- confirm group_key always
-- carries the parent name for these rows.
UPDATE kcg.gear_parent_label_sessions
   SET normalized_parent_name = regexp_replace(upper(COALESCE(group_key, '')), '[[:space:]_%-]+', '', 'g')
 WHERE normalized_parent_name IS NULL;

CREATE INDEX IF NOT EXISTS idx_gpls_lineage_active
    ON kcg.gear_parent_label_sessions(normalized_parent_name, active_from DESC);

-- episode state: continuity memory decoupled from sub_cluster_id ------

CREATE TABLE IF NOT EXISTS kcg.gear_group_episodes (
    episode_id VARCHAR(64) PRIMARY KEY,
    lineage_key VARCHAR(200) NOT NULL,
    group_key VARCHAR(100) NOT NULL,
    normalized_parent_name VARCHAR(200) NOT NULL,
    current_sub_cluster_id SMALLINT NOT NULL DEFAULT 0,
    status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE',
    continuity_source VARCHAR(32) NOT NULL DEFAULT 'NEW',
    continuity_score DOUBLE PRECISION,
    first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    last_snapshot_time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    current_member_count INT NOT NULL DEFAULT 0,
    current_member_mmsis JSONB NOT NULL DEFAULT '[]'::jsonb,
    current_center_point geometry(Point, 4326),
    split_from_episode_id VARCHAR(64),
    merged_from_episode_ids JSONB NOT NULL DEFAULT '[]'::jsonb,
    merged_into_episode_id VARCHAR(64),
    metadata JSONB NOT NULL DEFAULT '{}'::jsonb,
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    CONSTRAINT chk_gge_status CHECK (status IN ('ACTIVE', 'MERGED', 'EXPIRED')),
    CONSTRAINT chk_gge_continuity CHECK (continuity_source IN ('NEW', 'CONTINUED', 'SPLIT_CONTINUE', 'SPLIT_NEW', 'MERGE_NEW', 'DIRECT_PARENT_MATCH'))
);

CREATE INDEX IF NOT EXISTS idx_gge_lineage_status_time
    ON kcg.gear_group_episodes(lineage_key, status, last_snapshot_time DESC);

CREATE INDEX IF NOT EXISTS idx_gge_group_time
    ON kcg.gear_group_episodes(group_key, current_sub_cluster_id, last_snapshot_time DESC);

-- per-cycle episode continuity snapshots ------------------------------

CREATE TABLE IF NOT EXISTS kcg.gear_group_episode_snapshots (
    id BIGSERIAL PRIMARY KEY,
    episode_id VARCHAR(64) NOT NULL REFERENCES kcg.gear_group_episodes(episode_id) ON DELETE CASCADE,
    lineage_key VARCHAR(200) NOT NULL,
    group_key VARCHAR(100) NOT NULL,
    normalized_parent_name VARCHAR(200) NOT NULL,
    sub_cluster_id SMALLINT NOT NULL DEFAULT 0,
    observed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    member_count INT NOT NULL DEFAULT 0,
    member_mmsis JSONB NOT NULL DEFAULT '[]'::jsonb,
    center_point geometry(Point, 4326),
    continuity_source VARCHAR(32) NOT NULL,
    continuity_score DOUBLE PRECISION,
-- (column list continues in the next chunk: parent_episode_ids JSONB ...)
parent_episode_ids JSONB NOT NULL DEFAULT '[]'::jsonb, + top_candidate_mmsi VARCHAR(20), + top_candidate_score DOUBLE PRECISION, + resolution_status VARCHAR(40), + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT uq_gges_episode_observed UNIQUE (episode_id, observed_at) +); + +CREATE INDEX IF NOT EXISTS idx_gges_lineage_observed + ON kcg.gear_group_episode_snapshots(lineage_key, observed_at DESC); + +CREATE INDEX IF NOT EXISTS idx_gges_group_observed + ON kcg.gear_group_episode_snapshots(group_key, sub_cluster_id, observed_at DESC); diff --git a/docs/GEAR-PARENT-INFERENCE-ALGORITHM-SPEC.md b/docs/GEAR-PARENT-INFERENCE-ALGORITHM-SPEC.md new file mode 100644 index 0000000..051e2ed --- /dev/null +++ b/docs/GEAR-PARENT-INFERENCE-ALGORITHM-SPEC.md @@ -0,0 +1,514 @@ +# Gear Parent Inference Algorithm Spec + +## 문서 목적 + +이 문서는 현재 구현된 어구 모선 추적 알고리즘을 모듈, 메서드, 파라미터, 판단 기준, 저장소, 엔드포인트, 영향 관계 기준으로 정리한 구현 명세다. `GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md`가 서술형 통합 문서라면, 이 문서는 구현과 후속 변경 작업에 바로 연결할 수 있는 참조 스펙이다. + +## 1. 시스템 요약 + +### 1.1 현재 목적 + +- 최근 24시간 한국 수역 AIS를 캐시에 유지한다. +- 어구 이름 패턴과 위치를 기준으로 어구 그룹을 만든다. +- 주변 선박/오분류 어구를 correlation 후보로 평가한다. +- 후보 중 대표 모선 가능성이 높은 선박을 추론한다. +- 사람의 라벨/제외를 별도 저장소에 남겨 향후 모델 평가와 자동화 전환에 활용한다. + +### 1.2 현재 점수 구조의 역할 구분 + +- `gear_correlation_scores.current_score` + - 후보 스크리닝용 correlation score + - EMA 기반 단기 메모리 +- `gear_group_parent_candidate_snapshots.final_score` + - 모선 추론용 최종 후보 점수 + - coverage-aware 보정과 이름/안정성/episode/lineage/label prior 반영 +- `gear_group_parent_resolution` + - 그룹별 현재 추론 상태 +- `gear_group_episodes`, `gear_group_episode_snapshots` + - `sub_cluster_id`와 분리된 continuity memory +- `gear_parent_label_tracking_cycles` + - 라벨 세션 동안의 자동 추론 성능 추적 + +## 2. 
현재 DB 저장소와 유지 기간 + +| 저장소 | 역할 | 현재 유지 규칙 | +| --- | --- | --- | +| `group_polygon_snapshots` | `1h/1h-fb/6h` 그룹 스냅샷 | `7일` cleanup | +| `gear_correlation_raw_metrics` | correlation raw metric 시계열 | `7일` retention partition | +| `gear_correlation_scores` | correlation EMA score 현재 상태 | `30일` 미관측 시 cleanup | +| `gear_group_parent_candidate_snapshots` | cycle별 parent candidate snapshot | 현재 자동 cleanup 없음 | +| `gear_group_parent_resolution` | 그룹별 현재 추론 상태 1행 | 현재 자동 cleanup 없음 | +| `gear_group_episodes` | active/merged/expired episode 현재 상태 | 현재 자동 cleanup 없음 | +| `gear_group_episode_snapshots` | cycle별 episode continuity 스냅샷 | 현재 자동 cleanup 없음 | +| `gear_parent_candidate_exclusions` | 그룹/전역 후보 제외 | 기간 종료 또는 수동 해제까지 | +| `gear_parent_label_sessions` | 정답 라벨 세션 | 만료 시 `EXPIRED`, row는 유지 | +| `gear_parent_label_tracking_cycles` | 라벨 세션 cycle별 추적 | 현재 자동 cleanup 없음 | + +## 3. 모듈 인덱스 + +### 3.1 시간/원천 적재 + +| 모듈 | 메서드 | 역할 | +| --- | --- | --- | +| `prediction/time_bucket.py` | `compute_safe_bucket()` | DB 적재 완료 전 bucket 차단 | +| `prediction/time_bucket.py` | `compute_initial_window_start()` | 초기 24h window 시작점 | +| `prediction/time_bucket.py` | `compute_incremental_window_start()` | overlap backfill 시작점 | +| `prediction/db/snpdb.py` | `fetch_all_tracks()` | safe bucket까지 초기 bulk 적재 | +| `prediction/db/snpdb.py` | `fetch_incremental()` | backfill 포함 증분 적재 | +| `prediction/cache/vessel_store.py` | `load_initial()` | 초기 메모리 캐시 구성 | +| `prediction/cache/vessel_store.py` | `merge_incremental()` | 증분 merge + dedupe | +| `prediction/cache/vessel_store.py` | `evict_stale()` | 24h sliding window 유지 | + +### 3.2 어구 identity / 그룹 + +| 모듈 | 메서드 | 역할 | +| --- | --- | --- | +| `prediction/fleet_tracker.py` | `track_gear_identity()` | 어구 이름 파싱, identity log 관리 | +| `prediction/algorithms/gear_name_rules.py` | `normalize_parent_name()` | 모선명 정규화 | +| `prediction/algorithms/gear_name_rules.py` | `is_trackable_parent_name()` | 짧은 이름 제외 | +| `prediction/algorithms/polygon_builder.py` | 
`detect_gear_groups()` | 어구 그룹 및 서브클러스터 생성 | +| `prediction/algorithms/polygon_builder.py` | `build_all_group_snapshots()` | `1h/1h-fb/6h` 스냅샷 저장용 생성 | + +### 3.3 correlation / parent inference + +| 모듈 | 메서드 | 역할 | +| --- | --- | --- | +| `prediction/algorithms/gear_correlation.py` | `run_gear_correlation()` | raw metric + EMA score 계산 | +| `prediction/algorithms/gear_correlation.py` | `_compute_gear_vessel_metrics()` | proximity/visit/activity 계산 | +| `prediction/algorithms/gear_correlation.py` | `update_score()` | EMA + freeze/decay 상태 전이 | +| `prediction/algorithms/gear_parent_episode.py` | `build_episode_plan()` | continuity source와 episode assignment 계산 | +| `prediction/algorithms/gear_parent_episode.py` | `compute_prior_bonus_components()` | episode/lineage/label prior bonus 계산 | +| `prediction/algorithms/gear_parent_episode.py` | `sync_episode_states()` | `gear_group_episodes` upsert | +| `prediction/algorithms/gear_parent_episode.py` | `insert_episode_snapshots()` | episode snapshot append | +| `prediction/algorithms/gear_parent_inference.py` | `run_gear_parent_inference()` | 최종 모선 추론 실행 | +| `prediction/algorithms/gear_parent_inference.py` | `_build_candidate_scores()` | 후보별 상세 점수 계산 | +| `prediction/algorithms/gear_parent_inference.py` | `_name_match_score()` | 이름 점수 규칙 | +| `prediction/algorithms/gear_parent_inference.py` | `_build_track_coverage_metrics()` | coverage-aware evidence 계산 | +| `prediction/algorithms/gear_parent_inference.py` | `_select_status()` | 상태 전이 규칙 | + +### 3.4 backend read model / workflow + +| 모듈 | 메서드 | 역할 | +| --- | --- | --- | +| `GroupPolygonService.java` | group list/review/detail SQL | 최신 `1h` live + stale suppression read model | +| `ParentInferenceWorkflowController.java` | exclusion/label API | 사람 판단 저장소 API | + +## 4. 
메서드 상세 + +## 4.1 `prediction/time_bucket.py` + +### `compute_safe_bucket(now: datetime | None = None) -> datetime` + +- 입력: + - 현재 시각 +- 출력: + - `safe_delay`를 뺀 뒤 5분 단위로 내림한 KST naive bucket +- 기준: + - `SNPDB_SAFE_DELAY_MIN` +- 영향: + - 초기 적재, 증분 적재, eviction 기준점 + +### `compute_incremental_window_start(last_bucket: datetime) -> datetime` + +- 입력: + - 현재 캐시의 마지막 처리 bucket +- 출력: + - `last_bucket - SNPDB_BACKFILL_BUCKETS * 5m` +- 의미: + - 늦게 들어온 같은 bucket row 재흡수 + +## 4.2 `prediction/db/snpdb.py` + +### `fetch_all_tracks(hours: int = 24) -> pd.DataFrame` + +- 역할: + - safe bucket까지 최근 `N`시간 full load +- 핵심 쿼리 조건: + - bbox: `122,31,132,39` + - `time_bucket > window_start` + - `time_bucket <= safe_bucket` +- 출력 컬럼: + - `mmsi`, `timestamp`, `time_bucket`, `lat`, `lon`, `raw_sog` + +### `fetch_incremental(last_bucket: datetime) -> pd.DataFrame` + +- 역할: + - overlap backfill 포함 증분 load +- 핵심 쿼리 조건: + - `time_bucket > from_bucket` + - `time_bucket <= safe_bucket` +- 주의: + - 이미 본 bucket도 일부 다시 읽는 구조다 + +## 4.3 `prediction/cache/vessel_store.py` + +### `load_initial(hours: int = 24) -> None` + +- 역할: + - 초기 bulk DataFrame을 MMSI별 track cache로 구성 +- 파생 효과: + - `_last_bucket` 갱신 + - static info refresh + - permit registry refresh + +### `merge_incremental(df_new: pd.DataFrame) -> None` + +- 역할: + - 증분 batch merge +- 기준: + - `timestamp`, `time_bucket` 정렬 + - `timestamp` 기준 dedupe +- 영향: + - 같은 bucket overlap backfill에서도 최종 row만 유지 + +### `evict_stale(hours: int = 24) -> None` + +- 역할: + - sliding 24h 유지 +- 기준: + - `time_bucket` 있으면 bucket 기준 + - 없으면 timestamp fallback + +## 4.4 `prediction/fleet_tracker.py` + +### `track_gear_identity(gear_signals, conn) -> None` + +- 역할: + - 어구 이름 패턴에서 `parent_name`, `gear_index_1`, `gear_index_2` 추출 + - `gear_identity_log` insert/update +- 입력: + - gear signal list +- 주요 기준: + - 정규화 길이 `< 4`면 건너뜀 + - 같은 이름, 다른 MMSI는 identity migration 처리 +- 영향: + - `gear_correlation_scores.target_mmsi`를 새 MMSI로 이전 가능 + +## 4.5 
`prediction/algorithms/polygon_builder.py` + +### `detect_gear_groups(vessel_store) -> list[dict]` + +- 역할: + - 어구 이름 기반 raw group 생성 + - 거리 기반 서브클러스터 분리 + - 근접 병합 +- 입력: + - `all_positions` +- 주요 기준: + - 어구 패턴 매칭 + - `440/441` 제외 + - `is_trackable_parent_name()` + - `MAX_DIST_DEG = 0.15` +- 출력: + - `parent_name`, `parent_mmsi`, `sub_cluster_id`, `members` + +### `build_all_group_snapshots(vessel_store, company_vessels, companies) -> list[dict]` + +- 역할: + - `FLEET`, `GEAR_IN_ZONE`, `GEAR_OUT_ZONE`의 `1h/1h-fb/6h` snapshot 생성 +- 주요 기준: + - 같은 `parent_name` 전체 기준 1h active member 수 + - `GEAR_OUT_ZONE` 최소 멤버 수 + - parent nearby 시 `isParent=true` + +## 4.6 `prediction/algorithms/gear_correlation.py` + +### `run_gear_correlation(vessel_store, gear_groups, conn) -> dict` + +- 역할: + - 그룹당 후보 탐색 + - raw metric 저장 + - EMA score 갱신 +- 입력: + - `gear_groups` +- 출력: + - `updated`, `models`, `raw_inserted` + +### `_compute_gear_vessel_metrics(gear_center_lat, gear_center_lon, gear_radius_nm, vessel_track, params) -> dict` + +- 출력 metric: + - `proximity_ratio` + - `visit_score` + - `activity_sync` + - `composite` +- 한계: + - raw metric은 짧은 항적에 과대 우호적일 수 있음 + - 이 문제는 parent inference 단계의 coverage-aware 보정에서 완화 + +### `update_score(prev_score, raw_score, streak, last_observed, now, gear_group_active_ratio, shadow_bonus, params) -> tuple` + +- 상태: + - `ACTIVE` + - `PATTERN_DIVERGE` + - `GROUP_QUIET` + - `NORMAL_GAP` + - `SIGNAL_LOSS` +- 의미: + - correlation score는 장기 기억보다 short-memory EMA에 가깝다 + +## 4.7 `prediction/algorithms/gear_parent_inference.py` + +### `run_gear_parent_inference(vessel_store, gear_groups, conn) -> dict[str, int]` + +- 역할: + - direct parent 보강 + - active exclusion/label 적용 + - 후보 점수 계산 + - 상태 전이 + - snapshot/resolution/tracking 저장 + +### `_load_existing_resolution(conn, group_keys) -> dict` + +- 역할: + - 현재 그룹의 이전 resolution 상태 로드 +- 현재 쓰임: + - `PREVIOUS_SELECTION` 후보 seed + - `stable_cycles` + - `MANUAL_CONFIRMED` 유지 + - reject cooldown + +### 
`_build_candidate_scores(...) -> list[CandidateScore]` + +- 후보 집합 원천: + - 상위 correlation 후보 + - registry name exact bucket + - previous selection +- 제거 규칙: + - global exclusion + - group exclusion + - reject cooldown +- 점수 항목: + - `base_corr_score` + - `name_match_score` + - `track_similarity_score` + - `visit_score_6h` + - `proximity_score_6h` + - `activity_sync_score_6h` + - `stability_score` + - `registry_bonus` + - `china_mmsi_bonus` 후가산 + +### `_name_match_score(parent_name, candidate_name, registry) -> float` + +- 규칙: + - 원문 동일 `1.0` + - 정규화 동일 `0.8` + - prefix/contains `0.5` + - 숫자 제거 후 문자 부분 동일 `0.3` + - else `0.0` + +### `_build_track_coverage_metrics(center_track, vessel_track, gear_center_lat, gear_center_lon) -> dict` + +- 역할: + - short-track 과대평가 방지용 증거 강도 계산 +- 핵심 출력: + - `trackCoverageFactor` + - `visitCoverageFactor` + - `activityCoverageFactor` + - `coverageFactor` +- downstream: + - `track`, `visit`, `proximity`, `activity` raw score에 곱해 effective score 생성 + +## 4.8 `prediction/algorithms/gear_parent_episode.py` + +### `build_episode_plan(groups, previous_by_lineage) -> EpisodePlan` + +- 역할: + - 현재 cycle group을 이전 active episode와 매칭 + - `NEW`, `CONTINUED`, `SPLIT_CONTINUE`, `SPLIT_NEW`, `MERGE_NEW` 결정 +- 입력: + - `GroupEpisodeInput[]` + - 최근 `6h` active `EpisodeState[]` +- continuity score: + - `0.75 * member_jaccard + 0.25 * center_support` +- 기준: + - `member_jaccard` + - 중심점 거리 `12nm` + - continuity score threshold `0.45` + - merge score threshold `0.35` +- 출력: + - assignment map + - expired episode set + - merged target map + +### `compute_prior_bonus_components(...) 
-> dict[str, float]` + +- 역할: + - 동일 candidate에 대한 episode/lineage/label prior bonus 계산 +- 입력 집계 범위: + - episode prior: `24h` + - lineage prior: `7d` + - label prior: `30d` +- cap: + - `episode <= 0.10` + - `lineage <= 0.05` + - `label <= 0.10` + - `total <= 0.20` +- 출력: + - `episodePriorBonus` + - `lineagePriorBonus` + - `labelPriorBonus` + - `priorBonusTotal` + +### `sync_episode_states(conn, observed_at, plan) -> None` + +- 역할: + - active/merged/expired episode 상태를 `gear_group_episodes`에 반영 +- 기준: + - merge 대상은 `MERGED` + - continuity 없는 old episode는 `EXPIRED` + +### `insert_episode_snapshots(conn, observed_at, plan, payloads) -> int` + +- 역할: + - cycle별 continuity 결과와 top candidate/result를 `gear_group_episode_snapshots`에 저장 +- 기록: + - `episode_id` + - `parent_episode_ids` + - `top_candidate_mmsi` + - `top_candidate_score` + - `resolution_status` + +### `_select_status(top_candidate, margin, stable_cycles) -> tuple[str, str]` (구현 위치: `gear_parent_inference.py`, §3.3 참조) + +- 상태: + - `NO_CANDIDATE` + - `AUTO_PROMOTED` + - `REVIEW_REQUIRED` + - `UNRESOLVED` +- auto promotion 조건: + - `target_type == VESSEL` + - `CORRELATION` source 포함 + - `final_score >= 0.72` + - `margin >= 0.15` + - `stable_cycles >= 3` +- review 조건: + - `final_score >= 0.60` + +## 5. 
현재 엔드포인트 스펙 + +## 5.1 조회 계열 + +### `/api/kcg/vessel-analysis/groups/parent-inference/review` + +- 역할: + - 최신 전역 `1h` 기준 검토 대기 목록 +- 조건: + - stale resolution 숨김 + - candidate count는 latest candidate snapshot 기준 + +### `/api/kcg/vessel-analysis/groups/{groupKey}/parent-inference` + +- 역할: + - 특정 그룹의 현재 live sub-cluster 상세 +- 주의: + - “현재 최신 전역 `1h`에 실제 존재하는 sub-cluster만” 반환 + +### `/api/kcg/vessel-analysis/parent-inference/candidate-exclusions` + +- 역할: + - 그룹/전역 제외 목록 조회 + +### `/api/kcg/vessel-analysis/parent-inference/label-sessions` + +- 역할: + - active 또는 전체 라벨 세션 조회 + +## 5.2 액션 계열 + +### `POST /candidate-exclusions/global` + +- 역할: + - 전역 후보 제외 생성 +- 영향: + - 다음 cycle부터 모든 그룹에서 해당 MMSI 제거 + +### `POST /groups/{groupKey}/parent-inference/{subClusterId}/exclude` + +- 역할: + - 그룹 단위 후보 제외 생성 +- 영향: + - 다음 cycle부터 해당 그룹에서만 제거 + +### `POST /groups/{groupKey}/parent-inference/{subClusterId}/label` + +- 역할: + - 기간형 정답 라벨 세션 생성 +- 영향: + - 다음 cycle부터 tracking row 누적 + +## 6. 현재 기억 구조와 prior bonus + +### 6.1 short-memory와 long-memory의 분리 + +- `gear_correlation_scores` + - EMA short-memory + - 미관측 시 decay + - 현재 후보 seed 역할 +- `gear_group_parent_resolution` + - 현재 상태 1행 + - same-episode가 아니면 `PREVIOUS_SELECTION` carry를 직접 사용하지 않음 +- `gear_group_episodes` + - continuity memory +- `candidate_snapshots` + - bonus 집계 원천 + +### 6.2 현재 final score의 장기 기억 반영 + +현재는 과거 점수를 직접 carry하지 않고, 약한 prior bonus만 후가산한다. + +```text +final_score = + current_signal_score + + china_mmsi_bonus + + prior_bonus_total +``` + +여기서 `prior_bonus_total`은: + +- `episode_prior_bonus` +- `lineage_prior_bonus` +- `label_prior_bonus` + +의 합이며 총합 cap은 `0.20`이다. + +### 6.3 왜 weak prior인가 + +과거 점수를 그대로 넘기면: + +- 다른 episode로 잘못 관성이 전이될 수 있다 +- split/merge 이후 잘못된 top1이 고착될 수 있다 +- 오래된 오답이 장기 drift로 남을 수 있다 + +그래서 현재 구현은 과거 점수를 “현재 score 자체”가 아니라 “작은 bonus”로만 쓴다. + +## 7. 현재 continuity / prior 동작 + +### 7.1 episode continuity + +- 같은 lineage 안에서 최근 `6h` active episode를 불러온다. 
+- continuity score가 높은 이전 episode가 있으면 `CONTINUED` +- 1개 parent episode가 여러 current cluster로 이어지면 `SPLIT_CONTINUE` + `SPLIT_NEW` +- 여러 previous episode가 하나 current cluster로 모이면 `MERGE_NEW` +- 어떤 current와도 연결되지 못한 old episode는 `EXPIRED` + +### 7.2 prior 집계 + +| prior | 참조 범위 | 현재 집계 값 | +| --- | --- | --- | +| episode prior | 최근 동일 episode `24h` | seen_count, top1_count, avg_score, last_seen_at | +| lineage prior | 동일 이름 lineage `7d` | seen_count, top1_count, top3_count, avg_score, last_seen_at | +| label prior | 라벨 세션 `30d` | session_count, last_labeled_at | + +### 7.3 구현 시 주의 + +- 과거 점수를 직접 누적하지 말 것 +- prior는 bonus로만 사용하고 cap을 둘 것 +- split/merge 이후 parent 후보 관성은 약하게만 상속할 것 +- stale live sub-cluster와 vanished old sub-cluster를 혼동하지 않도록, aggregation도 최신 episode anchor를 기준으로 할 것 + +## 8. 참조 문서 + +- [GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md](./GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md) +- [GEAR-PARENT-INFERENCE-WORKFLOW-V2.md](./GEAR-PARENT-INFERENCE-WORKFLOW-V2.md) +- [GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md](./GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md) diff --git a/docs/GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md b/docs/GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md new file mode 100644 index 0000000..ec29c2f --- /dev/null +++ b/docs/GEAR-PARENT-INFERENCE-DATAFLOW-PAPER.md @@ -0,0 +1,677 @@ +# Gear Parent Inference Dataflow Paper + +## 초록 + +이 문서는 `iran-airstrike-replay-codex`의 한국 수역 어구 모선 추적 체계를 코드 기준으로 복원하는 통합 기술 문서다. 범위는 `snpdb` 5분 궤적 적재, 인메모리 캐시 유지, 어구 그룹 검출, 서브클러스터 생성, `1h/1h-fb/6h` 폴리곤 스냅샷 저장, correlation 기반 후보 점수화, coverage-aware parent inference, `episode_id` 기반 연속성 계층, backend read model, review/exclusion/label v2까지 포함한다. 문서의 목적은 “현재 무엇이 구현되어 있고, 각 경우의 수에서 어떤 분기 규칙이 적용되는가”를 한 문서에서 복원 가능하게 만드는 것이다. + +## 1. 
범위와 전제 + +### 1.1 구현 기준 + +- frontend: `frontend/` +- backend: `backend/` +- prediction: `prediction/` +- schema migration: `database/migration/012_gear_parent_inference.sql`, `database/migration/014_gear_parent_workflow_v2_phase1.sql`, `database/migration/015_gear_parent_episode_tracking.sql` + +### 1.2 실행 환경 + +- lab backend: `rocky-211:18083` +- lab prediction: `redis-211:18091` +- lab schema: `kcg_lab` +- 로컬 프론트 진입점: `yarn dev:lab`, `yarn dev:lab:ssh` + +### 1.3 문서의 구분 + +- 구현됨: + - 현재 repo 코드와 lab 배포에 이미 반영된 규칙 +- 후속 확장 후보: + - episode continuity 위에서 추가로 올릴 `focus mode`, richer episode lineage API, calibration report + +## 2. 문제 정의 + +이 시스템은 한국 수역에서 AIS 신호를 이용해 아래 문제를 단계적으로 푼다. + +1. 최근 24시간의 선박/어구 궤적을 메모리 캐시에 유지한다. +2. 동일한 어구 이름 계열을 공간적으로 묶어 어구 그룹을 만든다. +3. 각 그룹에 대해 `1h`, `1h-fb`, `6h` 스냅샷을 생성한다. +4. 주변 선박 또는 잘못 분류된 어구 AIS를 후보로 수집하고 correlation 점수를 만든다. +5. 후보를 모선 추론 점수로 다시 환산한다. +6. 사람이 라벨/제외를 누적해 모델 정확도 고도화용 데이터셋을 만든다. + +핵심 난점은 아래 세 가지다. + +- DB 적재 지연 때문에 live incremental cache와 fresh reload가 다를 수 있다. +- 같은 `parent_name` 아래에서도 실제로는 여러 공간 덩어리로 갈라질 수 있다. +- 짧은 항적이 `track/proximity/activity`에서 과대평가될 수 있다. + +## 3. 전체 아키텍처 흐름 + +```mermaid +flowchart LR + A["signal.t_vessel_tracks_5min
5분 bucket LineStringM"] --> B["prediction/db/snpdb.py
safe bucket + overlap backfill"] + B --> C["prediction/cache/vessel_store.py
24h in-memory cache"] + C --> D["prediction/fleet_tracker.py
gear_identity_log / snapshot"] + C --> E["prediction/algorithms/polygon_builder.py
gear group detect + sub-cluster + snapshots"] + E --> F["kcg_lab.group_polygon_snapshots"] + C --> G["prediction/algorithms/gear_correlation.py
raw metrics + EMA score"] + G --> H["kcg_lab.gear_correlation_raw_metrics"] + G --> I["kcg_lab.gear_correlation_scores"] + F --> J["prediction/algorithms/gear_parent_inference.py
candidate build + scoring + status"] + H --> J + I --> J + K["v2 exclusions / labels"] --> J + J --> L["kcg_lab.gear_group_parent_candidate_snapshots"] + J --> M["kcg_lab.gear_group_parent_resolution"] + J --> N["kcg_lab.gear_parent_label_tracking_cycles"] + F --> O["backend GroupPolygonService"] + L --> O + M --> O + N --> O + O --> P["frontend ParentReviewPanel"] +``` + +## 4. 원천 데이터와 시간 모델 + +### 4.1 원천 데이터 형식 + +원천은 `signal.t_vessel_tracks_5min`이며, `1 row = 1 MMSI = 5분 구간의 궤적 전체`를 `LineStringM`으로 보관한다. 실제 위치 포인트는 `ST_DumpPoints(track_geom)`로 분해하고, 각 점의 timestamp는 `ST_M((dp).geom)`에서 꺼낸다. 구현 위치는 `prediction/db/snpdb.py`다. + +### 4.2 safe watermark + +현재 구현은 “DB 적재가 완료된 bucket만 읽는다”는 원칙을 따른다. + +- `prediction/time_bucket.py` + - `compute_safe_bucket()` + - `compute_initial_window_start()` + - `compute_incremental_window_start()` +- 기본값: + - `SNPDB_SAFE_DELAY_MIN` + - `SNPDB_BACKFILL_BUCKETS` + +핵심 규칙: + +1. 초기 적재는 `now - safe_delay`를 5분 내림한 `safe_bucket`까지만 읽는다. +2. 증분 적재는 `last_bucket - backfill_window`부터 `safe_bucket`까지 다시 읽는다. +3. live cache는 `timestamp`가 아니라 `time_bucket` 기준으로 24시간 cutoff를 맞춘다. + +### 4.3 왜 safe watermark가 필요한가 + +`time_bucket > last_bucket`만 사용하면, 늦게 들어온 같은 bucket row를 영구히 놓칠 수 있다. 현재 구현은 overlap backfill과 dedupe로 이 drift를 줄인다. + +- 조회: `prediction/db/snpdb.py` +- 병합 dedupe: `prediction/cache/vessel_store.py` + +## 5. Stage 1: 캐시 적재와 유지 + +### 5.1 초기 적재 + +`prediction/main.py`는 시작 시 `vessel_store.load_initial(24)`를 호출한다. + +`prediction/cache/vessel_store.py`의 규칙: + +1. `snpdb.fetch_all_tracks(hours)`로 최근 24시간을 safe bucket까지 읽는다. +2. MMSI별 DataFrame으로 `_tracks`를 구성한다. +3. 최대 `time_bucket`을 `_last_bucket`으로 저장한다. +4. static info와 permit registry를 함께 refresh한다. + +### 5.2 증분 병합 + +스케줄러는 `snpdb.fetch_incremental(vessel_store.last_bucket)`로 overlap backfill 구간을 다시 읽는다. + +`merge_incremental()` 규칙: + +1. 기존 DataFrame과 새 batch를 합친다. +2. `timestamp`, `time_bucket`으로 정렬한다. +3. `timestamp` 기준 중복은 `keep='last'`로 제거한다. +4. 
batch의 최대 `time_bucket`이 더 크면 `_last_bucket`을 갱신한다. + +### 5.3 stale eviction + +`evict_stale()`는 safe bucket 기준 24시간 이전 포인트를 제거한다. `time_bucket`이 있으면 bucket 기준, 없으면 timestamp 기준으로 fallback한다. + +## 6. Stage 2: 어구 identity 추출 + +`prediction/fleet_tracker.py`는 어구 이름 패턴에서 `parent_name`, `gear_index_1`, `gear_index_2`를 파싱하고 `gear_identity_log`를 관리한다. + +### 6.1 이름 기반 필터 + +공통 규칙은 `prediction/algorithms/gear_name_rules.py`에 있다. + +- 정규화: + - 대문자화 + - 공백, `_`, `-`, `%` 제거 +- 추적 가능 최소 길이: + - 정규화 길이 `>= 4` + +`fleet_tracker.py`와 `polygon_builder.py`는 모두 `is_trackable_parent_name()`을 사용한다. 즉 짧은 이름은 추론 이전, 그룹 생성 이전 단계부터 제외된다. + +### 6.2 identity log 동작 + +`fleet_tracker.py`의 핵심 분기: + +1. 같은 MMSI + 같은 이름: + - 기존 활성 row의 `last_seen_at`, 위치만 갱신 +2. 같은 MMSI + 다른 이름: + - 기존 row 비활성화 + - 새 row insert +3. 다른 MMSI + 같은 이름: + - 기존 row 비활성화 + - 새 MMSI로 row insert + - 기존 `gear_correlation_scores.target_mmsi`를 새 MMSI로 이전 + +## 7. Stage 3: 어구 그룹 생성과 서브클러스터 + +실제 어구 그룹은 `prediction/algorithms/polygon_builder.py`의 `detect_gear_groups()`가 만든다. + +### 7.1 1차 그룹화 + +규칙: + +1. 최신 position 이름이 어구 패턴에 맞아야 한다. +2. `STALE_SEC`를 넘는 오래된 신호는 제외한다. +3. `440`, `441` MMSI는 어구 AIS 미사용으로 간주해 제외한다. +4. `is_trackable_parent_name(parent_raw)`를 만족해야 한다. +5. 같은 `parent_name`은 공백 제거 버전으로 묶는다. + +### 7.2 서브클러스터 생성 + +같은 이름 아래에서도 거리 기반 연결성으로 덩어리를 나눈다. + +- 거리 임계치: `MAX_DIST_DEG = 0.15` +- 연결 규칙: + - 각 어구가 클러스터 내 최소 1개와 `MAX_DIST_DEG` 이내면 같은 연결 요소 +- 구현: + - Union-Find + +모선이 이미 있으면, 모선과 가장 가까운 클러스터를 seed cluster로 간주한다. + +### 7.3 `sub_cluster_id` 부여 규칙 + +현재 구현은 아래와 같다. + +1. 클러스터가 1개면 `sub_cluster_id = 0` +2. 클러스터가 여러 개면 `1..N` +3. 이후 동일 `parent_key`의 두 서브그룹이 다시 근접 병합되면 `sub_cluster_id = 0` + +즉 `sub_cluster_id`는 영구 식별자가 아니라 “그 시점의 공간 분리 라벨”이다. + +### 7.4 병합 규칙 + +동일 `parent_key`의 두 그룹이 다시 가까워지면: + +1. 멤버를 합친다. +2. 부모 MMSI가 없는 큰 그룹에 작은 그룹의 `parent_mmsi`를 승계할 수 있다. +3. `sub_cluster_id = 0`으로 재설정한다. + +### 7.5 스냅샷 생성 규칙 + +`build_all_group_snapshots()`는 각 그룹에 대해 `1h`, `1h-fb`, `6h` 스냅샷을 만든다. 
+ +- `1h` + - 같은 `parent_name` 전체 기준 1시간 활성 멤버 수 `>= 2` +- `1h-fb` + - 같은 `parent_name` 전체 기준 1시간 활성 멤버 수 `< 2` + - 리플레이/일치율 추적용 + - 라이브 현황에서 제외 +- `6h` + - 6시간 내 stale이 아니어야 함 + +추가 규칙: + +1. 서브클러스터 내 1h 활성 멤버가 2개 미만이면 최신 2개로 fallback display를 만든다. +2. 수역 외(`GEAR_OUT_ZONE`)인데 멤버 수가 `MIN_GEAR_GROUP_SIZE` 미만이면 스킵한다. +3. 모선이 있고, 멤버와 충분히 근접하면 `members[].isParent = true`로 같이 넣는다. + +## 8. Stage 4: correlation 모델 + +`prediction/algorithms/gear_correlation.py`는 어구 그룹별 raw metric과 EMA score를 만든다. + +### 8.1 후보 생성 + +입력: + +- group center +- group radius +- active ratio +- group member MMSI set + +출력 후보: + +- 선박 후보(`VESSEL`) +- 잘못 분류된 어구 후보(`GEAR_BUOY`) + +후보 수는 그룹당 최대 `30`개로 제한된다. + +### 8.2 raw metric + +선박 후보는 최근 6시간 항적 기반으로 아래 값을 만든다. + +- `proximity_ratio` +- `visit_score` +- `activity_sync` +- `dtw_similarity` + +어구 후보는 단순 거리 기반 `proximity_ratio`만 사용한다. + +### 8.3 EMA score + +모델 파라미터(`gear_correlation_param_models`)별로 아래를 수행한다. + +1. composite score 계산 +2. 이전 score와 streak를 읽는다 +3. `update_score()`로 EMA 갱신 +4. threshold 이상이거나 기존 row가 있으면 upsert + +반대로 이번 사이클 후보군에서 빠진 기존 항목은 `OUT_OF_RANGE`로 fast decay된다. + +### 8.4 correlation 산출물 + +- `gear_correlation_raw_metrics` +- `gear_correlation_scores` + +여기까지는 “잠재적 모선/근접 대상”의 score이고, 최종 parent inference는 아직 아니다. + +## 9. Stage 5: parent inference + +`prediction/algorithms/gear_parent_inference.py`가 최종 모선 추론을 수행한다. + +전체 진입점은 `run_gear_parent_inference(vessel_store, gear_groups, conn)`이다. + +### 9.1 전체 분기 개요 + +```mermaid +flowchart TD + A["active gear group"] --> B{"direct parent member
exists?"} + B -- yes --> C["DIRECT_PARENT_MATCH
fresh resolution upsert"] + B -- no --> D{"trackable parent name?"} + D -- no --> E["SKIPPED_SHORT_NAME"] + D -- yes --> F["build candidate set"] + F --> G{"candidate exists?"} + G -- no --> H["NO_CANDIDATE"] + G -- yes --> I["score + rank + margin + stable cycles"] + I --> J{"auto promotion rule?"} + J -- yes --> K["AUTO_PROMOTED"] + J -- no --> L{"top score >= 0.60?"} + L -- yes --> M["REVIEW_REQUIRED"] + L -- no --> N["UNRESOLVED"] +``` + +### 9.1.1 episode continuity 선행 단계 + +현재 구현에서 `run_gear_parent_inference()`는 후보 점수를 만들기 전에 먼저 `prediction/algorithms/gear_parent_episode.py`를 호출해 active 그룹의 continuity를 계산한다. + +입력: + +- 현재 cycle `gear_groups` +- 정규화된 `parent_name` +- 최근 `6h` active `gear_group_episodes` +- 최근 `24h` episode prior, `7d` lineage prior, `30d` label prior 집계 + +핵심 규칙: + +1. continuity score는 `0.75 * member_jaccard + 0.25 * center_support`다. +2. 중심점 지원값은 `12nm` 이내일수록 커진다. +3. continuity score가 충분하거나, overlap member가 있고 거리 조건을 만족하면 연결 후보로 본다. +4. 두 개 이상 active episode가 하나의 현재 cluster로 들어오면 `MERGE_NEW`다. +5. 하나의 episode가 여러 현재 cluster로 갈라지면 하나는 `SPLIT_CONTINUE`, 나머지는 `SPLIT_NEW`다. +6. 아무 previous episode와도 연결되지 않으면 `NEW`다. +7. 현재 cycle과 연결되지 못한 active episode는 `EXPIRED` 또는 `MERGED`로 종료한다. + +현재 저장되는 continuity 메타데이터: + +- `gear_group_parent_candidate_snapshots.episode_id` +- `gear_group_parent_resolution.episode_id` +- `gear_group_parent_resolution.continuity_source` +- `gear_group_parent_resolution.continuity_score` +- `gear_group_parent_resolution.prior_bonus_total` +- `gear_group_episodes` +- `gear_group_episode_snapshots` + +### 9.2 direct parent 보강 + +최신 어구 그룹에 아래 중 하나가 있으면 후보 추론 대신 직접 모선 매칭으로 처리한다. + +1. `members[].isParent = true` +2. `group.parent_mmsi` 존재 + +이 경우: + +- `status = DIRECT_PARENT_MATCH` +- `decision_source = DIRECT_PARENT_MATCH` +- `confidence = 1.0` +- `candidateCount = 0` + +단, 기존 상태가 `MANUAL_CONFIRMED`면 그 수동 상태를 유지한다. + +### 9.3 짧은 이름 스킵 + +정규화 이름 길이 `< 4`면: + +- 후보 생성 자체를 수행하지 않는다. 
+- `status = SKIPPED_SHORT_NAME` +- `decision_source = AUTO_SKIP` + +### 9.4 후보 집합 + +후보 집합은 아래의 합집합이다. + +1. default correlation model 상위 후보 +2. registry name exact bucket +3. 기존 resolution의 `selected_parent_mmsi` 또는 이전 top candidate + +여기에 아래를 적용한다. + +- active global exclusion 제거 +- active group exclusion 제거 +- 최근 reject cooldown 후보 제거 + +### 9.5 이름 점수 + +현재 구현 규칙: + +1. 원문 완전일치: `1.0` +2. 정규화 완전일치: `0.8` +3. prefix/contains: `0.5` +4. 숫자를 제거한 순수 문자 부분만 동일: `0.3` +5. 그 외: `0.0` + +비교 대상: + +- `parent_name` +- 후보 AIS 이름 +- registry `name_cn` +- registry `name_en` + +### 9.6 coverage-aware evidence + +짧은 항적 과대평가를 막기 위해 raw score와 effective score를 분리한다. + +evidence에 남는 값: + +- `trackPointCount` +- `trackSpanMinutes` +- `overlapPointCount` +- `overlapSpanMinutes` +- `inZonePointCount` +- `inZoneSpanMinutes` +- `trackCoverageFactor` +- `visitCoverageFactor` +- `activityCoverageFactor` +- `coverageFactor` + +현재 최종 점수에는 raw가 아니라 adjusted score가 들어간다. + +### 9.7 점수 식 + +가중치 합은 아래다. + +- `0.40 * base_corr` +- `0.15 * name_match` +- `0.15 * track_similarity_effective` +- `0.10 * visit_effective` +- `0.05 * proximity_effective` +- `0.05 * activity_effective` +- `0.10 * stability` +- `+ registry_bonus(0.05)` + +그 다음 별도 후가산: + +- `412/413` MMSI 보너스 `+0.15` +- 단, `preBonusScore >= 0.30`일 때만 적용 +- `episode/lineage/label prior bonus` + - 최근 동일 episode `24h` + - 동일 lineage `7d` + - 라벨 세션 `30d` + - 총합 cap `0.20` + +### 9.8 상태 전이 + +분기 조건: + +- `NO_CANDIDATE` + - 후보가 하나도 없을 때 +- `AUTO_PROMOTED` + - `target_type == VESSEL` + - candidate source에 `CORRELATION` 포함 + - `final_score >= auto_promotion_threshold` + - `margin >= auto_promotion_margin` + - `stable_cycles >= auto_promotion_stable_cycles` +- `REVIEW_REQUIRED` + - `final_score >= 0.60` +- `UNRESOLVED` + - 나머지 + +추가 예외: + +- 기존 상태가 `MANUAL_CONFIRMED`면 수동 상태를 유지한다. +- active label session이 있으면 tracking row를 별도로 적재한다. 
+ +### 9.9 산출물 + +- `gear_group_parent_candidate_snapshots` +- `gear_group_parent_resolution` +- `gear_parent_label_tracking_cycles` +- `gear_group_episodes` +- `gear_group_episode_snapshots` + +## 10. Stage 6: backend read model + +backend의 중심은 `backend/.../GroupPolygonService.java`다. + +### 10.1 최신 1h만 라이브로 간주 + +group list, review queue, detail API는 모두 최신 전역 `1h` 스냅샷만 기준으로 삼는다. + +핵심 효과: + +1. `1h-fb`는 라이브 현황에서 기본 제외된다. +2. 이미 사라진 과거 sub-cluster는 detail API에서 다시 보이지 않는다. + +### 10.2 stale inference 차단 + +`resolution.last_evaluated_at >= group.snapshot_time`인 경우만 join한다. + +즉 최신 group snapshot보다 오래된 candidate/resolution은 detail/review/list에서 숨긴다. 이 규칙이 `ZHEDAIYU02433`, `ZHEDAIYU02394` 유형 stale 표시를 막는다. + +### 10.3 detail API 의미 + +`/api/kcg/vessel-analysis/groups/{groupKey}/parent-inference` + +현재 의미: + +- 해당 그룹의 최신 전역 `1h` live sub-cluster 집합 +- 각 sub-cluster의 fresh resolution +- 각 sub-cluster의 latest candidate snapshot + +## 11. Stage 7: review / exclusion / label v2 + +v2 Phase 1은 “자동 추론 결과”와 “사람 판단 데이터”를 분리하는 구조다. + +### 11.1 사람 판단 저장소 + +- `gear_parent_candidate_exclusions` +- `gear_parent_label_sessions` +- `gear_parent_label_tracking_cycles` + +### 11.2 액션 의미 + +- 그룹 제외: + - 특정 `group_key + sub_cluster_id`에서 특정 후보 MMSI를 일정 기간 제거 +- 전체 후보 제외: + - 특정 MMSI를 모든 그룹 후보군에서 제거 +- 정답 라벨: + - 특정 그룹에 대해 정답 parent MMSI를 `1/3/5일` 세션으로 지정 + - prediction은 이후 cycle마다 top1/top3 여부를 추적 + +### 11.3 why v2 + +기존 `MANUAL_CONFIRMED`/`REJECT`는 운영 override 성격이 강했고, “모델 정확도 평가용 백데이터”와 섞였다. v2는 이 둘을 분리해 라벨을 평가 데이터로 쓰도록 한다. + +## 12. 
실제 경우의 수 분기표 + +| 경우 | 구현 위치 | 현재 동작 | +| --- | --- | --- | +| 이름 길이 `< 4` | `gear_name_rules.py`, `fleet_tracker.py`, `polygon_builder.py`, `gear_parent_inference.py` | identity/grouping/inference 단계에서 제외 또는 `SKIPPED_SHORT_NAME` | +| 직접 모선 포함 | `polygon_builder.py`, `gear_parent_inference.py` | `DIRECT_PARENT_MATCH` fresh resolution | +| 같은 이름, 멀리 떨어진 어구 | `polygon_builder.py` | 별도 sub-cluster 생성 | +| 두 서브클러스터가 다시 근접 | `polygon_builder.py` | 하나로 병합, `sub_cluster_id = 0` | +| group 전체 1h 활성 멤버 `< 2` | `polygon_builder.py` | `1h-fb` 생성, live 현황 제외 | +| 후보가 하나도 없음 | `gear_parent_inference.py` | `NO_CANDIDATE` | +| 짧은 항적이 우연히 근접 | `gear_parent_inference.py` | coverage-aware 보정으로 effective score 감소 | +| stale old inference가 남아 있음 | `GroupPolygonService.java` | 최신 group snapshot보다 오래되면 숨김 | +| 직접 parent가 이미 있음 | `gear_parent_inference.py` | 후보 계산 대신 direct parent resolution | + +## 13. `sub_cluster_id`의 한계 + +현재 코드에서 `sub_cluster_id`는 영구 identity가 아니다. + +이유: + +1. 같은 이름 그룹의 공간 분리 수가 cycle마다 달라질 수 있다. +2. 병합되면 `0`으로 재설정된다. +3. 멤버가 추가/이탈해도 기존 번호 의미가 유지된다고 보장할 수 없다. + +따라서 `group_key + sub_cluster_id`는 “현재 cycle의 공간 덩어리”를 가리키는 키로는 유효하지만, 장기 연속 추적 키로는 부적합하다. + +## 14. Stage 8: `episode_id` continuity + prior bonus + +### 14.1 목적 + +현재 구현의 `episode_id`는 “같은 어구 덩어리의 시간적 연속성”을 추적하는 별도 식별자다. `sub_cluster_id`를 대체하지 않고, 그 위에 얹는 계층이다. + +핵심 목적: + +- 작은 멤버 변화는 같은 episode로 이어 붙인다. +- 구조적 split/merge는 continuity source로 기록한다. +- long-memory는 `stable_cycles` 직접 승계가 아니라 약한 prior bonus로만 전달한다. + +### 14.2 현재 저장소 + +- `gear_group_episodes` + - active/merged/expired episode 현재 상태 +- `gear_group_episode_snapshots` + - cycle별 episode 스냅샷 +- `gear_group_parent_candidate_snapshots` + - `episode_id`, `normalized_parent_name`, + `episode_prior_bonus`, `lineage_prior_bonus`, `label_prior_bonus` +- `gear_group_parent_resolution` + - `episode_id`, `continuity_source`, `continuity_score`, `prior_bonus_total` + +### 14.3 continuity score + +현재 continuity score는 아래다. 
+ +```text +continuity_score = + 0.75 * member_jaccard + + 0.25 * center_support +``` + +- `member_jaccard` + - 현재/이전 episode 멤버 MMSI Jaccard +- `center_support` + - 중심점 거리 `12nm` 이내일수록 높아지는 값 + +연결 후보 판단: + +- continuity score `>= 0.45` +- 또는 overlap member가 있고 거리 조건을 만족하면 연결 후보로 인정 + +### 14.4 continuity source 규칙 + +- `NEW` + - 어떤 이전 episode와도 연결되지 않음 +- `CONTINUED` + - 1:1 continuity +- `SPLIT_CONTINUE` + - 하나의 이전 episode가 여러 현재 cluster로 갈라졌고, 그중 주 가지 +- `SPLIT_NEW` + - split로 새로 생성된 가지 +- `MERGE_NEW` + - 2개 이상 active episode가 의미 있게 하나의 현재 cluster로 합쳐짐 +- `DIRECT_PARENT_MATCH` + - 직접 모선 포함 그룹이 fresh resolution으로 정리되는 경우의 최종 resolution source + +### 14.5 merge / split / expire + +현재 구현 규칙: + +1. split + - 가장 유사한 현재 cluster 1개는 기존 episode 유지 + - 나머지는 새 episode 생성 + - 새 episode에는 `split_from_episode_id` 저장 +2. merge + - 2개 이상 previous episode가 같은 현재 cluster로 의미 있게 들어오면 새 episode 생성 + - 이전 episode들은 `MERGED`, `merged_into_episode_id = 새 episode` +3. expire + - 최근 `6h` active episode가 현재 cycle 어떤 cluster와도 연결되지 않으면 `EXPIRED` + +### 14.6 prior bonus 계층 + +현재 final score에는 signal score 뒤에 아래 prior bonus가 후가산된다. + +- `episode_prior_bonus` + - 최근 동일 episode `24h` + - cap `0.10` +- `lineage_prior_bonus` + - 동일 정규화 이름 lineage `7d` + - cap `0.05` +- `label_prior_bonus` + - 동일 lineage 라벨 세션 `30d` + - cap `0.10` +- 총합 cap + - `0.20` + +현재 후보가 이미 candidate set에 들어온 경우에만 적용하며, 과거 점수를 직접 carry하는 대신 약한 보너스로만 사용한다. + +### 14.7 병합 후 후보 관성 + +질문 사례처럼 `A` episode 후보 `a`, `B` episode 후보 `b`가 있다가 병합 후 `b`가 더 적합해질 수 있다. 현재 구현은 병합 시 무조건 `A`를 유지하지 않고 새 episode를 생성해 `A/B` 둘 다의 history를 prior bonus 풀에서 재평가한다. 따라서 `b`는 완전 신규 후보처럼 0에서 시작하지 않지만, `A`의 과거 `stable_cycles`가 그대로 지배하지도 않는다. + +## 15. 
현재 episode 상태 흐름 + +```mermaid +stateDiagram-v2 + [*] --> Active + Active --> Active: "CONTINUED / 소규모 멤버 변동" + Active --> Active: "SPLIT_CONTINUE" + Active --> Active: "MERGE_NEW로 새 episode 생성 후 연결" + Active --> Merged: "merged_into_episode_id 기록" + Active --> Expired: "최근 6h continuity 없음" + Merged --> [*] + Expired --> [*] +``` + +## 16. 결론 + +현재 구현은 아래를 모두 포함한다. + +- safe watermark + overlap backfill 기반 incremental 안정화 +- 짧은 이름 그룹 제거 +- 거리 기반 sub-cluster와 `1h/1h-fb/6h` 스냅샷 +- correlation + parent inference 분리 +- coverage-aware score 보정 +- stale inference 차단 +- direct parent supplement +- v2 exclusion/label/tracking 저장소 +- `episode_id` continuity와 prior bonus + +남은 과제는 `episode` 자체보다, 이 continuity 계층을 read model과 시각화에서 더 설명력 있게 노출하는 것이다. 즉 다음 단계의 핵심은 episode 도입이 아니라, `episode lineage API`, calibration report, richer review analytics를 얹는 일이다. + +## 17. 참고 코드 + +- `prediction/main.py` +- `prediction/time_bucket.py` +- `prediction/db/snpdb.py` +- `prediction/cache/vessel_store.py` +- `prediction/fleet_tracker.py` +- `prediction/algorithms/gear_name_rules.py` +- `prediction/algorithms/polygon_builder.py` +- `prediction/algorithms/gear_correlation.py` +- `prediction/algorithms/gear_parent_episode.py` +- `prediction/algorithms/gear_parent_inference.py` +- `backend/src/main/java/gc/mda/kcg/domain/fleet/GroupPolygonService.java` +- `backend/src/main/java/gc/mda/kcg/domain/fleet/ParentInferenceWorkflowController.java` +- `database/migration/012_gear_parent_inference.sql` +- `database/migration/014_gear_parent_workflow_v2_phase1.sql` +- `database/migration/015_gear_parent_episode_tracking.sql` diff --git a/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md b/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md new file mode 100644 index 0000000..6746763 --- /dev/null +++ b/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2-PHASE1.md @@ -0,0 +1,706 @@ +# Gear Parent Inference Workflow V2 Phase 1 Spec + +## 목적 + +이 문서는 `GEAR-PARENT-INFERENCE-WORKFLOW-V2.md`의 첫 구현 단계를 바로 개발할 수 있는 수준으로 구체화한 
명세다. + +Phase 1 범위는 아래로 제한한다. + +- DB 마이그레이션 +- backend API 계약 +- prediction exclusion/label read-write 지점 +- 프론트의 최소 계약 변화 + +이번 단계에서는 실제 자동화/LLM 연결은 다루지 않는다. + +## 범위 요약 + +### 포함 + +- 그룹 단위 후보 제외 `1/3/5일` +- 전역 후보 제외 +- 정답 라벨 세션 `1/3/5일` +- 라벨 세션 기간 동안 cycle별 tracking 기록 +- active exclusion을 parent inference 후보 생성에 반영 +- exclusion/label 관리 API + +### 제외 + +- 운영 `kcg` 스키마 반영 +- 기존 `gear_correlation_scores` 산식 변경 +- LLM reviewer +- label session의 anchor 기반 재매칭 보강 +- UI 고도화 화면 전부 + +## 구현 원칙 + +1. 기존 자동 추론 저장소는 유지한다. +2. 새 사람 판단 데이터는 별도 테이블에 저장한다. +3. Phase 1에서는 `group_key + sub_cluster_id`를 세션 식별 기준으로 고정한다. +4. 기존 `CONFIRM/REJECT/RESET` API는 삭제하지 않지만, 새 UI에서는 사용하지 않는다. +5. 새 API와 prediction 로직은 `kcg_lab` 기준으로만 먼저 구현한다. + +## DB 명세 + +## 1. `gear_parent_candidate_exclusions` + +### 목적 + +- 그룹 단위 후보 제외와 전역 후보 제외를 단일 저장소에서 관리 + +### DDL 초안 + +```sql +CREATE TABLE IF NOT EXISTS kcg.gear_parent_candidate_exclusions ( + id BIGSERIAL PRIMARY KEY, + scope_type VARCHAR(16) NOT NULL, + group_key VARCHAR(100), + sub_cluster_id SMALLINT, + candidate_mmsi VARCHAR(20) NOT NULL, + reason_type VARCHAR(32) NOT NULL, + duration_days INT, + active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(), + active_until TIMESTAMPTZ, + released_at TIMESTAMPTZ, + released_by VARCHAR(100), + actor VARCHAR(100) NOT NULL, + comment TEXT, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT chk_gpce_scope CHECK (scope_type IN ('GROUP', 'GLOBAL')), + CONSTRAINT chk_gpce_reason CHECK (reason_type IN ('GROUP_WRONG_PARENT', 'GLOBAL_NOT_PARENT_TARGET')), + CONSTRAINT chk_gpce_group_scope CHECK ( + (scope_type = 'GROUP' AND group_key IS NOT NULL AND sub_cluster_id IS NOT NULL AND duration_days IN (1, 3, 5) AND active_until IS NOT NULL) + OR + (scope_type = 'GLOBAL' AND duration_days IS NULL) + ) +); +``` + +### 인덱스 + +```sql +CREATE INDEX IF NOT EXISTS idx_gpce_scope_mmsi_active + ON 
kcg.gear_parent_candidate_exclusions(scope_type, candidate_mmsi, active_from DESC) + WHERE released_at IS NULL; + +CREATE INDEX IF NOT EXISTS idx_gpce_group_active + ON kcg.gear_parent_candidate_exclusions(group_key, sub_cluster_id, active_from DESC) + WHERE released_at IS NULL; + +CREATE INDEX IF NOT EXISTS idx_gpce_active_until + ON kcg.gear_parent_candidate_exclusions(active_until); +``` + +### active 판정 규칙 + +active exclusion은 아래를 만족해야 한다. + +```sql +released_at IS NULL +AND active_from <= NOW() +AND (active_until IS NULL OR active_until > NOW()) +``` + +### 해석 규칙 + +- `GROUP` + - 특정 그룹에서만 해당 후보 제거 +- `GLOBAL` + - 모든 그룹에서 해당 후보 제거 + +## 2. `gear_parent_label_sessions` + +### 목적 + +- 정답 라벨 세션 저장 + +### DDL 초안 + +```sql +CREATE TABLE IF NOT EXISTS kcg.gear_parent_label_sessions ( + id BIGSERIAL PRIMARY KEY, + group_key VARCHAR(100) NOT NULL, + sub_cluster_id SMALLINT NOT NULL, + label_parent_mmsi VARCHAR(20) NOT NULL, + label_parent_name VARCHAR(200), + label_parent_vessel_id INT REFERENCES kcg.fleet_vessels(id) ON DELETE SET NULL, + duration_days INT NOT NULL, + active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(), + active_until TIMESTAMPTZ NOT NULL, + status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE', + actor VARCHAR(100) NOT NULL, + comment TEXT, + anchor_snapshot_time TIMESTAMPTZ, + anchor_center_point geometry(Point, 4326), + anchor_member_mmsis JSONB NOT NULL DEFAULT '[]'::jsonb, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT chk_gpls_duration CHECK (duration_days IN (1, 3, 5)), + CONSTRAINT chk_gpls_status CHECK (status IN ('ACTIVE', 'EXPIRED', 'CANCELLED')) +); +``` + +### 인덱스 + +```sql +CREATE INDEX IF NOT EXISTS idx_gpls_group_active + ON kcg.gear_parent_label_sessions(group_key, sub_cluster_id, active_from DESC) + WHERE status = 'ACTIVE'; + +CREATE INDEX IF NOT EXISTS idx_gpls_mmsi_active + ON kcg.gear_parent_label_sessions(label_parent_mmsi, 
active_from DESC) + WHERE status = 'ACTIVE'; + +CREATE INDEX IF NOT EXISTS idx_gpls_active_until + ON kcg.gear_parent_label_sessions(active_until); +``` + +### active 판정 규칙 + +```sql +status = 'ACTIVE' +AND active_from <= NOW() +AND active_until > NOW() +``` + +### 만료 처리 규칙 + +prediction 또는 backend batch에서 아래를 주기적으로 실행한다. + +```sql +UPDATE kcg.gear_parent_label_sessions +SET status = 'EXPIRED', updated_at = NOW() +WHERE status = 'ACTIVE' + AND active_until <= NOW(); +``` + +## 3. `gear_parent_label_tracking_cycles` + +### 목적 + +- 활성 정답 라벨 세션 동안 cycle별 자동 추론 결과 저장 + +### DDL 초안 + +```sql +CREATE TABLE IF NOT EXISTS kcg.gear_parent_label_tracking_cycles ( + id BIGSERIAL PRIMARY KEY, + label_session_id BIGINT NOT NULL REFERENCES kcg.gear_parent_label_sessions(id) ON DELETE CASCADE, + observed_at TIMESTAMPTZ NOT NULL, + candidate_snapshot_observed_at TIMESTAMPTZ, + auto_status VARCHAR(40), + top_candidate_mmsi VARCHAR(20), + top_candidate_name VARCHAR(200), + top_candidate_score DOUBLE PRECISION, + top_candidate_margin DOUBLE PRECISION, + candidate_count INT NOT NULL DEFAULT 0, + labeled_candidate_present BOOLEAN NOT NULL DEFAULT FALSE, + labeled_candidate_rank INT, + labeled_candidate_score DOUBLE PRECISION, + labeled_candidate_pre_bonus_score DOUBLE PRECISION, + labeled_candidate_margin_from_top DOUBLE PRECISION, + matched_top1 BOOLEAN NOT NULL DEFAULT FALSE, + matched_top3 BOOLEAN NOT NULL DEFAULT FALSE, + evidence_summary JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT uq_gpltc_session_observed UNIQUE (label_session_id, observed_at) +); +``` + +### 인덱스 + +```sql +CREATE INDEX IF NOT EXISTS idx_gpltc_session_observed + ON kcg.gear_parent_label_tracking_cycles(label_session_id, observed_at DESC); + +CREATE INDEX IF NOT EXISTS idx_gpltc_top_candidate + ON kcg.gear_parent_label_tracking_cycles(top_candidate_mmsi); +``` + +## 4. 
기존 `gear_group_parent_review_log` action 확장 + +### 새 action 목록 + +- `LABEL_PARENT` +- `EXCLUDE_GROUP` +- `EXCLUDE_GLOBAL` +- `RELEASE_EXCLUSION` +- `CANCEL_LABEL` + +기존 action과 공존한다. + +## migration 파일 제안 + +- `014_gear_parent_workflow_v2_phase1.sql` + +구성 순서: + +1. 새 테이블 3개 생성 +2. 인덱스 생성 +3. review log action 확장은 schema 변경 불필요 +4. optional helper view 추가 + +## optional view 제안 + +### `vw_active_gear_parent_candidate_exclusions` + +```sql +CREATE OR REPLACE VIEW kcg.vw_active_gear_parent_candidate_exclusions AS +SELECT * +FROM kcg.gear_parent_candidate_exclusions +WHERE released_at IS NULL + AND active_from <= NOW() + AND (active_until IS NULL OR active_until > NOW()); +``` + +### `vw_active_gear_parent_label_sessions` + +```sql +CREATE OR REPLACE VIEW kcg.vw_active_gear_parent_label_sessions AS +SELECT * +FROM kcg.gear_parent_label_sessions +WHERE status = 'ACTIVE' + AND active_from <= NOW() + AND active_until > NOW(); +``` + +## backend API 명세 + +## 공통 정책 + +- 모든 write API는 `actor` 필수 +- `group_key`, `sub_cluster_id`, `candidate_mmsi`, `selected_parent_mmsi`는 trim 후 저장 +- 잘못된 기간은 `400 Bad Request` +- 중복 active session/exclusion 생성 시 `409 Conflict` 대신 동일 active row를 반환해도 됨 +- Phase 1에서는 멱등성을 우선한다 + +## 1. 
정답 라벨 세션 생성 + +### endpoint + +`POST /api/vessel-analysis/groups/{groupKey}/parent-inference/{subClusterId}/label-sessions` + +### request + +```json +{ + "selectedParentMmsi": "412333326", + "durationDays": 3, + "actor": "analyst-01", + "comment": "수동 검토 확정" +} +``` + +### validation + +- `selectedParentMmsi` 필수 +- `durationDays in (1,3,5)` +- 동일 `groupKey + subClusterId`에 active label session이 이미 있으면 새 row 생성 금지 + +### response + +```json +{ + "groupKey": "58399", + "subClusterId": 0, + "action": "LABEL_PARENT", + "labelSession": { + "id": 12, + "status": "ACTIVE", + "labelParentMmsi": "412333326", + "labelParentName": "UWEIJINGYU51015", + "durationDays": 3, + "activeFrom": "2026-04-03T10:00:00+09:00", + "activeUntil": "2026-04-06T10:00:00+09:00", + "actor": "analyst-01", + "comment": "수동 검토 확정" + } +} +``` + +## 2. 그룹 후보 제외 생성 + +### endpoint + +`POST /api/vessel-analysis/groups/{groupKey}/parent-inference/{subClusterId}/candidate-exclusions` + +### request + +```json +{ + "candidateMmsi": "412333326", + "durationDays": 3, + "actor": "analyst-01", + "comment": "이 그룹에서는 오답" +} +``` + +### 생성 규칙 + +- 내부적으로 `scopeType='GROUP'` +- `reasonType='GROUP_WRONG_PARENT'` +- 동일 `groupKey + subClusterId + candidateMmsi` active row가 있으면 재사용 + +### response + +```json +{ + "groupKey": "58399", + "subClusterId": 0, + "action": "EXCLUDE_GROUP", + "exclusion": { + "id": 33, + "scopeType": "GROUP", + "candidateMmsi": "412333326", + "durationDays": 3, + "activeFrom": "2026-04-03T10:00:00+09:00", + "activeUntil": "2026-04-06T10:00:00+09:00" + } +} +``` + +## 3. 전역 후보 제외 생성 + +### endpoint + +`POST /api/vessel-analysis/parent-inference/candidate-exclusions/global` + +### request + +```json +{ + "candidateMmsi": "412333326", + "actor": "analyst-01", + "comment": "모든 어구에서 후보 제외" +} +``` + +### 생성 규칙 + +- `scopeType='GLOBAL'` +- `reasonType='GLOBAL_NOT_PARENT_TARGET'` +- `activeUntil = NULL` +- 동일 candidate active global exclusion이 있으면 재사용 + +## 4. 
exclusion 해제 + +### endpoint + +`POST /api/vessel-analysis/parent-inference/candidate-exclusions/{id}/release` + +### request + +```json +{ + "actor": "analyst-01", + "comment": "해제" +} +``` + +### 동작 + +- `released_at = NOW()` +- `released_by = actor` +- `updated_at = NOW()` + +## 5. label session 종료 + +### endpoint + +`POST /api/vessel-analysis/parent-inference/label-sessions/{id}/cancel` + +### request + +```json +{ + "actor": "analyst-01", + "comment": "조기 종료" +} +``` + +### 동작 + +- `status='CANCELLED'` +- `updated_at = NOW()` + +## 6. active exclusion 조회 + +### endpoint + +`GET /api/vessel-analysis/parent-inference/candidate-exclusions?status=ACTIVE&scopeType=GROUP|GLOBAL&candidateMmsi=...&groupKey=...` + +### response 필드 + +- `id` +- `scopeType` +- `groupKey` +- `subClusterId` +- `candidateMmsi` +- `reasonType` +- `durationDays` +- `activeFrom` +- `activeUntil` +- `releasedAt` +- `actor` +- `comment` +- `isActive` + +## 7. label session 목록 + +### endpoint + +`GET /api/vessel-analysis/parent-inference/label-sessions?status=ACTIVE|EXPIRED|CANCELLED&groupKey=...` + +### response 필드 + +- `id` +- `groupKey` +- `subClusterId` +- `labelParentMmsi` +- `labelParentName` +- `durationDays` +- `activeFrom` +- `activeUntil` +- `status` +- `actor` +- `comment` +- `latestTrackingSummary` + +## 8. 
label tracking 상세 + +### endpoint + +`GET /api/vessel-analysis/parent-inference/label-sessions/{id}/tracking` + +### response 필드 + +- `session` +- `count` +- `items[]` + - `observedAt` + - `autoStatus` + - `topCandidateMmsi` + - `topCandidateScore` + - `topCandidateMargin` + - `candidateCount` + - `labeledCandidatePresent` + - `labeledCandidateRank` + - `labeledCandidateScore` + - `labeledCandidatePreBonusScore` + - `matchedTop1` + - `matchedTop3` + +## backend 구현 위치 + +### 새 DTO/Request 제안 + +- `GroupParentLabelSessionRequest` +- `GroupParentCandidateExclusionRequest` +- `ReleaseParentCandidateExclusionRequest` +- `CancelParentLabelSessionRequest` +- `ParentCandidateExclusionDto` +- `ParentLabelSessionDto` +- `ParentLabelTrackingCycleDto` + +### service 추가 메서드 제안 + +- `createGroupCandidateExclusion(...)` +- `createGlobalCandidateExclusion(...)` +- `releaseCandidateExclusion(...)` +- `createLabelSession(...)` +- `cancelLabelSession(...)` +- `listCandidateExclusions(...)` +- `listLabelSessions(...)` +- `getLabelSessionTracking(...)` + +## prediction 명세 + +## 적용 함수 + +중심 파일은 [prediction/algorithms/gear_parent_inference.py](prediction/algorithms/gear_parent_inference.py)다. + +### 새 load 함수 + +- `_load_active_candidate_exclusions(conn, group_keys)` +- `_load_active_label_sessions(conn, group_keys)` + +### 반환 구조 + +`_load_active_candidate_exclusions` + +```python +{ + "global": {"412333326", "413000111"}, + "group": {("58399", 0): {"412333326"}} +} +``` + +`_load_active_label_sessions` + +```python +{ + ("58399", 0): { + "id": 12, + "label_parent_mmsi": "412333326", + "active_until": ..., + ... + } +} +``` + +### 후보 pruning 순서 + +1. 기존 candidate union 생성 +2. `GLOBAL` exclusion 제거 +3. 해당 그룹의 `GROUP` exclusion 제거 +4. 
남은 후보만 scoring + +### tracking row write 규칙 + +각 그룹 처리 후: + +- active label session이 없으면 skip +- 있으면 현재 cycle 결과를 `gear_parent_label_tracking_cycles`에 upsert-like insert + +필수 기록값: + +- `label_session_id` +- `observed_at` +- `candidate_snapshot_observed_at` +- `auto_status` +- `top_candidate_mmsi` +- `top_candidate_score` +- `top_candidate_margin` +- `candidate_count` +- `labeled_candidate_present` +- `labeled_candidate_rank` +- `labeled_candidate_score` +- `labeled_candidate_pre_bonus_score` +- `matched_top1` +- `matched_top3` + +### pre-bonus score 취득 + +현재 candidate evidence에 이미 아래가 있다. + +- `evidence.scoreBreakdown.preBonusScore` + +tracking row에서는 이 값을 직접 읽어 저장한다. + +### resolution 처리 원칙 + +Phase 1에서는 다음을 적용한다. + +- `LABEL_PARENT`, `EXCLUDE_GROUP`, `EXCLUDE_GLOBAL`은 `gear_group_parent_resolution` 상태를 바꾸지 않는다. +- 자동 추론은 기존 상태 전이를 그대로 사용한다. +- legacy `MANUAL_CONFIRMED` 로직은 남겨두되, 새 UI에서는 호출하지 않는다. + +## 프론트 최소 계약 + +## 기존 패널 액션 치환 + +현재: + +- `확정` +- `24시간 제외` + +Phase 1 새 기본 액션: + +- `정답 라벨` +- `이 그룹에서 제외` +- `전체 후보 제외` + +### 기간 선택 UI + +- `정답 라벨`: `1일`, `3일`, `5일` +- `이 그룹에서 제외`: `1일`, `3일`, `5일` +- `전체 후보 제외`: 기간 없음 + +### 표시 정보 + +후보 card badge: + +- `이 그룹 제외 중` +- `전체 후보 제외 중` +- `정답 라벨 대상` + +그룹 summary box: + +- active label session 여부 +- active group exclusion count + +## API 에러 규약 + +### 400 + +- 잘못된 duration +- 필수 필드 누락 +- groupKey/subClusterId 없음 + +### 404 + +- 대상 group 없음 +- exclusion/session id 없음 + +### 409 + +- active label session 중복 생성 + +단, Phase 1에서는 backend에서 충돌 시 기존 active row를 그대로 반환하는 방식도 허용한다. + +## 테스트 기준 + +## DB + +- GROUP exclusion active query가 정확히 동작 +- GLOBAL exclusion active query가 정확히 동작 +- label session 만료 시 `EXPIRED` 전환 + +## backend + +- create/release exclusion API +- create/cancel label session API +- list APIs 필터 조건 + +## prediction + +- active exclusion candidate pruning +- global/group exclusion 우선 적용 +- label session tracking row 생성 +- labeled candidate absent/present/top1/top3 케이스 + +## 수용 기준 + +1. 
특정 그룹에서 후보 제외를 걸면 다음 cycle부터 그 그룹 후보 목록에서만 빠진다. +2. 전역 후보 제외를 걸면 모든 그룹 후보 목록에서 빠진다. +3. 정답 라벨 세션 생성 후 다음 cycle부터 tracking row가 쌓인다. +4. 자동 resolution은 계속 자동 상태를 유지한다. +5. 기존 manual override API를 쓰지 않아도 review/label/exclusion 흐름이 독립적으로 동작한다. + +## Phase 1 이후 바로 이어질 일 + +### Phase 2 + +- 라벨 추적 대시보드 +- exclusion 관리 화면 +- 지표 요약 endpoint +- episode continuity read model 노출 +- prior bonus calibration report + +### Phase 3 + +- label session anchor 기반 재매칭 +- group case/episode lineage API 확장 +- calibration report + +## 권장 구현 순서 + +1. `014_gear_parent_workflow_v2_phase1.sql` +2. backend DTO + controller/service +3. prediction active exclusion/load + tracking write +4. frontend 버튼 교체와 최소 조회 화면 + +이 순서가 현재 코드 충돌과 운영 영향이 가장 적다. diff --git a/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2.md b/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2.md new file mode 100644 index 0000000..b5b5271 --- /dev/null +++ b/docs/GEAR-PARENT-INFERENCE-WORKFLOW-V2.md @@ -0,0 +1,693 @@ +# Gear Parent Inference Workflow V2 + +## 문서 목적 + +이 문서는 lab 환경의 어구 모선 추적 워크플로우를 v1 운영 override 중심 구조에서, +`평가 데이터 축적 + 후보 제외 관리 + 기간형 정답 라벨 추적` 중심 구조로 재정의하는 설계서다. + +대상 범위는 아래와 같다. + +- `kcg_lab` 스키마 +- `backend-lab` (`192.168.1.20:18083`) +- `prediction-lab` (`192.168.1.18:18091`) +- 로컬 프론트 `yarn dev:lab` + +운영 `kcg` 스키마와 기존 데모 동작은 이번 설계 단계에서 변경하지 않는다. + +현재 구현 기준으로는 v2 Phase 1 저장소/API가 이미 lab에 반영되어 있고, 그 위에 `015_gear_parent_episode_tracking.sql`과 `prediction/algorithms/gear_parent_episode.py`를 통해 `episode continuity + prior bonus` 계층이 추가되었다. 이 문서는 여전히 워크플로우 설계서지만, 사람 판단 저장소와 자동 추론 저장소 분리 원칙은 현재 코드의 실제 기준이기도 하다. + +## 배경 + +현재 v1은 자동 추론 결과와 사람 판단이 같은 저장소에 섞여 있다. + +- `확정`은 `gear_group_parent_resolution`을 `MANUAL_CONFIRMED`로 덮어쓴다. +- `24시간 제외`는 특정 그룹에서 후보 1개를 24시간 숨긴다. +- 자동 추론은 계속 돌지만, 수동 판단이 최종 상태를 override한다. + +이 구조는 단기 운용에는 편하지만, 아래 목적에는 맞지 않는다. + +- 사람이 보면서 모델 가중치와 후보 생성 품질을 평가 +- 정답/오답 사례를 데이터셋으로 축적 +- 충분한 정확도 확보 후 자동화 또는 LLM 연결 + +따라서 v2에서는 `자동 추론`, `사람 라벨`, `후보 제외`를 분리한다. + +## 핵심 목표 + +1. 자동 추론 상태는 계속 독립적으로 유지한다. +2. 
사람 판단은 override가 아니라 별도 라벨/제외 데이터로 저장한다. +3. 그룹 단위 오답 라벨은 `1일 / 3일 / 5일` 기간형 후보 제외로 관리한다. +4. 전역 후보 제외는 모든 어구 그룹에서 동일 MMSI를 후보군에서 제거한다. +5. 정답 라벨은 `1일 / 3일 / 5일` 세션으로 만들고, 활성 기간 동안 자동 추론 결과를 별도 추적 로그로 남긴다. +6. 알고리즘은 DB exclusion/label 정보를 읽어 다음 cycle부터 바로 반영한다. +7. 향후 threshold 튜닝, 가산점 실험, LLM 연결 평가에 쓰일 수 있는 정량 지표를 만든다. + +## 용어 + +- 자동 추론 + - `gear_parent_inference`가 계산한 현재 cycle의 후보 점수와 추천 결과 +- 그룹 제외 + - 특정 `group_key + sub_cluster_id`에서 특정 후보 MMSI를 일정 기간 후보군에서 제거 +- 전역 후보 제외 + - 특정 MMSI를 모든 어구 그룹의 모선 후보군에서 제거 +- 정답 라벨 세션 + - 특정 어구 그룹에 대해 “이 MMSI가 정답 모선”이라고 사람이 지정하고, 일정 기간 자동 추론 결과를 추적하는 세션 +- 라벨 추적 + - 정답 라벨 세션 활성 기간 동안 자동 추론이 정답 후보를 어떻게 rank/score하는지 누적 저장하는 기록 + +## 현재 v1의 한계 + +### 1. `확정`이 평가 라벨이 아니라 운영 override다 + +- 현재 `CONFIRM`은 resolution을 `MANUAL_CONFIRMED`로 덮어쓴다. +- 이 경우 자동 추론의 실제 성능과 사람 판단이 섞여, 나중에 모델 정확도를 평가하기 어렵다. + +### 2. `24시간 제외`는 기간과 범위가 너무 좁다 + +- 현재는 그룹 단위 24시간 mute만 가능하다. +- `1/3/5일`처럼 길이를 다르게 두고 비교할 수 없다. +- “이 MMSI는 아예 모선 후보 대상이 아니다”라는 전역 규칙을 넣을 수 없다. + +### 3. 백데이터 축적 구조가 없다 + +- 현재는 review log는 남지만, “정답 후보가 cycle별로 몇 위였는지”, “점수가 어떻게 변했는지”, “후보군에 들어왔는지”를 체계적으로 저장하지 않는다. + +### 4. 장기 세션에 대한 그룹 스코프가 약하다 + +- 현재 그룹 기준은 `group_key + sub_cluster_id`다. +- 기간형 라벨/제외를 도입하면 subcluster 재편성 리스크를 고려해야 한다. + +## v2 설계 원칙 + +### 1. 자동 추론 저장소는 그대로 유지한다 + +아래 기존 저장소는 계속 자동 추론 전용으로 유지한다. + +- `gear_group_parent_candidate_snapshots` +- `gear_group_parent_resolution` +- `gear_group_parent_review_log` + +단, `review_log`의 의미는 “UI action audit”로 바꾸고, 더 이상 최종 라벨 저장소로 보지 않는다. + +### 2. 사람 판단은 새 저장소로 분리한다 + +사람이 내린 판단은 아래 두 축으로 분리한다. + +- 제외 축 + - 이 그룹에서 제외 + - 전체 후보 제외 +- 정답 축 + - 기간형 정답 라벨 세션 + +### 3. 제외는 후보 생성 이후의 gating layer로 둔다 + +전역 후보 제외는 raw correlation이나 원시 선박 분류를 지우지 않는다. + +- `gear_correlation_scores`는 계속 쌓는다. +- exclusion은 parent inference candidate set에서만 hard filter로 적용한다. + +이렇게 해야 원시 모델 출력과 사람 개입의 차이를 비교할 수 있다. + +### 4. 라벨 세션 동안 자동 추론은 계속 돈다 + +정답 라벨 세션이 활성화되어도 자동 추론은 그대로 수행한다. + +- UI의 기본 검토 대기에서는 숨길 수 있다. 
+- 하지만 prediction은 계속 candidate snapshot과 tracking record를 남긴다. + +### 5. lab에서는 override보다 평가를 우선한다 + +v2 이후 lab에서 사람 버튼은 기본적으로 자동 resolution을 덮어쓰지 않는다. + +- 운영 override가 필요해지면 추후 별도 action으로 분리한다. +- lab의 기본 목적은 평가 데이터 생성이다. + +## 사용자 액션 재정의 + +### `정답 라벨` + +의미: + +- 해당 어구 그룹의 정답 모선으로 특정 MMSI를 지정 +- `1일 / 3일 / 5일` 중 하나의 기간 동안 자동 추론 결과를 추적 + +동작: + +1. `gear_parent_label_sessions`에 active session 생성 +2. 다음 cycle부터 prediction이 이 그룹에 대한 추적 로그를 `gear_parent_label_tracking_cycles`에 누적 +3. 기본 review queue에서는 해당 그룹을 숨기고, 별도 `라벨 추적` 목록으로 이동 +4. 세션 종료 후에는 completed label dataset으로 남음 + +중요: + +- 자동 resolution은 계속 자동 상태를 유지 +- 점수에 수동 가산점/감점은 넣지 않음 + +### `이 그룹에서 제외` + +의미: + +- 해당 어구 그룹에서만 특정 후보 MMSI를 일정 기간 후보군에서 제외 + +기간: + +- `1일` +- `3일` +- `5일` + +동작: + +1. `gear_parent_candidate_exclusions`에 `scope_type='GROUP'` row 생성 +2. 다음 cycle부터 해당 그룹의 candidate set에서 제거 +3. 다른 그룹에는 영향 없음 +4. 기간이 끝나면 자동으로 inactive 처리 + +용도: + +- 이 후보는 이 어구 그룹의 모선이 아니라고 사람이 판단한 경우 +- 단기/중기 관찰을 위해 일정 기간만 빼고 싶을 때 + +### `전체 후보 제외` + +의미: + +- 특정 MMSI는 모든 어구 그룹에서 모선 후보 대상이 아님 + +동작: + +1. `gear_parent_candidate_exclusions`에 `scope_type='GLOBAL'` row 생성 +2. prediction candidate generation에서 모든 그룹에 대해 hard filter +3. 해제 전까지 계속 적용 + +초기 정책: + +- 전역 후보 제외는 기본적으로 기간 없이 active 상태 유지 +- 수동 `해제` 전까지 유지 + +용도: + +- 패턴 분류상 선박으로 들어왔지만 실제 모선 후보가 아니라고 판단한 AIS +- 잘못된 유형의 신호가 반복적으로 후보군에 유입되는 경우 + +### `해제` + +의미: + +- 활성 그룹 제외, 전역 제외, 정답 라벨 세션을 조기 종료 + +동작: + +- exclusion/session row에 `released_at`, `released_by` 또는 `status='CANCELLED'`를 기록 +- 다음 cycle부터 알고리즘 적용 대상에서 빠짐 + +## DB 설계 + +### 1. 
`gear_parent_candidate_exclusions` + +역할: + +- 그룹 단위 제외와 전역 후보 제외를 모두 저장 +- active list의 단일 진실원 + +권장 컬럼: + +```sql +CREATE TABLE kcg_lab.gear_parent_candidate_exclusions ( + id BIGSERIAL PRIMARY KEY, + scope_type VARCHAR(16) NOT NULL, -- GROUP | GLOBAL + group_key VARCHAR(100), -- GROUP scope에서만 사용 + sub_cluster_id SMALLINT, + candidate_mmsi VARCHAR(20) NOT NULL, + reason_type VARCHAR(32) NOT NULL, -- GROUP_WRONG_PARENT | GLOBAL_NOT_PARENT_TARGET + duration_days INT, -- GROUP scope는 1|3|5, GLOBAL은 NULL 허용 + active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(), + active_until TIMESTAMPTZ, -- GROUP scope는 필수, GLOBAL은 NULL 가능 + released_at TIMESTAMPTZ, + released_by VARCHAR(100), + actor VARCHAR(100) NOT NULL, + comment TEXT, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +``` + +권장 인덱스: + +- `(scope_type, candidate_mmsi)` +- `(group_key, sub_cluster_id, active_from DESC)` +- `(released_at, active_until)` + +조회 규칙: + +active exclusion은 아래 조건으로 판단한다. + +```sql +released_at IS NULL +AND active_from <= NOW() +AND (active_until IS NULL OR active_until > NOW()) +``` + +### 2. 
`gear_parent_label_sessions` + +역할: + +- 특정 그룹에 대한 정답 라벨 세션 저장 + +권장 컬럼: + +```sql +CREATE TABLE kcg_lab.gear_parent_label_sessions ( + id BIGSERIAL PRIMARY KEY, + group_key VARCHAR(100) NOT NULL, + sub_cluster_id SMALLINT NOT NULL, + label_parent_mmsi VARCHAR(20) NOT NULL, + label_parent_name VARCHAR(200), + label_parent_vessel_id INT REFERENCES kcg_lab.fleet_vessels(id) ON DELETE SET NULL, + duration_days INT NOT NULL, -- 1 | 3 | 5 + active_from TIMESTAMPTZ NOT NULL DEFAULT NOW(), + active_until TIMESTAMPTZ NOT NULL, + status VARCHAR(20) NOT NULL DEFAULT 'ACTIVE', -- ACTIVE | EXPIRED | CANCELLED + actor VARCHAR(100) NOT NULL, + comment TEXT, + anchor_snapshot_time TIMESTAMPTZ, + anchor_center_point geometry(Point, 4326), + anchor_member_mmsis JSONB NOT NULL DEFAULT '[]'::jsonb, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +``` + +설명: + +- `anchor_*` 컬럼은 기간형 라벨 동안 subcluster가 재편성될 가능성에 대비한 보조 식별자다. +- phase 1에서는 실제 매칭은 `group_key + sub_cluster_id`를 기본으로 쓰고, anchor 정보는 저장만 한다. + +### 3. 
`gear_parent_label_tracking_cycles` + +역할: + +- 활성 정답 라벨 세션 동안 cycle별 자동 추론 결과 저장 +- 향후 정확도 지표의 기준 데이터 + +권장 컬럼: + +```sql +CREATE TABLE kcg_lab.gear_parent_label_tracking_cycles ( + id BIGSERIAL PRIMARY KEY, + label_session_id BIGINT NOT NULL REFERENCES kcg_lab.gear_parent_label_sessions(id) ON DELETE CASCADE, + observed_at TIMESTAMPTZ NOT NULL, + candidate_snapshot_observed_at TIMESTAMPTZ, + auto_status VARCHAR(40), + top_candidate_mmsi VARCHAR(20), + top_candidate_name VARCHAR(200), + top_candidate_score DOUBLE PRECISION, + top_candidate_margin DOUBLE PRECISION, + candidate_count INT NOT NULL DEFAULT 0, + labeled_candidate_present BOOLEAN NOT NULL DEFAULT FALSE, + labeled_candidate_rank INT, + labeled_candidate_score DOUBLE PRECISION, + labeled_candidate_pre_bonus_score DOUBLE PRECISION, + labeled_candidate_margin_from_top DOUBLE PRECISION, + matched_top1 BOOLEAN NOT NULL DEFAULT FALSE, + matched_top3 BOOLEAN NOT NULL DEFAULT FALSE, + evidence_summary JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +``` + +설명: + +- 전체 후보 상세는 기존 `gear_group_parent_candidate_snapshots`를 그대로 사용한다. +- 여기에는 지표 계산에 직접 필요한 값만 요약 저장한다. + +### 4. 기존 `gear_group_parent_review_log` 재사용 + +새 action 이름 예시: + +- `LABEL_PARENT` +- `EXCLUDE_GROUP` +- `EXCLUDE_GLOBAL` +- `RELEASE_EXCLUSION` +- `CANCEL_LABEL` + +즉, 별도 audit table을 또 만들기보다 기존 review log를 action log로 재사용한다. + +## prediction 변경 설계 + +### 적용 지점 + +핵심 변경 지점은 [gear_parent_inference.py](prediction/algorithms/gear_parent_inference.py), [fleet_tracker.py](prediction/fleet_tracker.py), [polygon_builder.py](prediction/algorithms/polygon_builder.py) 중 `gear_parent_inference.py`가 중심이다. + +### 1. active exclusion load + +cycle 시작 시 아래 두 집합을 읽는다. 
+ +- `global_excluded_mmsis` +- `group_excluded_mmsis[(group_key, sub_cluster_id)]` + +적용 위치: + +- `_build_candidate_scores()`에서 candidate union 이후, 실제 scoring 전에 hard filter + +규칙: + +- GLOBAL exclusion은 모든 그룹에 적용 +- GROUP exclusion은 해당 그룹에만 적용 +- exclusion된 후보는 candidate snapshot에도 남기지 않음 + +중요: + +- raw correlation score는 그대로 계산/저장 +- exclusion은 parent inference candidate set에서만 적용 + +### 2. active label session load + +cycle 시작 시 현재 unresolved/active gear group에 매칭되는 active label session을 읽는다. + +phase 1 매칭 기준: + +- `group_key` +- `sub_cluster_id` + +phase 2 보강 기준: + +- member overlap +- center distance +- anchor snapshot similarity + +### 3. tracking cycle write + +각 그룹의 자동 추론이 끝난 뒤, active label session이 있으면 `gear_parent_label_tracking_cycles`에 1 row를 쓴다. + +기록 항목: + +- 현재 auto top-1 후보 +- auto top-1 점수/격차 +- 후보 수 +- 라벨 대상 MMSI가 현재 후보군에 존재하는지 +- 존재한다면 rank/score/pre-bonus score +- top1/top3 일치 여부 + +### 4. resolution 저장 원칙 변경 + +v2 이후 lab에서는 아래를 원칙으로 한다. + +- 자동 resolution은 자동 추론만 반영 +- 사람 라벨은 resolution을 덮어쓰지 않음 + +즉 아래 legacy 상태는 새로 만들지 않는다. + +- `MANUAL_CONFIRMED` +- `MANUAL_REJECT` + +기존 row는 읽기 전용으로 남겨둘 수 있지만, v2 새 액션은 이 상태를 만들지 않는다. + +### 5. exclusion이 적용된 경우의 상태 전이 + +후보 pruning 이후: + +- 후보가 남으면 기존 자동 상태 전이 사용 +- top1이 제외되어 후보가 비면 `NO_CANDIDATE` +- top1이 제외되어 top2가 승격되면 새 top1 기준으로 `AUTO_PROMOTED / REVIEW_REQUIRED / UNRESOLVED` 재판정 + +## backend API 설계 + +### 1. 정답 라벨 세션 생성 + +`POST /api/vessel-analysis/groups/{groupKey}/parent-inference/{subClusterId}/label-sessions` + +request: + +```json +{ + "selectedParentMmsi": "412333326", + "durationDays": 3, + "actor": "analyst-01", + "comment": "수동 확인" +} +``` + +response: + +- 생성된 label session +- 현재 active label summary + +### 2. 
그룹 후보 제외 생성 + +`POST /api/vessel-analysis/groups/{groupKey}/parent-inference/{subClusterId}/candidate-exclusions` + +request: + +```json +{ + "candidateMmsi": "412333326", + "scopeType": "GROUP", + "durationDays": 3, + "actor": "analyst-01", + "comment": "이 그룹에서는 오답" +} +``` + +### 3. 전역 후보 제외 생성 + +`POST /api/vessel-analysis/parent-inference/candidate-exclusions` + +request: + +```json +{ + "candidateMmsi": "412333326", + "scopeType": "GLOBAL", + "actor": "analyst-01", + "comment": "모든 어구에서 모선 후보 대상 제외" +} +``` + +### 4. exclusion 해제 + +`POST /api/vessel-analysis/parent-inference/candidate-exclusions/{id}/release` + +### 5. label session 종료 + +`POST /api/vessel-analysis/parent-inference/label-sessions/{id}/cancel` + +### 6. active exclusion 조회 + +`GET /api/vessel-analysis/parent-inference/candidate-exclusions?status=ACTIVE&scopeType=GLOBAL` + +용도: + +- “대상 선박이 어느 어구에서 제외중인지” 목록 관리 +- 운영자 관리 화면 + +### 7. active label tracking 조회 + +`GET /api/vessel-analysis/parent-inference/label-sessions?status=ACTIVE` + +`GET /api/vessel-analysis/parent-inference/label-sessions/{id}/tracking` + +### 8. 기존 review/detail API 확장 + +기존 `GroupParentInferenceDto`에 아래 요약을 추가한다. + +- `activeLabelSession` +- `groupExclusionCount` +- `hasGlobalExclusionCandidate` +- `availableActions` + +`ParentInferenceCandidateDto`에는 아래를 추가한다. + +- `isExcludedInGroup` +- `isExcludedGlobally` +- `activeExclusionIds` + +## 프론트엔드 설계 + +### 버튼 재구성 + +현재: + +- `확정` +- `24시간 제외` + +v2: + +- `정답 라벨` +- `이 그룹에서 제외` +- `전체 후보 제외` +- `해제` + +### 기간 선택 + +`정답 라벨`과 `이 그룹에서 제외`는 버튼 클릭 후 아래 중 하나를 고르게 한다. 
+ +- `1일` +- `3일` +- `5일` + +### 우측 모선 검토 패널 변화 + +- 후보 카드 상단 action area를 아래처럼 재구성 + - `정답 라벨` + - `이 그룹에서 제외` + - `전체 후보 제외` +- 현재 후보에 active exclusion이 있으면 badge 표시 + - `이 그룹 제외 중` + - `전체 후보 제외 중` +- 현재 그룹에 active label session이 있으면 summary box 표시 + - 라벨 MMSI + - 남은 기간 + - 최근 top1 일치율 + +### 새 목록 + +- `검토 대기` + - active label session이 없는 그룹만 기본 표시 +- `라벨 추적` + - active label session이 있는 그룹 +- `제외 대상 관리` + - active group/global exclusions + +### 지도 표시 원칙 + +- active label session 그룹은 기본 review 색과 다른 badge 색을 사용 +- globally excluded candidate는 raw correlation 패널에서는 참고로 보일 수 있지만, parent-review actionable candidate 목록에서는 숨김 + +## 지표 설계 + +정답 라벨 세션을 기반으로 최소 아래 지표를 계산한다. + +### 핵심 지표 + +- top1 exact match rate +- top3 hit rate +- labeled candidate mean rank +- labeled candidate mean score +- time-to-first-top1 +- session duration 동안 top1 일치 지속률 + +### 보정/실험 지표 + +- `412/413` 가산점 적용 전후 top1/top3 uplift +- pre-bonus score 대비 final score uplift +- global exclusion 적용 전후 오탐 감소량 +- group exclusion 이후 대체 top1 품질 변화 + +### 운영 준비 지표 + +- auto-promoted 후보 중 라벨과 일치하는 비율 +- high-confidence (`>= 0.72`) 구간 calibration +- label session 종료 시점 기준 `실무 참고 가능` threshold + +## 단계별 구현 순서 + +### Phase 1. DB/Backend 계약 + +- 마이그레이션 추가 + - `gear_parent_candidate_exclusions` + - `gear_parent_label_sessions` + - `gear_parent_label_tracking_cycles` +- backend DTO/API 추가 +- 기존 `CONFIRM/REJECT/RESET`는 lab UI에서 숨기고 legacy로만 남김 + +### Phase 2. prediction 연동 + +- active exclusion load +- candidate pruning +- active label session load +- tracking cycle write + +### Phase 3. 프론트 UI 전환 + +- 버튼 재구성 +- 기간 선택 UI +- 라벨 추적 목록 +- 제외 대상 관리 화면 + +### Phase 4. 지표와 리포트 + +- label session summary endpoint +- exclusion usage summary endpoint +- 실험 리포트 화면 또는 문서 산출 + +## 마이그레이션 전략 + +### 기존 v1 상태 처리 + +- `MANUAL_CONFIRMED`, `MANUAL_REJECT`는 새로 생성하지 않는다. +- 기존 row는 history로 남긴다. +- 필요하면 one-time migration으로 legacy `MANUAL_CONFIRMED`를 `expired label session`으로 변환할 수 있다. 
+ +### 운영 영향 제한 + +- v2는 우선 `kcg_lab`에만 적용 +- 운영 `kcg` 반영 전에는 사람이 직접 누르는 흐름과 tracking 지표가 충분히 쌓여야 함 + +## 수용 기준 + +### 기능 기준 + +- 그룹 제외가 다음 cycle부터 해당 그룹에서만 적용된다. +- 전역 후보 제외가 다음 cycle부터 모든 그룹에 적용된다. +- active exclusion list가 DB/API/UI에서 동일하게 보인다. +- 정답 라벨 세션 동안 cycle별 tracking row가 누락 없이 쌓인다. + +### 데이터 기준 + +- label session당 최소 아래 값이 저장된다. + - top1 후보 + - labeled candidate rank + - labeled candidate score + - candidate count + - observed_at +- exclusion row에는 scope, duration, actor, comment, active 기간이 남는다. + +### 평가 기준 + +- `412/413` 가산점, threshold, exclusion 정책 변경 전후를 label session 데이터로 비교 가능해야 한다. +- 일정 기간 후 “자동 top1을 운영 참고값으로 써도 되는지”를 정량으로 판단할 수 있어야 한다. + +## 열린 이슈 + +### 1. 그룹 스코프 안정성 + +`group_key + sub_cluster_id`가 며칠 동안 완전히 안정적인지 추가 확인이 필요하다. + +현재 권장: + +- phase 1은 기존 키를 그대로 사용 +- 대신 `anchor_snapshot_time`, `anchor_center_point`, `anchor_member_mmsis`를 저장 + +### 2. 전역 후보 제외의 기간 정책 + +현재 제안은 “수동 해제 전까지 유지”다. + +이유: + +- 전역 제외는 단기 오답보다 “이 AIS는 parent candidate class가 아님”에 가깝다. + +필요 시 추후 `1/3/5일` 옵션을 추가할 수 있다. + +### 3. raw correlation UI 노출 + +전역 제외된 후보를 모델 패널에서 완전히 숨길지, `참고 제외` badge만 붙여 남길지는 사용성 확인이 필요하다. + +현재 권장은 아래다. + +- parent-review actionable 후보 목록에서는 숨김 +- raw model/correlation 참고 패널에서는 badge와 함께 유지 + +## 권장 결론 + +v2의 핵심은 `사람 판단을 자동 추론의 override가 아니라 평가 데이터로 축적하는 것`이다. + +따라서 다음 구현 우선순위는 아래가 맞다. + +1. exclusion/label DB 추가 +2. prediction candidate gating + tracking write +3. UI 액션 재정의 +4. 지표 산출 + +그 다음 단계에서만 threshold 자동화, 가산점 조정, LLM 연결을 검토하는 것이 안전하다. 
diff --git a/docs/RELEASE-NOTES.md b/docs/RELEASE-NOTES.md index 334f20d..d7369dc 100644 --- a/docs/RELEASE-NOTES.md +++ b/docs/RELEASE-NOTES.md @@ -4,11 +4,23 @@ ## [Unreleased] +### 추가 +- 어구 모선 추론(Gear Parent Inference) 시스템 — 다층 점수 모델 + Episode 연속성 + 자동 승격/검토 워크플로우 + - Python: gear_parent_inference(1,428줄), gear_parent_episode(631줄), gear_name_rules + - Backend: ParentInferenceWorkflowController + GroupPolygonService 15개 API + - Frontend: ParentReviewPanel (모선 검토 대시보드) + - DB: migration 012~015 (후보 스냅샷, resolution, episode, 라벨 세션, 제외 관리) + ### 수정 - 1h 활성 판정을 parent_name 전체 합산 기준으로 변경 (서브클러스터 분리 후 개별 소수 문제 해결) - vessel_store의 _last_bucket 타임존 오류 수정 (tz-naive KST → UTC 잘못 변환 → incremental fetch 0건) - time_bucket 수집 안전 윈도우 도입 — safe_bucket(12분 지연) + 3 bucket 백필로 데이터 누락 방지 +### 변경 +- fleet_tracker: SQL 테이블명 qualified_table() 동적화 + is_trackable_parent_name 필터 +- gear_correlation: 후보 track에 timestamp 필드 추가 +- kcgdb: SQL 스키마 하드코딩 → qualified_table() 패턴 전환 + ## [2026-04-01.2] ### 추가 diff --git a/frontend/src/components/korea/ParentReviewPanel.tsx b/frontend/src/components/korea/ParentReviewPanel.tsx new file mode 100644 index 0000000..f70a784 --- /dev/null +++ b/frontend/src/components/korea/ParentReviewPanel.tsx @@ -0,0 +1,1391 @@ +import { useEffect, useMemo, useRef, useState } from 'react'; +import type { + GroupParentInferenceItem, + ParentCandidateExclusion, + ParentInferenceCandidate, + ParentInferenceSummary, + ParentLabelSession, +} from '../../services/vesselAnalysis'; +import { FONT_MONO } from '../../styles/fonts'; +import { useTranslation } from 'react-i18next'; +import { getParentReviewCandidateColor } from './parentReviewCandidateColors'; +import { + MIN_PARENT_REVIEW_MEMBER_COUNT, + MIN_PARENT_REVIEW_SCORE, + MIN_PARENT_REVIEW_SCORE_PCT, +} from './parentInferenceConstants'; + +export type ReviewQueueSortMode = 'backend' | 'topScore' | 'memberCount' | 'candidateCount' | 'name' | 'zoneDistance'; +export type ParentWorkflowAction = + | 'LABEL' + | 
'GROUP_EXCLUDE' + | 'GLOBAL_EXCLUDE' + | 'CANCEL_LABEL' + | 'RELEASE_GROUP_EXCLUSION' + | 'RELEASE_GLOBAL_EXCLUSION'; + +interface ParentReviewPanelProps { + selectedGearGroup: string; + items: GroupParentInferenceItem[]; + reviewQueue: GroupParentInferenceItem[]; + reviewQueueFilteredCount: number; + activeGroupExclusions: ParentCandidateExclusion[]; + activeGlobalExclusions: ParentCandidateExclusion[]; + activeLabelSessions: ParentLabelSession[]; + reviewQueueTotalCount: number; + filterFallbackActive: boolean; + isLoading: boolean; + submittingKey: string | null; + actor: string; + workflowDurationDays: 1 | 3 | 5; + hoveredCandidateMmsi: string | null; + minTopScorePct: number; + minMemberCount: number; + sortMode: ReviewQueueSortMode; + searchText: string; + selectedQueueKey: string | null; + focusedQueueKey: string | null; + scrollTargetQueueKey: string | null; + isSpatialFilterDrawing: boolean; + hasSpatialFilter: boolean; + spatialFilterPointCount: number; + onActorChange: (value: string) => void; + onWorkflowDurationDaysChange: (value: 1 | 3 | 5) => void; + onRefresh: () => void; + onSelectGroup: (groupKey: string, subClusterId: number) => void; + onJumpToGroup: (groupKey: string, subClusterId: number) => void; + onQueueHover: (queueKey: string | null) => void; + onCandidateHover: (mmsi: string | null) => void; + onMinTopScorePctChange: (value: number) => void; + onMinMemberCountChange: (value: number) => void; + onSortModeChange: (value: ReviewQueueSortMode) => void; + onSearchTextChange: (value: string) => void; + onResetFilters: () => void; + onStartSpatialFilter: () => void; + onFinishSpatialFilter: () => void; + onClearSpatialFilter: () => void; + onWorkflowAction: ( + subClusterId: number, + action: ParentWorkflowAction, + candidateMmsi?: string, + ) => void; +} + +const panelStyle: React.CSSProperties = { + position: 'absolute', + top: 16, + right: 16, + width: 'min(560px, calc(100vw - 32px))', + maxHeight: 'calc(100vh - 40px)', + overflow: 
'visible', + display: 'flex', + flexDirection: 'column', + gap: 0, + background: 'rgba(12,24,37,0.95)', + border: '1px solid rgba(56,189,248,0.2)', + borderRadius: 10, + boxShadow: '0 14px 40px rgba(0,0,0,0.42)', + color: '#e2e8f0', + fontFamily: FONT_MONO, + zIndex: 11, + pointerEvents: 'auto', +}; + +function statusMeta(status: string | null | undefined, t: (key: string, options?: Record) => string) { + switch (status) { + case 'AUTO_PROMOTED': + return { label: t('parentInference.badges.AUTO_PROMOTED'), color: '#22c55e' }; + case 'MANUAL_CONFIRMED': + return { label: t('parentInference.badges.MANUAL_CONFIRMED'), color: '#38bdf8' }; + case 'DIRECT_PARENT_MATCH': + return { label: t('parentInference.badges.DIRECT_PARENT_MATCH'), color: '#2dd4bf' }; + case 'REVIEW_REQUIRED': + return { label: t('parentInference.badges.REVIEW_REQUIRED'), color: '#f59e0b' }; + case 'SKIPPED_SHORT_NAME': + return { label: t('parentInference.badges.SKIPPED_SHORT_NAME'), color: '#94a3b8' }; + case 'NO_CANDIDATE': + return { label: t('parentInference.badges.NO_CANDIDATE'), color: '#c084fc' }; + case 'UNRESOLVED': + return { label: t('parentInference.badges.UNRESOLVED'), color: '#64748b' }; + default: + return { label: t('parentInference.badges.NONE'), color: '#475569' }; + } +} + +function scorePct(value: number | null | undefined) { + if (value == null) return '-'; + return `${Math.round(value * 100)}%`; +} + +function scoreWidth(value: number | null | undefined) { + if (value == null) return '0%'; + const clamped = Math.max(0, Math.min(1, value)); + return `${Math.round(clamped * 100)}%`; +} + +function topScorePct(item: GroupParentInferenceItem) { + return Math.round((item.parentInference?.topScore ?? 0) * 100); +} + +function sourceList(candidate: ParentInferenceCandidate) { + const raw = candidate.evidence?.sources; + return Array.isArray(raw) ? 
raw.join(', ') : candidate.candidateSource; +} + +function queueItemKey(item: GroupParentInferenceItem) { + return `${item.groupKey}:${item.subClusterId}`; +} + +function queueCandidateKey(groupKey: string, subClusterId: number, candidateMmsi: string) { + return `${groupKey}:${subClusterId}:${candidateMmsi}`; +} + +function formatTimestamp(value: string | null | undefined) { + if (!value) return '-'; + const date = new Date(value); + if (Number.isNaN(date.getTime())) return value; + return new Intl.DateTimeFormat(undefined, { + month: '2-digit', + day: '2-digit', + hour: '2-digit', + minute: '2-digit', + hour12: false, + }).format(date); +} + +function chinaBonusInfo(candidate: ParentInferenceCandidate) { + const evidence = candidate.evidence as Record | undefined; + const scoreBreakdown = evidence?.scoreBreakdown as Record | undefined; + const rawBonus = scoreBreakdown?.chinaMmsiBonus; + const bonus = typeof rawBonus === 'number' ? rawBonus : 0; + const applied = evidence?.chinaMmsiBonusApplied === true || bonus > 0; + return { applied, bonus }; +} + +function evidenceConfidence(candidate: ParentInferenceCandidate) { + const evidence = candidate.evidence as Record | undefined; + const raw = evidence?.evidenceConfidence; + return typeof raw === 'number' ? raw : null; +} + +function coverageInfo(candidate: ParentInferenceCandidate) { + const evidence = candidate.evidence as Record | undefined; + const coverage = evidence?.coverage as Record | undefined; + if (!coverage) return null; + const numberValue = (key: string) => { + const value = coverage[key]; + return typeof value === 'number' ? 
value : null; + }; + return { + trackPointCount: numberValue('trackPointCount'), + trackSpanMinutes: numberValue('trackSpanMinutes'), + overlapPointCount: numberValue('overlapPointCount'), + overlapSpanMinutes: numberValue('overlapSpanMinutes'), + inZonePointCount: numberValue('inZonePointCount'), + inZoneSpanMinutes: numberValue('inZoneSpanMinutes'), + trackCoverageFactor: numberValue('trackCoverageFactor'), + visitCoverageFactor: numberValue('visitCoverageFactor'), + activityCoverageFactor: numberValue('activityCoverageFactor'), + coverageFactor: numberValue('coverageFactor'), + scoreWindowHours: numberValue('scoreWindowHours'), + }; +} + +function rawBreakdown(candidate: ParentInferenceCandidate) { + const evidence = candidate.evidence as Record | undefined; + const raw = evidence?.scoreBreakdownRaw as Record | undefined; + if (!raw) return null; + const numberValue = (key: string) => { + const value = raw[key]; + return typeof value === 'number' ? value : null; + }; + return { + trackSimilarityScore: numberValue('trackSimilarityScore'), + visitScore6h: numberValue('visitScore6h'), + proximityScore6h: numberValue('proximityScore6h'), + activitySyncScore6h: numberValue('activitySyncScore6h'), + }; +} + +function formatSpanMinutes(value: number | null | undefined) { + if (value == null) return '-'; + if (value >= 120) return `${(value / 60).toFixed(1)}h`; + if (value >= 60) return `${Math.round(value / 60)}h`; + return `${Math.round(value)}m`; +} + +function inferenceReason( + summary: ParentInferenceSummary | null | undefined, + t: (key: string, options?: Record) => string, +) { + if (!summary) return null; + switch (summary.status) { + case 'SKIPPED_SHORT_NAME': + return t('parentInference.reasons.shortName'); + case 'NO_CANDIDATE': + return t('parentInference.reasons.noCandidate'); + default: + return summary.statusReason || summary.skipReason || null; + } +} + +function metricTone(label: string) { + switch (label) { + case 'corr': + return '#38bdf8'; + case 
'name': + return '#f59e0b'; + case 'track': + return '#22c55e'; + case 'visit': + return '#a78bfa'; + case 'prox': + return '#fb7185'; + case 'activity': + return '#60a5fa'; + default: + return '#94a3b8'; + } +} + +function MetricBar({ + label, + value, + color, +}: { + label: string; + value: number | null | undefined; + color: string; +}) { + return ( +
+ {label} +
+
+
+ {scorePct(value)} +
+ ); +} + +function GuideSection({ + title, + children, +}: { + title: string; + children: React.ReactNode; +}) { + return ( +
+
{title}
+
{children}
+
+ ); +} + +function GuideRow({ + label, + description, +}: { + label: string; + description: string; +}) { + return ( +
+
{label}
+
{description}
+
+ ); +} + +export default function ParentReviewPanel({ + selectedGearGroup, + items, + reviewQueue, + reviewQueueFilteredCount, + activeGroupExclusions, + activeGlobalExclusions, + activeLabelSessions, + reviewQueueTotalCount, + filterFallbackActive, + isLoading, + submittingKey, + actor, + workflowDurationDays, + hoveredCandidateMmsi, + minTopScorePct, + minMemberCount, + sortMode, + searchText, + selectedQueueKey, + focusedQueueKey, + scrollTargetQueueKey, + isSpatialFilterDrawing, + hasSpatialFilter, + spatialFilterPointCount, + onActorChange, + onWorkflowDurationDaysChange, + onRefresh, + onSelectGroup, + onJumpToGroup, + onQueueHover, + onCandidateHover, + onMinTopScorePctChange, + onMinMemberCountChange, + onSortModeChange, + onSearchTextChange, + onResetFilters, + onStartSpatialFilter, + onFinishSpatialFilter, + onClearSpatialFilter, + onWorkflowAction, +}: ParentReviewPanelProps) { + const { t } = useTranslation(); + const queueRefs = useRef>({}); + const [showGuide, setShowGuide] = useState(false); + const spatialStatus = isSpatialFilterDrawing + ? t('parentInference.filters.spatialDrawing', { count: spatialFilterPointCount }) + : hasSpatialFilter + ? t('parentInference.filters.spatialApplied') + : t('parentInference.filters.spatialIdle'); + const visibleCandidatesByItem = useMemo( + () => new Map( + items.map(item => [ + queueItemKey(item), + (item.candidates ?? []).filter(candidate => (candidate.finalScore ?? 
0) >= MIN_PARENT_REVIEW_SCORE), + ]), + ), + [items], + ); + const activeLabelSessionByQueueKey = useMemo( + () => new Map(activeLabelSessions.map(session => [`${session.groupKey}:${session.subClusterId}`, session])), + [activeLabelSessions], + ); + const activeGroupExclusionByCandidateKey = useMemo( + () => new Map( + activeGroupExclusions + .filter(exclusion => exclusion.groupKey != null && exclusion.subClusterId != null) + .map(exclusion => [ + queueCandidateKey(exclusion.groupKey!, exclusion.subClusterId!, exclusion.candidateMmsi), + exclusion, + ]), + ), + [activeGroupExclusions], + ); + const activeGlobalExclusionByMmsi = useMemo( + () => new Map(activeGlobalExclusions.map(exclusion => [exclusion.candidateMmsi, exclusion])), + [activeGlobalExclusions], + ); + + useEffect(() => { + if (!scrollTargetQueueKey) return; + queueRefs.current[scrollTargetQueueKey]?.scrollIntoView({ block: 'start', behavior: 'smooth' }); + }, [scrollTargetQueueKey]); + + return ( +
+ {showGuide && ( +
+
+
+
+ {t('parentInference.help.title')} +
+
+ {t('parentInference.help.intro')} +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ )} +
+
+
{t('parentInference.title')}
+
{selectedGearGroup}
+
+
+ + +
+
+ +
+
+
+
{t('parentInference.actorLabel')}
+ onActorChange(e.target.value)} + placeholder={t('parentInference.actorPlaceholder')} + style={{ + width: '100%', + background: 'rgba(15,23,42,0.9)', + border: '1px solid rgba(148,163,184,0.2)', + borderRadius: 4, + color: '#e2e8f0', + padding: '6px 8px', + fontSize: 11, + fontFamily: FONT_MONO, + }} + /> +
+
+
{t('parentInference.actions.duration')}
+ +
+
+
+ +
+
+
+
{t('parentInference.filters.sort')}
+ +
+ +
+
{t('parentInference.filters.minMemberCount')}
+ onMinMemberCountChange(Math.max(MIN_PARENT_REVIEW_MEMBER_COUNT, Number(e.target.value) || MIN_PARENT_REVIEW_MEMBER_COUNT))} + style={{ + width: '100%', + background: 'rgba(15,23,42,0.9)', + border: '1px solid rgba(148,163,184,0.2)', + borderRadius: 4, + color: '#e2e8f0', + padding: '6px 8px', + fontSize: 11, + fontFamily: FONT_MONO, + }} + /> +
+ +
+
{t('parentInference.filters.minScore')}
+
+ onMinTopScorePctChange(Number(e.target.value))} + style={{ width: '100%', accentColor: '#38bdf8' }} + /> + + {t('parentInference.filters.minScoreValue', { value: minTopScorePct })} + +
+
+
+ +
+
+
{t('parentInference.filters.search')}
+
+ onSearchTextChange(e.target.value)} + placeholder={t('parentInference.filters.searchPlaceholder')} + style={{ + width: '100%', + background: 'rgba(15,23,42,0.9)', + border: '1px solid rgba(148,163,184,0.2)', + borderRadius: 4, + color: '#e2e8f0', + padding: '6px 8px', + fontSize: 11, + fontFamily: FONT_MONO, + }} + /> + +
+
+ +
+ + + +
+
+ +
+ {spatialStatus} +
+
+ +
+
+
+ {t('parentInference.reviewQueueFiltered', { + filtered: reviewQueueFilteredCount, + total: reviewQueueTotalCount, + })} +
+ {filterFallbackActive && ( +
+
+ {t('parentInference.filters.queueFilterFallback')} +
+ +
+ )} + {reviewQueue.map(item => { + const meta = statusMeta(item.parentInference?.status, t); + const itemKey = queueItemKey(item); + const active = itemKey === selectedQueueKey || (!selectedQueueKey && item.groupKey === selectedGearGroup); + const focused = itemKey === focusedQueueKey; + const topScore = topScorePct(item); + return ( + + ); + })} + {reviewQueue.length === 0 && ( +
{t('parentInference.emptyQueue')}
+ )} +
+ +
+ {isLoading && ( +
{t('parentInference.loading')}
+ )} + {!isLoading && items.length === 0 && ( +
{t('parentInference.emptyState')}
+ )} + + {!isLoading && items.map(item => { + const meta = statusMeta(item.parentInference?.status, t); + const reason = inferenceReason(item.parentInference, t); + const itemKey = queueItemKey(item); + const visibleCandidates = visibleCandidatesByItem.get(itemKey) ?? []; + const activeLabelSession = activeLabelSessionByQueueKey.get(itemKey); + const activeGroupExclusionCount = activeGroupExclusions.filter(exclusion => + exclusion.groupKey === item.groupKey && exclusion.subClusterId === item.subClusterId, + ).length; + return ( +
+
+
+
+ {item.groupKey} / sc#{item.subClusterId} +
+
+ {item.memberCount ?? 0} gear · {item.zoneName || item.groupType} +
+
+
+ + + {meta.label} + +
+
+ +
+
{t('parentInference.summary.recommendedParent')}: {item.parentInference?.selectedParentName || '-'}
+
{t('parentInference.summary.confidence')}: {scorePct(item.parentInference?.confidence)}
+
{t('parentInference.summary.topMargin')}: {scorePct(item.parentInference?.topScore)} / {scorePct(item.parentInference?.scoreMargin)}
+
{t('parentInference.summary.stableCycles')}: {item.parentInference?.stableCycles ?? 0}
+ {activeLabelSession && ( +
+ {t('parentInference.summary.activeLabel')}: {activeLabelSession.labelParentName || activeLabelSession.labelParentMmsi} + {' · '} + {t('parentInference.actions.durationOption', { days: activeLabelSession.durationDays })} + {' · '} + {t('parentInference.summary.activeUntil', { value: formatTimestamp(activeLabelSession.activeUntil) })} +
+ )} + {activeGroupExclusionCount > 0 && ( +
+ {t('parentInference.summary.groupExclusions')}: {activeGroupExclusionCount} +
+ )} + {reason && ( +
{t('parentInference.summary.statusReason')}: {reason}
+ )} +
+ +
+ {t('parentInference.candidate.hoverHint')} +
+ +
+ {visibleCandidates.map(candidate => { + const busy = submittingKey === `${item.groupKey}:${item.subClusterId}`; + const candidateColor = getParentReviewCandidateColor(candidate.rank); + const isHovered = hoveredCandidateMmsi === candidate.candidateMmsi; + const chinaBonus = chinaBonusInfo(candidate); + const confidence = evidenceConfidence(candidate); + const coverage = coverageInfo(candidate); + const rawScores = rawBreakdown(candidate); + const groupExclusion = activeGroupExclusionByCandidateKey.get( + queueCandidateKey(item.groupKey, item.subClusterId, candidate.candidateMmsi), + ); + const globalExclusion = activeGlobalExclusionByMmsi.get(candidate.candidateMmsi); + const isLabelCandidate = activeLabelSession?.labelParentMmsi === candidate.candidateMmsi; + const hasOtherActiveLabel = !!activeLabelSession && !isLabelCandidate; + return ( +
onCandidateHover(candidate.candidateMmsi)} + onMouseLeave={() => onCandidateHover(null)} + style={{ + padding: '8px', + borderRadius: 6, + background: isHovered ? 'rgba(16,28,45,0.96)' : 'rgba(12,18,28,0.9)', + border: `1px solid ${isHovered ? `${candidateColor}aa` : 'rgba(255,255,255,0.06)'}`, + boxShadow: isHovered ? `0 0 0 1px ${candidateColor}44 inset, 0 10px 24px rgba(0,0,0,0.35)` : 'none', + position: 'relative', + overflow: 'hidden', + }} + > +
+
+
+
+ + #{candidate.rank} + +
+ {candidate.candidateName || candidate.candidateMmsi} +
+
+
+ {candidate.candidateMmsi} +
+
+ + {sourceList(candidate)} + + + {candidate.trackAvailable + ? t('parentInference.candidate.trackReady') + : t('parentInference.candidate.trackMissing')} + + + {chinaBonus.applied + ? t('parentInference.candidate.nationalityBonusApplied', { value: Math.round(chinaBonus.bonus * 100) }) + : t('parentInference.candidate.nationalityBonusNone')} + + {isLabelCandidate && ( + + {t('parentInference.candidate.labelActive')} + + )} + {groupExclusion && ( + + {t('parentInference.candidate.groupExcludedUntil', { value: formatTimestamp(groupExclusion.activeUntil) })} + + )} + {globalExclusion && ( + + {t('parentInference.candidate.globalExcluded')} + + )} + {confidence != null && ( + + {t('parentInference.candidate.evidenceConfidence', { value: Math.round(confidence * 100) })} + + )} +
+
+
+
+
{t('parentInference.candidate.totalScore')}
+
{scorePct(candidate.finalScore)}
+
+
+ {t('parentInference.summary.marginOnly')} + {scorePct(candidate.marginFromTop)} +
+ {hasOtherActiveLabel && ( +
+ {t('parentInference.actions.otherLabelActive')} +
+ )} +
+ +
+ + +
+
+
+
+ + {(coverage || rawScores) && ( +
+ {coverage && ( +
+
+
{t('parentInference.candidate.trackWindow')}
+
+ {(coverage.trackPointCount ?? 0)}pt · {formatSpanMinutes(coverage.trackSpanMinutes)} +
+
+
+
{t('parentInference.candidate.overlapWindow')}
+
+ {(coverage.overlapPointCount ?? 0)}pt · {formatSpanMinutes(coverage.overlapSpanMinutes)} +
+
+
+
{t('parentInference.candidate.inZoneWindow')}
+
+ {(coverage.inZonePointCount ?? 0)}pt · {formatSpanMinutes(coverage.inZoneSpanMinutes)} +
+
+
+
{t('parentInference.candidate.scoreWindow')}
+
+ {coverage.scoreWindowHours ?? 0}h +
+
+
+ )} + {(coverage || rawScores) && ( +
+
+
{t('parentInference.candidate.trackCoverage')}
+
+ {scorePct(coverage?.trackCoverageFactor ?? null)} + {rawScores?.trackSimilarityScore != null && candidate.trackSimilarityScore != null && ( + + {' '}· {scorePct(rawScores.trackSimilarityScore)}→{scorePct(candidate.trackSimilarityScore)} + + )} +
+
+
+
{t('parentInference.candidate.visitCoverage')}
+
+ {scorePct(coverage?.visitCoverageFactor ?? null)} + {rawScores?.visitScore6h != null && candidate.visitScore6h != null && ( + + {' '}· {scorePct(rawScores.visitScore6h)}→{scorePct(candidate.visitScore6h)} + + )} +
+
+
+
{t('parentInference.candidate.activityCoverage')}
+
+ {scorePct(coverage?.activityCoverageFactor ?? null)} + {rawScores?.activitySyncScore6h != null && candidate.activitySyncScore6h != null && ( + + {' '}· {scorePct(rawScores.activitySyncScore6h)}→{scorePct(candidate.activitySyncScore6h)} + + )} +
+
+
+
{t('parentInference.candidate.proxCoverage')}
+
+ {scorePct(coverage?.trackCoverageFactor ?? null)} + {rawScores?.proximityScore6h != null && candidate.proximityScore6h != null && ( + + {' '}· {scorePct(rawScores.proximityScore6h)}→{scorePct(candidate.proximityScore6h)} + + )} +
+
+
+ )} +
+ )} + +
+
+ + + + + + +
+
+
+ ); + })} + {visibleCandidates.length === 0 && ( +
+ {t('parentInference.candidate.emptyThreshold', { score: MIN_PARENT_REVIEW_SCORE_PCT })} +
+ )} +
+
+ ); + })} +
+
+
+ ); +} diff --git a/frontend/src/components/korea/parentInferenceConstants.ts b/frontend/src/components/korea/parentInferenceConstants.ts new file mode 100644 index 0000000..f45b61c --- /dev/null +++ b/frontend/src/components/korea/parentInferenceConstants.ts @@ -0,0 +1,15 @@ +export const MIN_PARENT_REVIEW_SCORE = 0.3; +export const MIN_PARENT_REVIEW_SCORE_PCT = 30; +export const MIN_PARENT_REVIEW_MEMBER_COUNT = 2; +export const REPLAY_COMPARE_PANEL_WIDTH_RATIO = 0.7; +export const KOREA_SIDE_PANEL_WIDTH = 300; +export const FLEET_LIST_PANEL_MAX_WIDTH = 300; +export const FLEET_LIST_PANEL_LEFT_OFFSET = 10; +export const ANALYSIS_PANEL_MAX_WIDTH = 280; +export const ANALYSIS_PANEL_RIGHT_OFFSET = 50; +export const REVIEW_PANEL_MAX_WIDTH = 560; +export const REVIEW_PANEL_RIGHT_OFFSET = 16; +export const REPLAY_CENTER_SAFE_GAP = 8; +export const REPLAY_LEFT_RESERVED_WIDTH = FLEET_LIST_PANEL_LEFT_OFFSET + FLEET_LIST_PANEL_MAX_WIDTH + REPLAY_CENTER_SAFE_GAP; +export const REPLAY_ANALYSIS_RESERVED_WIDTH = ANALYSIS_PANEL_MAX_WIDTH + ANALYSIS_PANEL_RIGHT_OFFSET + REPLAY_CENTER_SAFE_GAP; +export const REPLAY_REVIEW_RESERVED_WIDTH = REVIEW_PANEL_MAX_WIDTH + REVIEW_PANEL_RIGHT_OFFSET + REPLAY_CENTER_SAFE_GAP; diff --git a/frontend/src/components/korea/parentReviewCandidateColors.ts b/frontend/src/components/korea/parentReviewCandidateColors.ts new file mode 100644 index 0000000..7568f6d --- /dev/null +++ b/frontend/src/components/korea/parentReviewCandidateColors.ts @@ -0,0 +1,13 @@ +const PARENT_REVIEW_CANDIDATE_COLORS = [ + '#22d3ee', + '#f59e0b', + '#a78bfa', + '#34d399', + '#fb7185', + '#60a5fa', +] as const; + +export function getParentReviewCandidateColor(rank: number): string { + const index = Math.max(0, (rank || 1) - 1) % PARENT_REVIEW_CANDIDATE_COLORS.length; + return PARENT_REVIEW_CANDIDATE_COLORS[index]; +} diff --git a/frontend/src/components/korea/useReplayCenterPanelLayout.ts b/frontend/src/components/korea/useReplayCenterPanelLayout.ts new file mode 100644 
index 0000000..01e7087 --- /dev/null +++ b/frontend/src/components/korea/useReplayCenterPanelLayout.ts @@ -0,0 +1,69 @@ +import { useEffect, useMemo, useState } from 'react'; +import { + KOREA_SIDE_PANEL_WIDTH, + REPLAY_ANALYSIS_RESERVED_WIDTH, + REPLAY_COMPARE_PANEL_WIDTH_RATIO, + REPLAY_LEFT_RESERVED_WIDTH, + REPLAY_REVIEW_RESERVED_WIDTH, +} from './parentInferenceConstants'; + +interface ReplayCenterPanelLayoutOptions { + minWidth: number; + maxWidth: number; + hasRightReviewPanel?: boolean; +} + +interface ReplayCenterPanelLayout { + left: number; + width: number; +} + +const FALLBACK_VIEWPORT_WIDTH = 1920; +const ABSOLUTE_MIN_WIDTH = 180; + +export function useReplayCenterPanelLayout({ + minWidth, + maxWidth, + hasRightReviewPanel = false, +}: ReplayCenterPanelLayoutOptions): ReplayCenterPanelLayout { + const [viewportWidth, setViewportWidth] = useState( + () => (typeof window === 'undefined' ? FALLBACK_VIEWPORT_WIDTH : window.innerWidth), + ); + + useEffect(() => { + if (typeof window === 'undefined') return; + const handleResize = () => { + setViewportWidth(window.innerWidth); + }; + window.addEventListener('resize', handleResize); + return () => { + window.removeEventListener('resize', handleResize); + }; + }, []); + + return useMemo(() => { + const mapPanelWidth = Math.max(ABSOLUTE_MIN_WIDTH, viewportWidth - KOREA_SIDE_PANEL_WIDTH); + const leftReserved = REPLAY_LEFT_RESERVED_WIDTH; + const rightReserved = Math.max( + REPLAY_ANALYSIS_RESERVED_WIDTH, + hasRightReviewPanel ? 
REPLAY_REVIEW_RESERVED_WIDTH : 0, + ); + const availableWidth = Math.max(ABSOLUTE_MIN_WIDTH, mapPanelWidth - leftReserved - rightReserved); + + let width: number; + if (availableWidth >= maxWidth) { + width = maxWidth; + } else if (availableWidth <= minWidth) { + width = Math.max(ABSOLUTE_MIN_WIDTH, availableWidth); + } else { + width = Math.min(maxWidth, Math.max(minWidth, availableWidth * REPLAY_COMPARE_PANEL_WIDTH_RATIO)); + } + + const left = leftReserved + Math.max(0, (availableWidth - width) / 2); + + return { + left, + width, + }; + }, [hasRightReviewPanel, maxWidth, minWidth, viewportWidth]); +} diff --git a/frontend/src/hooks/useGroupPolygons.ts b/frontend/src/hooks/useGroupPolygons.ts index 82dada9..649c42b 100644 --- a/frontend/src/hooks/useGroupPolygons.ts +++ b/frontend/src/hooks/useGroupPolygons.ts @@ -53,7 +53,7 @@ export function useGroupPolygons(enabled: boolean): UseGroupPolygonsResult { const [allGroups, setAllGroups] = useState([]); const [isLoading, setIsLoading] = useState(false); const [lastUpdated, setLastUpdated] = useState(0); - const timerRef = useRef>(); + const timerRef = useRef | undefined>(undefined); const load = useCallback(async () => { setIsLoading(true); diff --git a/frontend/src/services/vesselAnalysis.ts b/frontend/src/services/vesselAnalysis.ts index a1c470c..a3ab21e 100644 --- a/frontend/src/services/vesselAnalysis.ts +++ b/frontend/src/services/vesselAnalysis.ts @@ -58,6 +58,20 @@ export interface MemberInfo { isParent: boolean; } +export interface ParentInferenceSummary { + status: string; + normalizedParentName: string | null; + selectedParentMmsi: string | null; + selectedParentName: string | null; + confidence: number | null; + decisionSource: string | null; + topScore: number | null; + scoreMargin: number | null; + stableCycles: number | null; + skipReason: string | null; + statusReason: string | null; +} + export interface GroupPolygonDto { groupType: 'FLEET' | 'GEAR_IN_ZONE' | 'GEAR_OUT_ZONE'; groupKey: string; @@ 
-73,7 +87,8 @@ export interface GroupPolygonDto { zoneName: string | null; members: MemberInfo[]; color: string; - resolution?: '1h' | '6h'; + resolution?: '1h' | '1h-fb' | '6h'; + parentInference?: ParentInferenceSummary | null; } export async function fetchGroupPolygons(): Promise { @@ -134,6 +149,376 @@ export async function fetchGroupCorrelations( return res.json(); } +/* ── Parent Inference Review Types ───────────────────────────── */ + +export interface ParentInferenceCandidate { + candidateMmsi: string; + candidateName: string; + candidateVesselId: number | null; + rank: number; + candidateSource: string; + finalScore: number | null; + baseCorrScore: number | null; + nameMatchScore: number | null; + trackSimilarityScore: number | null; + visitScore6h: number | null; + proximityScore6h: number | null; + activitySyncScore6h: number | null; + stabilityScore: number | null; + registryBonus: number | null; + marginFromTop: number | null; + trackAvailable: boolean | null; + evidence: Record; +} + +export interface GroupParentInferenceItem { + groupType: GroupPolygonDto['groupType']; + groupKey: string; + groupLabel: string; + subClusterId: number; + snapshotTime: string; + zoneName: string | null; + memberCount: number | null; + resolution: GroupPolygonDto['resolution']; + candidateCount: number | null; + parentInference: ParentInferenceSummary | null; + candidates?: ParentInferenceCandidate[]; + evidenceSummary?: Record; +} + +export interface ParentInferenceReviewResponse { + count: number; + items: GroupParentInferenceItem[]; +} + +export interface GroupParentInferenceResponse { + groupKey: string; + count: number; + items: GroupParentInferenceItem[]; +} + +export interface ParentInferenceReviewRequest { + action: 'CONFIRM' | 'REJECT' | 'RESET'; + selectedParentMmsi?: string; + actor: string; + comment?: string; +} + +export async function fetchParentInferenceReview( + status = 'REVIEW_REQUIRED', + limit = 100, +): Promise { + const res = await fetch( + 
`${API_BASE}/vessel-analysis/groups/parent-inference/review?status=${encodeURIComponent(status)}&limit=${limit}`, + { headers: { accept: 'application/json' } }, + ); + if (!res.ok) return { count: 0, items: [] }; + return res.json(); +} + +export async function fetchGroupParentInference(groupKey: string): Promise { + const res = await fetch( + `${API_BASE}/vessel-analysis/groups/${encodeURIComponent(groupKey)}/parent-inference`, + { headers: { accept: 'application/json' } }, + ); + if (!res.ok) return { groupKey, count: 0, items: [] }; + return res.json(); +} + +export async function reviewGroupParentInference( + groupKey: string, + subClusterId: number, + payload: ParentInferenceReviewRequest, +): Promise<{ groupKey: string; subClusterId: number; action: string; item: GroupParentInferenceItem | null }> { + const res = await fetch( + `${API_BASE}/vessel-analysis/groups/${encodeURIComponent(groupKey)}/parent-inference/${subClusterId}/review`, + { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify(payload), + }, + ); + if (!res.ok) { + let message = `parent inference review failed: ${res.status}`; + try { + const data = await res.json() as { error?: string }; + if (data.error) message = data.error; + } catch { + // ignore JSON parse failure + } + throw new Error(message); + } + return res.json(); +} + +export interface ParentCandidateExclusion { + id: number; + scopeType: 'GROUP' | 'GLOBAL'; + groupKey: string | null; + subClusterId: number | null; + candidateMmsi: string; + reasonType: 'GROUP_WRONG_PARENT' | 'GLOBAL_NOT_PARENT_TARGET'; + durationDays: number | null; + activeFrom: string; + activeUntil: string | null; + releasedAt: string | null; + releasedBy: string | null; + actor: string; + comment: string | null; + active: boolean; + metadata: Record; +} + +export interface ParentLabelSession { + id: number; + groupKey: string; + subClusterId: number; + labelParentMmsi: string; + 
labelParentName: string | null; + labelParentVesselId: number | null; + durationDays: number; + status: 'ACTIVE' | 'EXPIRED' | 'CANCELLED'; + activeFrom: string; + activeUntil: string; + actor: string; + comment: string | null; + anchorSnapshotTime: string | null; + anchorCenterLat: number | null; + anchorCenterLon: number | null; + anchorMemberCount: number | null; + active: boolean; + metadata: Record; +} + +export interface ParentLabelTrackingCycle { + id: number; + labelSessionId: number; + observedAt: string; + candidateSnapshotObservedAt: string | null; + autoStatus: string | null; + topCandidateMmsi: string | null; + topCandidateName: string | null; + topCandidateScore: number | null; + topCandidateMargin: number | null; + candidateCount: number | null; + labeledCandidatePresent: boolean; + labeledCandidateRank: number | null; + labeledCandidateScore: number | null; + labeledCandidatePreBonusScore: number | null; + labeledCandidateMarginFromTop: number | null; + matchedTop1: boolean; + matchedTop3: boolean; + evidenceSummary: Record; +} + +export interface GroupParentLabelSessionRequest { + selectedParentMmsi: string; + durationDays: 1 | 3 | 5; + actor: string; + comment?: string; +} + +export interface GroupParentCandidateExclusionRequest { + candidateMmsi: string; + durationDays: 1 | 3 | 5; + actor: string; + comment?: string; +} + +export interface GlobalParentCandidateExclusionRequest { + candidateMmsi: string; + actor: string; + comment?: string; +} + +export interface ParentWorkflowActionRequest { + actor: string; + comment?: string; +} + +export interface ParentCandidateExclusionListResponse { + count: number; + items: ParentCandidateExclusion[]; +} + +export interface ParentLabelSessionListResponse { + count: number; + items: ParentLabelSession[]; +} + +export interface ParentLabelTrackingResponse { + labelSessionId: number; + count: number; + items: ParentLabelTrackingCycle[]; +} + +async function parseWorkflowError(res: Response, fallback: string): 
Promise { + let message = fallback; + try { + const data = await res.json() as { error?: string }; + if (data.error) { + message = data.error; + } + } catch { + // ignore JSON parse failure + } + throw new Error(message); +} + +export async function createGroupParentLabelSession( + groupKey: string, + subClusterId: number, + payload: GroupParentLabelSessionRequest, +): Promise<{ groupKey: string; subClusterId: number; action: string; item: ParentLabelSession | null }> { + const res = await fetch( + `${API_BASE}/vessel-analysis/groups/${encodeURIComponent(groupKey)}/parent-inference/${subClusterId}/label-sessions`, + { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify(payload), + }, + ); + if (!res.ok) { + return parseWorkflowError(res, `parent label session failed: ${res.status}`); + } + return res.json(); +} + +export async function createGroupCandidateExclusion( + groupKey: string, + subClusterId: number, + payload: GroupParentCandidateExclusionRequest, +): Promise<{ groupKey: string; subClusterId: number; action: string; item: ParentCandidateExclusion | null }> { + const res = await fetch( + `${API_BASE}/vessel-analysis/groups/${encodeURIComponent(groupKey)}/parent-inference/${subClusterId}/candidate-exclusions`, + { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify(payload), + }, + ); + if (!res.ok) { + return parseWorkflowError(res, `group candidate exclusion failed: ${res.status}`); + } + return res.json(); +} + +export async function createGlobalCandidateExclusion( + payload: GlobalParentCandidateExclusionRequest, +): Promise<{ action: string; item: ParentCandidateExclusion | null }> { + const res = await fetch(`${API_BASE}/vessel-analysis/parent-inference/candidate-exclusions/global`, { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: 
JSON.stringify(payload), + }); + if (!res.ok) { + return parseWorkflowError(res, `global candidate exclusion failed: ${res.status}`); + } + return res.json(); +} + +export async function releaseCandidateExclusion( + exclusionId: number, + payload: ParentWorkflowActionRequest, +): Promise<{ action: string; item: ParentCandidateExclusion | null }> { + const res = await fetch(`${API_BASE}/vessel-analysis/parent-inference/candidate-exclusions/${exclusionId}/release`, { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify(payload), + }); + if (!res.ok) { + return parseWorkflowError(res, `candidate exclusion release failed: ${res.status}`); + } + return res.json(); +} + +export async function fetchParentCandidateExclusions(params: { + scopeType?: 'GROUP' | 'GLOBAL'; + groupKey?: string; + subClusterId?: number; + candidateMmsi?: string; + activeOnly?: boolean; + limit?: number; +} = {}): Promise { + const search = new URLSearchParams(); + if (params.scopeType) search.set('scopeType', params.scopeType); + if (params.groupKey) search.set('groupKey', params.groupKey); + if (params.subClusterId != null) search.set('subClusterId', String(params.subClusterId)); + if (params.candidateMmsi) search.set('candidateMmsi', params.candidateMmsi); + if (params.activeOnly != null) search.set('activeOnly', String(params.activeOnly)); + if (params.limit != null) search.set('limit', String(params.limit)); + const res = await fetch(`${API_BASE}/vessel-analysis/parent-inference/candidate-exclusions?${search.toString()}`, { + headers: { accept: 'application/json' }, + }); + if (!res.ok) return { count: 0, items: [] }; + return res.json(); +} + +export async function fetchParentLabelSessions(params: { + groupKey?: string; + subClusterId?: number; + status?: 'ACTIVE' | 'EXPIRED' | 'CANCELLED'; + activeOnly?: boolean; + limit?: number; +} = {}): Promise { + const search = new URLSearchParams(); + if (params.groupKey) 
search.set('groupKey', params.groupKey); + if (params.subClusterId != null) search.set('subClusterId', String(params.subClusterId)); + if (params.status) search.set('status', params.status); + if (params.activeOnly != null) search.set('activeOnly', String(params.activeOnly)); + if (params.limit != null) search.set('limit', String(params.limit)); + const res = await fetch(`${API_BASE}/vessel-analysis/parent-inference/label-sessions?${search.toString()}`, { + headers: { accept: 'application/json' }, + }); + if (!res.ok) return { count: 0, items: [] }; + return res.json(); +} + +export async function cancelParentLabelSession( + labelSessionId: number, + payload: ParentWorkflowActionRequest, +): Promise<{ action: string; item: ParentLabelSession | null }> { + const res = await fetch(`${API_BASE}/vessel-analysis/parent-inference/label-sessions/${labelSessionId}/cancel`, { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + }, + body: JSON.stringify(payload), + }); + if (!res.ok) { + return parseWorkflowError(res, `label session cancel failed: ${res.status}`); + } + return res.json(); +} + +export async function fetchParentLabelTracking( + labelSessionId: number, + limit = 200, +): Promise { + const res = await fetch( + `${API_BASE}/vessel-analysis/parent-inference/label-sessions/${labelSessionId}/tracking?limit=${limit}`, + { headers: { accept: 'application/json' } }, + ); + if (!res.ok) return { labelSessionId, count: 0, items: [] }; + return res.json(); +} + /* ── Correlation Tracks (Prediction API) ──────────────────────── */ export interface CorrelationTrackPoint { diff --git a/prediction/algorithms/gear_correlation.py b/prediction/algorithms/gear_correlation.py index 8de28e6..b5c300f 100644 --- a/prediction/algorithms/gear_correlation.py +++ b/prediction/algorithms/gear_correlation.py @@ -19,6 +19,7 @@ from datetime import datetime, timezone from typing import Optional from algorithms.polygon_builder import 
_get_time_bucket_age +from config import qualified_table logger = logging.getLogger(__name__) @@ -26,6 +27,9 @@ logger = logging.getLogger(__name__) # ── 상수 ────────────────────────────────────────────────────────── _EARTH_RADIUS_NM = 3440.065 _NM_TO_M = 1852.0 +CORRELATION_PARAM_MODELS = qualified_table('correlation_param_models') +GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') +GEAR_CORRELATION_RAW_METRICS = qualified_table('gear_correlation_raw_metrics') # ── 파라미터 모델 ───────────────────────────────────────────────── @@ -469,10 +473,11 @@ def _get_vessel_track(vessel_store, mmsi: str, hours: int = 6) -> list[dict]: else recent.get('raw_sog', pd.Series(dtype=float))).fillna(0).values cogs = (recent['cog'] if 'cog' in recent.columns else pd.Series(0, index=recent.index)).fillna(0).values + timestamps = recent['timestamp'].tolist() return [ {'lat': float(lats[i]), 'lon': float(lons[i]), - 'sog': float(sogs[i]), 'cog': float(cogs[i])} + 'sog': float(sogs[i]), 'cog': float(cogs[i]), 'timestamp': timestamps[i]} for i in range(len(lats)) ] @@ -724,7 +729,7 @@ def _load_active_models(conn) -> list[ModelParams]: cur = conn.cursor() try: cur.execute( - "SELECT id, name, params FROM kcg.correlation_param_models " + f"SELECT id, name, params FROM {CORRELATION_PARAM_MODELS} " "WHERE is_active = TRUE ORDER BY is_default DESC, id ASC" ) rows = cur.fetchall() @@ -751,7 +756,7 @@ def _load_all_scores(conn) -> dict[tuple, dict]: "SELECT model_id, group_key, sub_cluster_id, target_mmsi, " "current_score, streak_count, last_observed_at, " "target_type, target_name " - "FROM kcg.gear_correlation_scores" + f"FROM {GEAR_CORRELATION_SCORES}" ) result = {} for row in cur.fetchall(): @@ -780,7 +785,7 @@ def _batch_insert_raw(conn, batch: list[tuple]): from psycopg2.extras import execute_values execute_values( cur, - """INSERT INTO kcg.gear_correlation_raw_metrics + f"""INSERT INTO {GEAR_CORRELATION_RAW_METRICS} (observed_at, group_key, sub_cluster_id, target_mmsi, 
"""Normalization and filter rules for gear parent names."""

from typing import Optional

# Parent names shorter than this (after normalization) are too ambiguous to track.
_TRACKABLE_PARENT_MIN_LENGTH = 4
# Separator / wildcard characters dropped during normalization.
_REMOVE_TOKENS = (' ', '_', '-', '%')


def normalize_parent_name(name: Optional[str]) -> str:
    """Return *name* upper-cased, trimmed, and stripped of separator tokens.

    ``None`` normalizes to the empty string.
    """
    cleaned = (name or '').upper().strip()
    # Every removable token is a single character, so one char-filter pass is
    # equivalent to the chained str.replace() calls it replaces.
    return ''.join(ch for ch in cleaned if ch not in _REMOVE_TOKENS)


def is_trackable_parent_name(name: Optional[str]) -> bool:
    """True when the normalized form is long enough to track reliably."""
    return len(normalize_parent_name(name)) >= _TRACKABLE_PARENT_MIN_LENGTH
import math
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Iterable, Optional
from uuid import uuid4

from config import qualified_table

# Schema-qualified table names, resolved once at import time.
GEAR_GROUP_EPISODES = qualified_table('gear_group_episodes')
GEAR_GROUP_EPISODE_SNAPSHOTS = qualified_table('gear_group_episode_snapshots')
GEAR_GROUP_PARENT_CANDIDATE_SNAPSHOTS = qualified_table('gear_group_parent_candidate_snapshots')
GEAR_PARENT_LABEL_SESSIONS = qualified_table('gear_parent_label_sessions')

# Episode continuity / prior-bonus tuning knobs.
_ACTIVE_EPISODE_WINDOW_HOURS = 6
_EPISODE_PRIOR_WINDOW_HOURS = 24
_LINEAGE_PRIOR_WINDOW_DAYS = 7
_LABEL_PRIOR_WINDOW_DAYS = 30
_CONTINUITY_SCORE_THRESHOLD = 0.45
_MERGE_SCORE_THRESHOLD = 0.35
_CENTER_DISTANCE_THRESHOLD_NM = 12.0
_EPISODE_PRIOR_MAX = 0.10
_LINEAGE_PRIOR_MAX = 0.05
_LABEL_PRIOR_MAX = 0.10
_TOTAL_PRIOR_CAP = 0.20


def _clamp(value: float, floor: float = 0.0, ceil: float = 1.0) -> float:
    """Clip *value* into the [floor, ceil] interval."""
    return min(ceil, max(floor, value))


def _haversine_nm(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Great-circle distance between two lat/lon points, in nautical miles."""
    earth_radius_nm = 3440.065
    rlat1, rlat2 = math.radians(lat1), math.radians(lat2)
    half_dlat = math.radians(lat2 - lat1) / 2.0
    half_dlon = math.radians(lon2 - lon1) / 2.0
    a = math.sin(half_dlat) ** 2 + math.cos(rlat1) * math.cos(rlat2) * math.sin(half_dlon) ** 2
    # max() guards sqrt against tiny negative rounding error.
    return earth_radius_nm * 2 * math.atan2(math.sqrt(a), math.sqrt(max(0.0, 1 - a)))


def _json_list(value: Any) -> list[str]:
    """Coerce a JSON/JSONB column value into a list of non-empty strings.

    Accepts an already-decoded list, a JSON string, or None; anything that
    fails to decode into a list yields [].
    """
    if isinstance(value, list):
        return [str(item) for item in value if item]
    if value is None:
        return []
    try:
        decoded = json.loads(value)
    except Exception:
        return []
    if not isinstance(decoded, list):
        return []
    return [str(item) for item in decoded if item]


@dataclass
class GroupEpisodeInput:
    """Current-cycle view of one gear group (one sub-cluster of a lineage)."""
    group_key: str
    normalized_parent_name: str
    sub_cluster_id: int
    member_mmsis: list[str]
    member_count: int
    center_lat: float
    center_lon: float

    @property
    def key(self) -> tuple[str, int]:
        # (group_key, sub_cluster_id) uniquely identifies a group this cycle.
        return (self.group_key, self.sub_cluster_id)
+@dataclass +class EpisodeState: + episode_id: str + lineage_key: str + group_key: str + normalized_parent_name: str + current_sub_cluster_id: int + member_mmsis: list[str] + member_count: int + center_lat: float + center_lon: float + last_snapshot_time: datetime + status: str + + +@dataclass +class EpisodeAssignment: + group_key: str + sub_cluster_id: int + normalized_parent_name: str + episode_id: str + continuity_source: str + continuity_score: float + split_from_episode_id: Optional[str] + merged_from_episode_ids: list[str] + member_mmsis: list[str] + member_count: int + center_lat: float + center_lon: float + + @property + def key(self) -> tuple[str, int]: + return (self.group_key, self.sub_cluster_id) + + +@dataclass +class EpisodePlan: + assignments: dict[tuple[str, int], EpisodeAssignment] + expired_episode_ids: set[str] + merged_episode_targets: dict[str, str] + + +def _member_jaccard(left: Iterable[str], right: Iterable[str]) -> tuple[float, int]: + left_set = {item for item in left if item} + right_set = {item for item in right if item} + if not left_set and not right_set: + return 0.0, 0 + overlap = len(left_set & right_set) + union = len(left_set | right_set) + return (overlap / union if union else 0.0), overlap + + +def continuity_score(current: GroupEpisodeInput, previous: EpisodeState) -> tuple[float, int, float]: + jaccard, overlap_count = _member_jaccard(current.member_mmsis, previous.member_mmsis) + distance_nm = _haversine_nm(current.center_lat, current.center_lon, previous.center_lat, previous.center_lon) + center_support = _clamp(1.0 - (distance_nm / _CENTER_DISTANCE_THRESHOLD_NM)) + score = _clamp((0.75 * jaccard) + (0.25 * center_support)) + return round(score, 6), overlap_count, round(distance_nm, 3) + + +def load_active_episode_states(conn, lineage_keys: list[str]) -> dict[str, list[EpisodeState]]: + if not lineage_keys: + return {} + + cur = conn.cursor() + try: + cur.execute( + f""" + SELECT episode_id, lineage_key, group_key, 
def load_active_episode_states(conn, lineage_keys: list[str]) -> dict[str, list[EpisodeState]]:
    """Load recent ACTIVE episodes for the given lineage keys.

    Returns a mapping ``lineage_key -> [EpisodeState, ...]``, newest-first
    per lineage.  Only rows whose last snapshot falls inside the
    ``_ACTIVE_EPISODE_WINDOW_HOURS`` window are considered continuable.
    """
    if not lineage_keys:
        return {}

    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT episode_id, lineage_key, group_key, normalized_parent_name,
                   current_sub_cluster_id, current_member_mmsis, current_member_count,
                   ST_Y(current_center_point) AS center_lat,
                   ST_X(current_center_point) AS center_lon,
                   last_snapshot_time, status
            FROM {GEAR_GROUP_EPISODES}
            WHERE lineage_key = ANY(%s)
              AND status = 'ACTIVE'
              AND last_snapshot_time >= NOW() - (%s * INTERVAL '1 hour')
            ORDER BY lineage_key, last_snapshot_time DESC, episode_id ASC
            """,
            (lineage_keys, _ACTIVE_EPISODE_WINDOW_HOURS),
        )
        result: dict[str, list[EpisodeState]] = {}
        for row in cur.fetchall():
            # ST_Y/ST_X yield latitude/longitude of the stored center point.
            state = EpisodeState(
                episode_id=row[0],
                lineage_key=row[1],
                group_key=row[2],
                normalized_parent_name=row[3],
                current_sub_cluster_id=int(row[4] or 0),
                member_mmsis=_json_list(row[5]),
                member_count=int(row[6] or 0),
                center_lat=float(row[7] or 0.0),
                center_lon=float(row[8] or 0.0),
                last_snapshot_time=row[9],
                status=row[10],
            )
            result.setdefault(state.lineage_key, []).append(state)
        return result
    finally:
        cur.close()


def group_to_episode_input(group: dict[str, Any], normalized_parent_name: str) -> GroupEpisodeInput:
    """Convert a raw polygon-group dict into a :class:`GroupEpisodeInput`.

    Member MMSIs are deduplicated and sorted; the center is the unweighted
    mean of member positions, or (0, 0) when the group has no members.
    NOTE(review): expects ``group`` to carry ``parent_name``, ``members``
    (each with ``mmsi``/``lat``/``lon``) and ``sub_cluster_id`` — confirm
    against the polygon-builder output.
    """
    members = group.get('members') or []
    member_mmsis = sorted({str(member.get('mmsi')) for member in members if member.get('mmsi')})
    member_count = len(member_mmsis)
    if members:
        # Mean is over all member rows, not the deduplicated MMSI set.
        center_lat = sum(float(member['lat']) for member in members) / len(members)
        center_lon = sum(float(member['lon']) for member in members) / len(members)
    else:
        center_lat = 0.0
        center_lon = 0.0
    return GroupEpisodeInput(
        group_key=group['parent_name'],
        normalized_parent_name=normalized_parent_name,
        sub_cluster_id=int(group.get('sub_cluster_id', 0)),
        member_mmsis=member_mmsis,
        member_count=member_count,
        center_lat=center_lat,
        center_lon=center_lon,
    )


def build_episode_plan(
    groups: list[GroupEpisodeInput],
    previous_by_lineage: dict[str, list[EpisodeState]],
) -> EpisodePlan:
    """Match this cycle's groups against prior episodes, lineage by lineage.

    Per lineage the matching runs in three ordered phases:

    1. MERGE: a current group matching >= 2 previous episodes above
       ``_MERGE_SCORE_THRESHOLD`` opens a fresh episode that absorbs them.
    2. CONTINUE / SPLIT_CONTINUE: remaining previous episodes (newest first)
       each claim their best unassigned current group.
    3. NEW / SPLIT_NEW: still-unassigned current groups open new episodes,
       recording the best qualified previous episode as split origin if any.

    Previous episodes of a lineage seen this cycle that end up neither
    continued nor merged are expired immediately; lineages absent from
    ``groups`` entirely are left untouched here.
    """
    assignments: dict[tuple[str, int], EpisodeAssignment] = {}
    expired_episode_ids: set[str] = set()
    merged_episode_targets: dict[str, str] = {}

    # Bucket current groups by lineage (normalized parent name).
    groups_by_lineage: dict[str, list[GroupEpisodeInput]] = {}
    for group in groups:
        groups_by_lineage.setdefault(group.normalized_parent_name, []).append(group)

    for lineage_key, current_groups in groups_by_lineage.items():
        previous_groups = previous_by_lineage.get(lineage_key, [])
        # current group key -> [(previous, score, overlap, distance_nm), ...]
        qualified_matches: dict[tuple[str, int], list[tuple[EpisodeState, float, int, float]]] = {}
        # previous episode id -> [(current, score, overlap, distance_nm), ...]
        prior_to_currents: dict[str, list[tuple[GroupEpisodeInput, float, int, float]]] = {}

        for current in current_groups:
            for previous in previous_groups:
                score, overlap_count, distance_nm = continuity_score(current, previous)
                # A pair qualifies either on blended score or on any member
                # overlap within the center-distance threshold.
                if score >= _CONTINUITY_SCORE_THRESHOLD or (
                    overlap_count > 0 and distance_nm <= _CENTER_DISTANCE_THRESHOLD_NM
                ):
                    qualified_matches.setdefault(current.key, []).append((previous, score, overlap_count, distance_nm))
                    prior_to_currents.setdefault(previous.episode_id, []).append((current, score, overlap_count, distance_nm))

        consumed_previous_ids: set[str] = set()
        assigned_current_keys: set[tuple[str, int]] = set()

        # Phase 1: merges — one current group absorbing several episodes.
        for current in current_groups:
            # Sort: score desc, overlap desc, distance asc, most recent first.
            matches = sorted(
                qualified_matches.get(current.key, []),
                key=lambda item: (item[1], item[2], -item[3], item[0].last_snapshot_time),
                reverse=True,
            )
            merge_candidates = [
                item for item in matches
                if item[1] >= _MERGE_SCORE_THRESHOLD
            ]
            if len(merge_candidates) >= 2:
                episode_id = f"ep-{uuid4().hex[:12]}"
                merged_ids = [item[0].episode_id for item in merge_candidates]
                assignments[current.key] = EpisodeAssignment(
                    group_key=current.group_key,
                    sub_cluster_id=current.sub_cluster_id,
                    normalized_parent_name=current.normalized_parent_name,
                    episode_id=episode_id,
                    continuity_source='MERGE_NEW',
                    continuity_score=round(max(item[1] for item in merge_candidates), 6),
                    split_from_episode_id=None,
                    merged_from_episode_ids=merged_ids,
                    member_mmsis=current.member_mmsis,
                    member_count=current.member_count,
                    center_lat=current.center_lat,
                    center_lon=current.center_lon,
                )
                assigned_current_keys.add(current.key)
                for merged_id in merged_ids:
                    consumed_previous_ids.add(merged_id)
                    merged_episode_targets[merged_id] = episode_id

        # Phase 2: continuations — newest previous episodes pick first.
        previous_ranked = sorted(
            previous_groups,
            key=lambda item: item.last_snapshot_time,
            reverse=True,
        )
        for previous in previous_ranked:
            if previous.episode_id in consumed_previous_ids:
                continue
            matches = [
                item for item in prior_to_currents.get(previous.episode_id, [])
                if item[0].key not in assigned_current_keys
            ]
            if not matches:
                continue
            matches.sort(key=lambda item: (item[1], item[2], -item[3]), reverse=True)
            current, score, _, _ = matches[0]
            # More than one qualified current group means the episode split.
            split_candidate_count = len(prior_to_currents.get(previous.episode_id, []))
            assignments[current.key] = EpisodeAssignment(
                group_key=current.group_key,
                sub_cluster_id=current.sub_cluster_id,
                normalized_parent_name=current.normalized_parent_name,
                episode_id=previous.episode_id,
                continuity_source='SPLIT_CONTINUE' if split_candidate_count > 1 else 'CONTINUED',
                continuity_score=score,
                split_from_episode_id=None,
                merged_from_episode_ids=[],
                member_mmsis=current.member_mmsis,
                member_count=current.member_count,
                center_lat=current.center_lat,
                center_lon=current.center_lon,
            )
            assigned_current_keys.add(current.key)
            consumed_previous_ids.add(previous.episode_id)

        # Phase 3: brand-new episodes for anything left unassigned.
        for current in current_groups:
            if current.key in assigned_current_keys:
                continue

            matches = sorted(
                qualified_matches.get(current.key, []),
                key=lambda item: (item[1], item[2], -item[3], item[0].last_snapshot_time),
                reverse=True,
            )
            split_from_episode_id = None
            continuity_source = 'NEW'
            continuity_score_value = 0.0
            if matches:
                # Best qualified previous episode is recorded as split origin.
                best_previous, score, _, _ = matches[0]
                split_from_episode_id = best_previous.episode_id
                continuity_source = 'SPLIT_NEW'
                continuity_score_value = score

            assignments[current.key] = EpisodeAssignment(
                group_key=current.group_key,
                sub_cluster_id=current.sub_cluster_id,
                normalized_parent_name=current.normalized_parent_name,
                episode_id=f"ep-{uuid4().hex[:12]}",
                continuity_source=continuity_source,
                continuity_score=continuity_score_value,
                split_from_episode_id=split_from_episode_id,
                merged_from_episode_ids=[],
                member_mmsis=current.member_mmsis,
                member_count=current.member_count,
                center_lat=current.center_lat,
                center_lon=current.center_lon,
            )
            assigned_current_keys.add(current.key)

        # Expiry: previous episodes of this lineage that were neither merged
        # nor carried forward into any assignment.  `assignments` accumulates
        # across lineages, hence the filter on normalized_parent_name.
        current_previous_ids = {assignment.episode_id for assignment in assignments.values() if assignment.normalized_parent_name == lineage_key}
        for previous in previous_groups:
            if previous.episode_id in merged_episode_targets:
                continue
            if previous.episode_id not in current_previous_ids:
                expired_episode_ids.add(previous.episode_id)

    return EpisodePlan(
        assignments=assignments,
        expired_episode_ids=expired_episode_ids,
        merged_episode_targets=merged_episode_targets,
    )


def load_episode_prior_stats(conn, episode_ids: list[str]) -> dict[tuple[str, str], dict[str, Any]]:
    """Aggregate last-24h candidate-snapshot stats per (episode, candidate).

    Feeds the episode-level prior bonus in
    :func:`compute_prior_bonus_components`.
    """
    if not episode_ids:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT episode_id, candidate_mmsi,
                   COUNT(*) AS seen_count,
                   SUM(CASE WHEN rank = 1 THEN 1 ELSE 0 END) AS top1_count,
                   AVG(final_score) AS avg_score,
                   MAX(observed_at) AS last_seen_at
            FROM {GEAR_GROUP_PARENT_CANDIDATE_SNAPSHOTS}
            WHERE episode_id = ANY(%s)
              AND observed_at >= NOW() - (%s * INTERVAL '1 hour')
            GROUP BY episode_id, candidate_mmsi
            """,
            (episode_ids, _EPISODE_PRIOR_WINDOW_HOURS),
        )
        result: dict[tuple[str, str], dict[str, Any]] = {}
        for episode_id, candidate_mmsi, seen_count, top1_count, avg_score, last_seen_at in cur.fetchall():
            result[(episode_id, candidate_mmsi)] = {
                'seen_count': int(seen_count or 0),
                'top1_count': int(top1_count or 0),
                'avg_score': float(avg_score or 0.0),
                'last_seen_at': last_seen_at,
            }
        return result
    finally:
        cur.close()
def load_lineage_prior_stats(conn, lineage_keys: list[str]) -> dict[tuple[str, str], dict[str, Any]]:
    """Aggregate 7-day candidate-snapshot history per (lineage, candidate MMSI)."""
    if not lineage_keys:
        return {}
    cursor = conn.cursor()
    try:
        cursor.execute(
            f"""
            SELECT normalized_parent_name, candidate_mmsi,
                   COUNT(*) AS seen_count,
                   SUM(CASE WHEN rank = 1 THEN 1 ELSE 0 END) AS top1_count,
                   SUM(CASE WHEN rank <= 3 THEN 1 ELSE 0 END) AS top3_count,
                   AVG(final_score) AS avg_score,
                   MAX(observed_at) AS last_seen_at
            FROM {GEAR_GROUP_PARENT_CANDIDATE_SNAPSHOTS}
            WHERE normalized_parent_name = ANY(%s)
              AND observed_at >= NOW() - (%s * INTERVAL '1 day')
            GROUP BY normalized_parent_name, candidate_mmsi
            """,
            (lineage_keys, _LINEAGE_PRIOR_WINDOW_DAYS),
        )
        # NULL aggregates (no rows) are coerced to zero defaults.
        return {
            (lineage, mmsi): {
                'seen_count': int(seen or 0),
                'top1_count': int(top1 or 0),
                'top3_count': int(top3 or 0),
                'avg_score': float(avg or 0.0),
                'last_seen_at': last_seen,
            }
            for lineage, mmsi, seen, top1, top3, avg, last_seen in cursor.fetchall()
        }
    finally:
        cursor.close()


def load_label_prior_stats(conn, lineage_keys: list[str]) -> dict[tuple[str, str], dict[str, Any]]:
    """Aggregate 30-day manual label sessions per (lineage, labeled MMSI)."""
    if not lineage_keys:
        return {}
    cursor = conn.cursor()
    try:
        cursor.execute(
            f"""
            SELECT normalized_parent_name, label_parent_mmsi,
                   COUNT(*) AS session_count,
                   MAX(active_from) AS last_labeled_at
            FROM {GEAR_PARENT_LABEL_SESSIONS}
            WHERE normalized_parent_name = ANY(%s)
              AND active_from >= NOW() - (%s * INTERVAL '1 day')
            GROUP BY normalized_parent_name, label_parent_mmsi
            """,
            (lineage_keys, _LABEL_PRIOR_WINDOW_DAYS),
        )
        return {
            (lineage, labeled_mmsi): {
                'session_count': int(sessions or 0),
                'last_labeled_at': last_labeled,
            }
            for lineage, labeled_mmsi, sessions, last_labeled in cursor.fetchall()
        }
    finally:
        cursor.close()
def _recency_support(observed_at: Optional[datetime], now: datetime, hours: float) -> float:
    """Linear recency weight in [0, 1]: 1.0 at *now*, 0.0 once *hours* old.

    Naive timestamps are treated as UTC before the age is computed.
    """
    if observed_at is None:
        return 0.0
    if observed_at.tzinfo is None:
        observed_at = observed_at.replace(tzinfo=timezone.utc)
    delta_hours = max(0.0, (now - observed_at.astimezone(timezone.utc)).total_seconds() / 3600.0)
    return _clamp(1.0 - (delta_hours / hours))


def compute_prior_bonus_components(
    observed_at: datetime,
    normalized_parent_name: str,
    episode_id: str,
    candidate_mmsi: str,
    episode_prior_stats: dict[tuple[str, str], dict[str, Any]],
    lineage_prior_stats: dict[tuple[str, str], dict[str, Any]],
    label_prior_stats: dict[tuple[str, str], dict[str, Any]],
) -> dict[str, float]:
    """Compute the three history-based prior bonuses for one candidate.

    Each component blends saturating counts, average score and recency,
    then is scaled by its own cap (episode 0.10, lineage 0.05, label 0.10).
    The summed total is capped at ``_TOTAL_PRIOR_CAP`` (0.20).  Returns a
    dict with the three components and the capped total, rounded to 6 dp.
    NOTE(review): assumes *observed_at* is timezone-aware — confirm callers.
    """
    episode_stats = episode_prior_stats.get((episode_id, candidate_mmsi), {})
    lineage_stats = lineage_prior_stats.get((normalized_parent_name, candidate_mmsi), {})
    label_stats = label_prior_stats.get((normalized_parent_name, candidate_mmsi), {})

    episode_bonus = 0.0
    if episode_stats:
        # Counters saturate: 6 sightings / 3 top-1 finishes reach full weight.
        episode_bonus = _EPISODE_PRIOR_MAX * (
            0.35 * min(1.0, episode_stats.get('seen_count', 0) / 6.0)
            + 0.35 * min(1.0, episode_stats.get('top1_count', 0) / 3.0)
            + 0.15 * _clamp(float(episode_stats.get('avg_score', 0.0)))
            + 0.15 * _recency_support(episode_stats.get('last_seen_at'), observed_at, _EPISODE_PRIOR_WINDOW_HOURS)
        )

    lineage_bonus = 0.0
    if lineage_stats:
        lineage_bonus = _LINEAGE_PRIOR_MAX * (
            0.30 * min(1.0, lineage_stats.get('seen_count', 0) / 12.0)
            + 0.25 * min(1.0, lineage_stats.get('top3_count', 0) / 6.0)
            + 0.20 * min(1.0, lineage_stats.get('top1_count', 0) / 3.0)
            + 0.15 * _clamp(float(lineage_stats.get('avg_score', 0.0)))
            + 0.10 * _recency_support(lineage_stats.get('last_seen_at'), observed_at, _LINEAGE_PRIOR_WINDOW_DAYS * 24.0)
        )

    label_bonus = 0.0
    if label_stats:
        # Manual labels weigh mostly on how often the MMSI was labeled.
        label_bonus = _LABEL_PRIOR_MAX * (
            0.70 * min(1.0, label_stats.get('session_count', 0) / 3.0)
            + 0.30 * _recency_support(label_stats.get('last_labeled_at'), observed_at, _LABEL_PRIOR_WINDOW_DAYS * 24.0)
        )

    total = min(_TOTAL_PRIOR_CAP, episode_bonus + lineage_bonus + label_bonus)
    return {
        'episodePriorBonus': round(episode_bonus, 6),
        'lineagePriorBonus': round(lineage_bonus, 6),
        'labelPriorBonus': round(label_bonus, 6),
        'priorBonusTotal': round(total, 6),
    }


def sync_episode_states(conn, observed_at: datetime, plan: EpisodePlan) -> None:
    """Persist an :class:`EpisodePlan`: expire, mark merged, upsert ACTIVE rows.

    Does not commit; the caller owns the transaction.
    """
    cur = conn.cursor()
    try:
        if plan.expired_episode_ids:
            cur.execute(
                f"""
                UPDATE {GEAR_GROUP_EPISODES}
                SET status = 'EXPIRED',
                    updated_at = %s
                WHERE episode_id = ANY(%s)
                """,
                (observed_at, list(plan.expired_episode_ids)),
            )

        # Point each merged-away episode at the episode that absorbed it.
        for previous_episode_id, merged_into_episode_id in plan.merged_episode_targets.items():
            cur.execute(
                f"""
                UPDATE {GEAR_GROUP_EPISODES}
                SET status = 'MERGED',
                    merged_into_episode_id = %s,
                    updated_at = %s
                WHERE episode_id = %s
                """,
                (merged_into_episode_id, observed_at, previous_episode_id),
            )

        for assignment in plan.assignments.values():
            # Upsert keyed on episode_id; '{{}}' is an f-string escape for a
            # literal empty jsonb object on first insert.
            cur.execute(
                f"""
                INSERT INTO {GEAR_GROUP_EPISODES} (
                    episode_id, lineage_key, group_key, normalized_parent_name,
                    current_sub_cluster_id, status, continuity_source, continuity_score,
                    first_seen_at, last_seen_at, last_snapshot_time,
                    current_member_count, current_member_mmsis, current_center_point,
                    split_from_episode_id, merged_from_episode_ids, metadata, updated_at
                ) VALUES (
                    %s, %s, %s, %s,
                    %s, 'ACTIVE', %s, %s,
                    %s, %s, %s,
                    %s, %s::jsonb, ST_SetSRID(ST_MakePoint(%s, %s), 4326),
                    %s, %s::jsonb, '{{}}'::jsonb, %s
                )
                ON CONFLICT (episode_id)
                DO UPDATE SET
                    group_key = EXCLUDED.group_key,
                    normalized_parent_name = EXCLUDED.normalized_parent_name,
                    current_sub_cluster_id = EXCLUDED.current_sub_cluster_id,
                    status = 'ACTIVE',
                    continuity_source = EXCLUDED.continuity_source,
                    continuity_score = EXCLUDED.continuity_score,
                    last_seen_at = EXCLUDED.last_seen_at,
                    last_snapshot_time = EXCLUDED.last_snapshot_time,
                    current_member_count = EXCLUDED.current_member_count,
                    current_member_mmsis = EXCLUDED.current_member_mmsis,
                    current_center_point = EXCLUDED.current_center_point,
                    split_from_episode_id = COALESCE(EXCLUDED.split_from_episode_id, {GEAR_GROUP_EPISODES}.split_from_episode_id),
                    merged_from_episode_ids = EXCLUDED.merged_from_episode_ids,
                    updated_at = EXCLUDED.updated_at
                """,
                (
                    assignment.episode_id,
                    # lineage_key stores the normalized parent name.
                    assignment.normalized_parent_name,
                    assignment.group_key,
                    assignment.normalized_parent_name,
                    assignment.sub_cluster_id,
                    assignment.continuity_source,
                    assignment.continuity_score,
                    observed_at,
                    observed_at,
                    observed_at,
                    assignment.member_count,
                    json.dumps(assignment.member_mmsis, ensure_ascii=False),
                    # ST_MakePoint takes (lon, lat).
                    assignment.center_lon,
                    assignment.center_lat,
                    assignment.split_from_episode_id,
                    json.dumps(assignment.merged_from_episode_ids, ensure_ascii=False),
                    observed_at,
                ),
            )
    finally:
        cur.close()


def insert_episode_snapshots(
    conn,
    observed_at: datetime,
    plan: EpisodePlan,
    snapshot_payloads: dict[tuple[str, int], dict[str, Any]],
) -> int:
    """Bulk-insert one snapshot row per assigned group; returns the row count.

    Payload keys are (group_key, sub_cluster_id); payloads without a matching
    assignment in *plan* are skipped.  Duplicate (episode_id, observed_at)
    rows are ignored via ON CONFLICT DO NOTHING.
    """
    if not snapshot_payloads:
        return 0
    rows: list[tuple[Any, ...]] = []
    for key, payload in snapshot_payloads.items():
        assignment = plan.assignments.get(key)
        if assignment is None:
            continue
        rows.append((
            assignment.episode_id,
            # lineage_key stores the normalized parent name.
            assignment.normalized_parent_name,
            assignment.group_key,
            assignment.normalized_parent_name,
            assignment.sub_cluster_id,
            observed_at,
            assignment.member_count,
            json.dumps(assignment.member_mmsis, ensure_ascii=False),
            # ST_MakePoint takes (lon, lat).
            assignment.center_lon,
            assignment.center_lat,
            assignment.continuity_source,
            assignment.continuity_score,
            json.dumps(payload.get('parentEpisodeIds') or assignment.merged_from_episode_ids, ensure_ascii=False),
            payload.get('topCandidateMmsi'),
            payload.get('topCandidateScore'),
            payload.get('resolutionStatus'),
            json.dumps(payload.get('metadata') or {}, ensure_ascii=False),
        ))

    if not rows:
        return 0

    cur = conn.cursor()
    try:
        from psycopg2.extras import execute_values
        execute_values(
            cur,
            f"""
            INSERT INTO {GEAR_GROUP_EPISODE_SNAPSHOTS} (
                episode_id, lineage_key, group_key, normalized_parent_name, sub_cluster_id,
                observed_at, member_count, member_mmsis, center_point,
                continuity_source, continuity_score, parent_episode_ids,
                top_candidate_mmsi, top_candidate_score, resolution_status, metadata
            ) VALUES %s
            ON CONFLICT (episode_id, observed_at) DO NOTHING
            """,
            rows,
            template="(%s, %s, %s, %s, %s, %s, %s, %s::jsonb, ST_SetSRID(ST_MakePoint(%s, %s), 4326), %s, %s, %s::jsonb, %s, %s, %s, %s::jsonb)",
            page_size=200,
        )
        return len(rows)
    finally:
        cur.close()
CORRELATION_PARAM_MODELS = qualified_table('correlation_param_models')
GEAR_GROUP_PARENT_CANDIDATE_SNAPSHOTS = qualified_table('gear_group_parent_candidate_snapshots')
GEAR_GROUP_PARENT_RESOLUTION = qualified_table('gear_group_parent_resolution')
GEAR_PARENT_CANDIDATE_EXCLUSIONS = qualified_table('gear_parent_candidate_exclusions')
GEAR_PARENT_LABEL_SESSIONS = qualified_table('gear_parent_label_sessions')
GEAR_PARENT_LABEL_TRACKING_CYCLES = qualified_table('gear_parent_label_tracking_cycles')

# Resolution status labels persisted with each inference result.
_SHORT_NAME_STATUS = 'SKIPPED_SHORT_NAME'
_NO_CANDIDATE_STATUS = 'NO_CANDIDATE'
_MANUAL_CONFIRMED_STATUS = 'MANUAL_CONFIRMED'
_AUTO_PROMOTED_STATUS = 'AUTO_PROMOTED'
_REVIEW_REQUIRED_STATUS = 'REVIEW_REQUIRED'
_UNRESOLVED_STATUS = 'UNRESOLVED'
_DIRECT_PARENT_MATCH_STATUS = 'DIRECT_PARENT_MATCH'

# Scoring / auto-promotion tuning knobs.
_REJECT_COOLDOWN_HOURS = 24
_MAX_CORRELATION_CANDIDATES = 5
_MIN_AUTO_PROMOTION_STABLE_CYCLES = 3
_MIN_AUTO_PROMOTION_SCORE = 0.72
_MIN_AUTO_PROMOTION_MARGIN = 0.15
_MIN_REVIEW_REQUIRED_SCORE = 0.60
_MIN_PREFIX_BONUS_SCORE = 0.30
_CHINA_MMSI_PREFIX_BONUS = 0.15
_CHINA_MMSI_PREFIXES = ('412', '413')
_TRACK_SUPPORT_POINT_TARGET = 12
_TRACK_SUPPORT_SPAN_TARGET_MINUTES = 90.0
_VISIT_SUPPORT_POINT_TARGET = 8
_VISIT_SUPPORT_SPAN_TARGET_MINUTES = 60.0
_ACTIVITY_SUPPORT_POINT_TARGET = 12
_ACTIVITY_SUPPORT_SPAN_TARGET_MINUTES = 90.0
_VISIT_ZONE_THRESHOLD_NM = 5.0
_RAW_SCORE_WINDOW_HOURS = 6


@dataclass
class RegistryVessel:
    """One vessel row from the fleet registry."""
    vessel_id: int
    mmsi: str
    name_cn: str
    name_en: str


@dataclass
class CandidateScore:
    """A scored parent candidate together with its full score breakdown."""
    mmsi: str
    name: str
    vessel_id: Optional[int]
    target_type: str
    candidate_source: str
    base_corr_score: float
    name_match_score: float
    track_similarity_score: float
    visit_score_6h: float
    proximity_score_6h: float
    activity_sync_score_6h: float
    stability_score: float
    registry_bonus: float
    episode_prior_bonus: float
    lineage_prior_bonus: float
    label_prior_bonus: float
    final_score: float
    streak_count: int
    model_id: int
    model_name: str
    evidence: dict[str, Any]


def _clamp(value: float, floor: float = 0.0, ceil: float = 1.0) -> float:
    """Clip *value* into the [floor, ceil] interval."""
    return min(ceil, max(floor, value))


def _china_mmsi_prefix_bonus(mmsi: str, pre_bonus_score: float) -> float:
    """Fixed bonus for Chinese MMSI prefixes, gated on a minimum base score."""
    if pre_bonus_score < _MIN_PREFIX_BONUS_SCORE:
        return 0.0
    # str.startswith accepts a tuple of prefixes.
    return _CHINA_MMSI_PREFIX_BONUS if (mmsi or '').startswith(_CHINA_MMSI_PREFIXES) else 0.0


def _apply_final_score_bonus(mmsi: str, weighted_score: float) -> tuple[float, float, float]:
    """Return (pre-bonus score, prefix bonus, clamped final score)."""
    pre_bonus = _clamp(weighted_score)
    bonus = _china_mmsi_prefix_bonus(mmsi, pre_bonus)
    return pre_bonus, bonus, _clamp(weighted_score + bonus)


def _to_aware_utc(value: Any) -> Optional[datetime]:
    """Coerce a datetime or ISO-format string to aware UTC; None on failure.

    Naive inputs are assumed to already be UTC.
    """
    if value is None:
        return None
    if not isinstance(value, datetime):
        try:
            value = datetime.fromisoformat(str(value))
        except Exception:
            return None
    if value.tzinfo is None:
        return value.replace(tzinfo=timezone.utc)
    return value.astimezone(timezone.utc)


def _span_minutes(timestamps: list[datetime]) -> float:
    """Minutes between the first and last timestamp.

    Assumes chronological order; the max() guard keeps the result
    non-negative if that assumption is ever violated.
    """
    if len(timestamps) < 2:
        return 0.0
    return max(0.0, (timestamps[-1] - timestamps[0]).total_seconds() / 60.0)


def _support_factor(point_count: int, span_minutes: float, point_target: int, span_target_minutes: float) -> float:
    """Geometric-mean support weight in [0, 1] from sample count and span."""
    if point_count <= 0 or span_minutes <= 0:
        return 0.0
    density = min(1.0, point_count / max(point_target, 1))
    coverage = min(1.0, span_minutes / max(span_target_minutes, 1.0))
    return _clamp(math.sqrt(density * coverage))
                 / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    return earth_radius_nm * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


def _build_track_coverage_metrics(
    center_track: list[dict[str, Any]],
    vessel_track: list[dict[str, Any]],
    gear_center_lat: float,
    gear_center_lon: float,
) -> dict[str, float | int]:
    """Compute data-coverage metrics used to discount raw scores by evidence quality.

    Track points are dicts with 'timestamp', 'lat', 'lon'. Returns a JSON-ready dict of
    point counts, spans (minutes) and coverage factors in [0, 1].
    """
    vessel_timestamps = sorted(
        ts for ts in (_to_aware_utc(point.get('timestamp')) for point in vessel_track)
        if ts is not None
    )
    center_timestamps = sorted(
        ts for ts in (_to_aware_utc(point.get('timestamp')) for point in center_track)
        if ts is not None
    )

    track_point_count = len(vessel_track)
    track_span_minutes = _span_minutes(vessel_timestamps)
    center_point_count = len(center_track)
    center_span_minutes = _span_minutes(center_timestamps)

    # Restrict the vessel track to the center track's time window; when either side has
    # no usable timestamps, fall back to the whole vessel track.
    overlap_points: list[dict[str, Any]] = vessel_track
    if vessel_timestamps and center_timestamps:
        overlap_start = center_timestamps[0]
        overlap_end = center_timestamps[-1]
        overlap_points = [
            point for point in vessel_track
            if (ts := _to_aware_utc(point.get('timestamp'))) is not None and overlap_start <= ts <= overlap_end
        ]
    overlap_timestamps = sorted(
        ts for ts in (_to_aware_utc(point.get('timestamp')) for point in overlap_points)
        if ts is not None
    )
    overlap_point_count = len(overlap_points)
    overlap_span_minutes = _span_minutes(overlap_timestamps)

    # Points within the visit zone around the gear-group centroid.
    in_zone_points = [
        point for point in overlap_points
        if _haversine_nm(gear_center_lat, gear_center_lon, float(point['lat']), float(point['lon'])) < _VISIT_ZONE_THRESHOLD_NM
    ]
    in_zone_timestamps = sorted(
        ts for ts in (_to_aware_utc(point.get('timestamp')) for point in in_zone_points)
        if ts is not None
    )
    in_zone_point_count = len(in_zone_points)
    in_zone_span_minutes = _span_minutes(in_zone_timestamps)

    track_coverage_factor = _support_factor(
        track_point_count,
        track_span_minutes,
        _TRACK_SUPPORT_POINT_TARGET,
        _TRACK_SUPPORT_SPAN_TARGET_MINUTES,
    )
    visit_coverage_factor = _support_factor(
        in_zone_point_count,
        in_zone_span_minutes,
        _VISIT_SUPPORT_POINT_TARGET,
        _VISIT_SUPPORT_SPAN_TARGET_MINUTES,
    )
    activity_coverage_factor = _support_factor(
        in_zone_point_count,
        in_zone_span_minutes,
        _ACTIVITY_SUPPORT_POINT_TARGET,
        _ACTIVITY_SUPPORT_SPAN_TARGET_MINUTES,
    )
    # Overall confidence is the plain average of the three factors.
    coverage_factor = round(
        (track_coverage_factor + visit_coverage_factor + activity_coverage_factor) / 3.0,
        4,
    )

    return {
        'trackPointCount': track_point_count,
        'trackSpanMinutes': round(track_span_minutes, 1),
        'centerPointCount': center_point_count,
        'centerSpanMinutes': round(center_span_minutes, 1),
        'overlapPointCount': overlap_point_count,
        'overlapSpanMinutes': round(overlap_span_minutes, 1),
        'inZonePointCount': in_zone_point_count,
        'inZoneSpanMinutes': round(in_zone_span_minutes, 1),
        'trackCoverageFactor': round(track_coverage_factor, 4),
        'visitCoverageFactor': round(visit_coverage_factor, 4),
        'activityCoverageFactor': round(activity_coverage_factor, 4),
        'coverageFactor': coverage_factor,
        'scoreWindowHours': _RAW_SCORE_WINDOW_HOURS,
    }


def _candidate_sources(candidate: Optional[CandidateScore]) -> set[str]:
    """Return the candidate's evidence 'sources' tags as a set (empty when absent/None)."""
    if candidate is None:
        return set()
    raw = candidate.evidence.get('sources')
    if isinstance(raw, list):
        return {str(item) for item in raw if item}
    return set()


def _top_candidate_stable_cycles(existing: Optional[dict[str, Any]], top_candidate: Optional[CandidateScore]) -> int:
    """Count consecutive cycles the same MMSI has been top candidate (resets to 1 on change)."""
    if top_candidate is None:
        return 0
    previous_mmsi = None
    previous_cycles = 0
    if existing is not None:
        previous_summary = existing.get('evidence_summary') or {}
        previous_mmsi = previous_summary.get('topCandidateMmsi')
        previous_cycles = int(existing.get('stable_cycles') or 0)
    if previous_mmsi == top_candidate.mmsi:
        return max(previous_cycles + 1, 1)
    return 1


def _status_reason(status: str) -> Optional[str]:
    """Human-readable (Korean) reason string for a resolution status, or None."""
    if status == _SHORT_NAME_STATUS:
        return '정규화 이름 길이 4 미만'
    if status == _NO_CANDIDATE_STATUS:
        return '후보를 생성하지 못함'
    if status == _DIRECT_PARENT_MATCH_STATUS:
        return '그룹 멤버에 직접 모선이 포함됨'
    return None


def _select_status(
    top_candidate: Optional[CandidateScore],
    margin: float,
    stable_cycles: int,
) -> tuple[str, str]:
    """Pick (status, decision_source) for the top candidate.

    Auto-promotion requires a VESSEL candidate backed by correlation evidence whose
    score, margin over the runner-up, and stability streak all clear their thresholds.
    """
    if top_candidate is None:
        return _NO_CANDIDATE_STATUS, 'AUTO_NO_CANDIDATE'

    has_correlation = 'CORRELATION' in _candidate_sources(top_candidate)
    if (
        top_candidate.target_type == 'VESSEL'
        and has_correlation
        and top_candidate.final_score >= _MIN_AUTO_PROMOTION_SCORE
        and margin >= _MIN_AUTO_PROMOTION_MARGIN
        and stable_cycles >= _MIN_AUTO_PROMOTION_STABLE_CYCLES
    ):
        return _AUTO_PROMOTED_STATUS, 'AUTO_PROMOTION'

    if top_candidate.final_score >= _MIN_REVIEW_REQUIRED_SCORE:
        return _REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW'

    return _UNRESOLVED_STATUS, 'AUTO_SCORE'


def _load_default_model(conn) -> tuple[int, str]:
    """Return (id, name) of the active correlation parameter model; (1, 'default') when none."""
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT id, name
            FROM {CORRELATION_PARAM_MODELS}
            WHERE is_active = TRUE
            ORDER BY is_default DESC, id ASC
            LIMIT 1
            """
        )
        row = cur.fetchone()
        if row is None:
            return 1, 'default'
        return int(row[0]), row[1] or 'default'
    finally:
        cur.close()


def _load_registry(conn) -> tuple[dict[str, RegistryVessel], dict[str, list[RegistryVessel]]]:
    """Load the fleet registry indexed by MMSI and by normalized CN/EN name."""
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT id, COALESCE(mmsi, ''), COALESCE(name_cn, ''), COALESCE(name_en, '')
            FROM {FLEET_VESSELS}
            """
        )
        by_mmsi: dict[str, RegistryVessel] = {}
        by_normalized_name: dict[str, list[RegistryVessel]] = {}
        for vessel_id, mmsi, name_cn, name_en in cur.fetchall():
            vessel = RegistryVessel(
                vessel_id=int(vessel_id),
                mmsi=mmsi or '',
                name_cn=name_cn or '',
                name_en=name_en or '',
            )
            if vessel.mmsi:
                by_mmsi[vessel.mmsi] = vessel
            # Index the same vessel under every normalized name variant.
            for raw_name in (vessel.name_cn, vessel.name_en):
                normalized = normalize_parent_name(raw_name)
                if normalized:
                    by_normalized_name.setdefault(normalized,
                                                  []).append(vessel)
        return by_mmsi, by_normalized_name
    finally:
        cur.close()


def _json_to_dict(value: Any) -> dict[str, Any]:
    """Best-effort conversion of a JSON/JSONB column value to a dict (empty dict on failure)."""
    if value is None:
        return {}
    if isinstance(value, dict):
        return value
    try:
        return json.loads(value)
    except Exception:
        return {}


def _load_existing_resolution(conn, group_keys: list[str]) -> dict[tuple[str, int], dict[str, Any]]:
    """Load the current resolution rows for *group_keys*, keyed by (group_key, sub_cluster_id)."""
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT group_key, sub_cluster_id, parent_name, normalized_parent_name,
                   status, selected_parent_mmsi, selected_parent_name, selected_vessel_id,
                   confidence, decision_source, top_score, second_score, score_margin,
                   stable_cycles, approved_by, approved_at, manual_comment,
                   rejected_candidate_mmsi, rejected_at, evidence_summary,
                   episode_id, continuity_source, continuity_score, prior_bonus_total
            FROM {GEAR_GROUP_PARENT_RESOLUTION}
            WHERE group_key = ANY(%s)
            """,
            (group_keys,),
        )
        result: dict[tuple[str, int], dict[str, Any]] = {}
        for row in cur.fetchall():
            key = (row[0], int(row[1]))
            result[key] = {
                'parent_name': row[2],
                'normalized_parent_name': row[3],
                'status': row[4],
                'selected_parent_mmsi': row[5],
                'selected_parent_name': row[6],
                'selected_vessel_id': row[7],
                'confidence': row[8],
                'decision_source': row[9],
                'top_score': row[10] or 0.0,
                'second_score': row[11] or 0.0,
                'score_margin': row[12] or 0.0,
                'stable_cycles': row[13] or 0,
                'approved_by': row[14],
                'approved_at': row[15],
                'manual_comment': row[16],
                'rejected_candidate_mmsi': row[17],
                'rejected_at': row[18],
                'evidence_summary': _json_to_dict(row[19]),
                'episode_id': row[20],
                'continuity_source': row[21],
                'continuity_score': row[22] or 0.0,
                'prior_bonus_total': row[23] or 0.0,
            }
        return result
    finally:
        cur.close()


def _expire_label_sessions(conn) -> None:
    """Mark ACTIVE label sessions whose active_until has passed as EXPIRED."""
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            UPDATE {GEAR_PARENT_LABEL_SESSIONS}
            SET status = 'EXPIRED',
                updated_at = NOW()
            WHERE status = 'ACTIVE'
              AND active_until <= NOW()
            """
        )
    finally:
        cur.close()


def _load_active_candidate_exclusions(conn, group_keys: list[str]) -> dict[str, Any]:
    """Load currently-active candidate exclusions.

    Returns {'global': set of MMSIs excluded everywhere,
             'group': {(group_key, sub_cluster_id): set of MMSIs}}.
    """
    result: dict[str, Any] = {
        'global': set(),
        'group': {},
    }
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT scope_type, group_key, sub_cluster_id, candidate_mmsi
            FROM {GEAR_PARENT_CANDIDATE_EXCLUSIONS}
            WHERE released_at IS NULL
              AND active_from <= NOW()
              AND (active_until IS NULL OR active_until > NOW())
              AND (scope_type = 'GLOBAL' OR group_key = ANY(%s))
            ORDER BY active_from DESC, id DESC
            """,
            (group_keys or [''],),
        )
        for scope_type, group_key, sub_cluster_id, candidate_mmsi in cur.fetchall():
            if scope_type == 'GLOBAL':
                result['global'].add(candidate_mmsi)
                continue
            key = (group_key, int(sub_cluster_id))
            result['group'].setdefault(key, set()).add(candidate_mmsi)
        return result
    finally:
        cur.close()


def _load_active_label_sessions(conn, group_keys: list[str]) -> dict[tuple[str, int], dict[str, Any]]:
    """Load the latest ACTIVE label session per (group_key, sub_cluster_id)."""
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT DISTINCT ON (group_key, sub_cluster_id)
                   id, group_key, sub_cluster_id,
                   label_parent_mmsi, label_parent_name, label_parent_vessel_id,
                   duration_days, active_from, active_until, actor, comment, metadata
            FROM {GEAR_PARENT_LABEL_SESSIONS}
            WHERE status = 'ACTIVE'
              AND active_from <= NOW()
              AND active_until > NOW()
              AND group_key = ANY(%s)
            ORDER BY group_key, sub_cluster_id, active_from DESC, id DESC
            """,
            (group_keys,),
        )
        result: dict[tuple[str, int], dict[str, Any]] = {}
        for row in cur.fetchall():
            result[(row[1], int(row[2]))] = {
                'id': int(row[0]),
                'group_key': row[1],
                'sub_cluster_id': int(row[2]),
                'label_parent_mmsi': row[3],
                'label_parent_name': row[4],
                'label_parent_vessel_id': row[5],
                'duration_days': int(row[6]),
                'active_from': row[7],
                'active_until': row[8],
                'actor': row[9],
                'comment': row[10],
                'metadata': _json_to_dict(row[11]),
            }
        return result
    finally:
        cur.close()


def _load_correlation_scores(
    conn,
    default_model_id: int,
    group_keys: list[str],
) -> dict[tuple[str, int], list[dict[str, Any]]]:
    """Load VESSEL correlation scores for the model, grouped by (group_key, sub_cluster_id).

    Rows are ordered score-descending, so list order is already the candidate ranking.
    """
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT group_key, sub_cluster_id, target_mmsi, target_type, COALESCE(target_name, ''),
                   current_score, streak_count
            FROM {GEAR_CORRELATION_SCORES}
            WHERE model_id = %s
              AND group_key = ANY(%s)
              AND target_type = 'VESSEL'
            ORDER BY group_key, sub_cluster_id, current_score DESC, last_observed_at DESC
            """,
            (default_model_id, group_keys),
        )
        result: dict[tuple[str, int], list[dict[str, Any]]] = {}
        for row in cur.fetchall():
            key = (row[0], int(row[1]))
            result.setdefault(key, []).append({
                'target_mmsi': row[2],
                'target_type': row[3],
                'target_name': row[4] or '',
                'current_score': float(row[5] or 0.0),
                'streak_count': int(row[6] or 0),
            })
        return result
    finally:
        cur.close()


def _load_raw_metric_averages(conn, group_keys: list[str]) -> dict[tuple[str, int, str], dict[str, float]]:
    """Average raw visit/proximity/activity metrics over the last 6 hours per (group, sub-cluster, MMSI)."""
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT group_key,
                   sub_cluster_id,
                   target_mmsi,
                   AVG(COALESCE(visit_score, 0)) AS avg_visit,
                   AVG(COALESCE(proximity_ratio, 0)) AS avg_proximity,
                   AVG(COALESCE(activity_sync, 0)) AS avg_activity
            FROM {GEAR_CORRELATION_RAW_METRICS}
            WHERE group_key = ANY(%s)
              AND observed_at > NOW() - INTERVAL '6 hours'
            GROUP BY group_key, sub_cluster_id, target_mmsi
            """,
            (group_keys,),
        )
        result: dict[tuple[str, int, str], dict[str, float]] = {}
        for row in cur.fetchall():
            result[(row[0], int(row[1]), row[2])] = {
                'visit_score_6h': float(row[3] or 0.0),
                'proximity_score_6h': float(row[4] or 0.0),
                'activity_sync_score_6h': float(row[5] or 0.0),
            }
        return result
    finally:
        cur.close()


def _load_group_center_tracks(conn,
                              group_keys: list[str]) -> dict[tuple[str, int], list[dict[str, Any]]]:
    """Load 6-hour, 1h-resolution center-point tracks for each (group_key, sub_cluster_id)."""
    if not group_keys:
        return {}
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            SELECT group_key, sub_cluster_id, snapshot_time, ST_Y(center_point) AS lat, ST_X(center_point) AS lon
            FROM {GROUP_POLYGON_SNAPSHOTS}
            WHERE group_key = ANY(%s)
              AND resolution = '1h'
              AND center_point IS NOT NULL
              AND snapshot_time > NOW() - INTERVAL '6 hours'
            ORDER BY group_key, sub_cluster_id, snapshot_time ASC
            """,
            (group_keys,),
        )
        result: dict[tuple[str, int], list[dict[str, Any]]] = {}
        for row in cur.fetchall():
            result.setdefault((row[0], int(row[1])), []).append({
                'timestamp': row[2],
                'lat': float(row[3]),
                'lon': float(row[4]),
            })
        return result
    finally:
        cur.close()


def _name_match_score(parent_name: str, candidate_name: str, registry: Optional[RegistryVessel]) -> float:
    """Score how well a candidate's name matches the group's parent name, in {0, 0.3, 0.5, 0.8, 1.0}.

    The best score over the candidate name and (when available) the registry CN/EN names wins:
    1.0 exact raw match, 0.8 normalized match, 0.5 prefix/substring, 0.3 alphabetic-only match.
    """
    def score_pair(left: str, right: str) -> float:
        raw_left = (left or '').strip().upper()
        raw_right = (right or '').strip().upper()
        normalized_left = normalize_parent_name(left)
        normalized_right = normalize_parent_name(right)
        alpha_left = ''.join(ch for ch in normalized_left if ch.isalpha())
        alpha_right = ''.join(ch for ch in normalized_right if ch.isalpha())
        if not normalized_left or not normalized_right:
            return 0.0
        if raw_left and raw_left == raw_right:
            return 1.0
        if normalized_left == normalized_right:
            return 0.8
        if normalized_left.startswith(normalized_right) or normalized_right.startswith(normalized_left):
            return 0.5
        if normalized_left in normalized_right or normalized_right in normalized_left:
            return 0.5
        if alpha_left and alpha_left == alpha_right:
            return 0.3
        return 0.0

    score = score_pair(parent_name, candidate_name)
    if registry is not None:
        score = max(score, score_pair(parent_name, registry.name_cn))
        score = max(score, score_pair(parent_name, registry.name_en))
    return score


def _candidate_name(candidate_mmsi: str, all_positions: dict[str,
                                                                    dict], registry: Optional[RegistryVessel]) -> str:
    """Best display name for a candidate: live position name, then registry CN/EN, then the MMSI."""
    position_name = (all_positions.get(candidate_mmsi) or {}).get('name', '')
    if position_name:
        return position_name
    if registry is not None:
        return registry.name_cn or registry.name_en or candidate_mmsi
    return candidate_mmsi


def _direct_parent_member(group: dict[str, Any], all_positions: dict[str, dict]) -> Optional[dict[str, Any]]:
    """Return the group's directly-observed parent ({'mmsi', 'name'}-shaped dict), or None.

    Prefers a member flagged isParent; falls back to the group's own parent_mmsi field.
    """
    members = group.get('members') or []
    for member in members:
        if member.get('isParent') and member.get('mmsi'):
            return member

    parent_mmsi = group.get('parent_mmsi')
    if not parent_mmsi:
        return None

    position = all_positions.get(parent_mmsi) or {}
    return {
        'mmsi': parent_mmsi,
        'name': position.get('name') or group.get('parent_name') or parent_mmsi,
    }


def _direct_parent_stable_cycles(existing: Optional[dict[str, Any]], direct_parent_mmsi: str) -> int:
    """Count consecutive cycles the same direct parent has been observed (resets to 1 on change)."""
    if existing is None or not direct_parent_mmsi:
        return 1

    previous_mmsi = existing.get('selected_parent_mmsi')
    if not previous_mmsi:
        # Older rows may only carry the MMSI inside the evidence summary.
        previous_summary = existing.get('evidence_summary') or {}
        previous_mmsi = previous_summary.get('directParentMmsi') or previous_summary.get('topCandidateMmsi')
    previous_cycles = int(existing.get('stable_cycles') or 0)
    if previous_mmsi == direct_parent_mmsi:
        return max(previous_cycles + 1, 1)
    return 1


def _build_candidate_scores(
    vessel_store,
    observed_at: datetime,
    group: dict[str, Any],
    episode_assignment,
    default_model_id: int,
    default_model_name: str,
    score_rows: list[dict[str, Any]],
    raw_metrics: dict[tuple[str, int, str], dict[str, float]],
    center_track: list[dict[str, Any]],
    all_positions: dict[str, dict],
    registry_by_mmsi: dict[str, RegistryVessel],
    registry_by_name: dict[str, list[RegistryVessel]],
    existing: Optional[dict[str, Any]],
    excluded_candidate_mmsis: set[str],
    episode_prior_stats: dict[tuple[str, str], dict[str, Any]],
    lineage_prior_stats: dict[tuple[str, str], dict[str, Any]],
    label_prior_stats: dict[tuple[str, str], dict[str, Any]],
) -> list[CandidateScore]:
    """Build and rank parent-vessel candidates for one gear group.

    Candidates come from three sources: top correlation scores, registry vessels whose
    normalized name matches the group, and the previously selected candidate (episode
    continuity). Recently rejected and explicitly excluded MMSIs are removed. Each
    remaining candidate gets a weighted multi-factor score, coverage-discounted raw
    metrics, an optional China-MMSI prefix bonus, and episode/lineage/label prior
    bonuses. Returns the candidates sorted best-first.
    """
    group_key = group['parent_name']
    sub_cluster_id = int(group.get('sub_cluster_id', 0))
    normalized_parent_name = normalize_parent_name(group_key)
    members = group.get('members') or []
    # Gear-group centroid; (0, 0) when the group has no members.
    if members:
        gear_center_lat = sum(float(member['lat']) for member in members) / len(members)
        gear_center_lon = sum(float(member['lon']) for member in members) / len(members)
    else:
        gear_center_lat = 0.0
        gear_center_lon = 0.0

    candidates: dict[str, dict[str, Any]] = {}
    score_lookup = {row['target_mmsi']: row for row in score_rows}
    center_track_latlon = [
        (float(point['lat']), float(point['lon']))
        for point in center_track
        if point.get('lat') is not None and point.get('lon') is not None
    ]

    # Source 1: top-N correlation candidates (score_rows arrive score-descending).
    for row in score_rows[:_MAX_CORRELATION_CANDIDATES]:
        candidates.setdefault(row['target_mmsi'], {'sources': set()})['sources'].add('CORRELATION')

    # Source 2: registry vessels whose normalized name matches the group name.
    for vessel in registry_by_name.get(normalized_parent_name, []):
        if vessel.mmsi:
            candidates.setdefault(vessel.mmsi, {'sources': set()})['sources'].add('REGISTRY_NAME')

    # Source 3: keep the previously selected candidate alive within the same episode.
    if existing is not None and existing.get('episode_id') == episode_assignment.episode_id:
        current_candidate = existing.get('selected_parent_mmsi') or existing.get('evidence_summary', {}).get('topCandidateMmsi')
        if current_candidate:
            candidates.setdefault(current_candidate, {'sources': set()})['sources'].add('PREVIOUS_SELECTION')

    # Drop a manually rejected candidate while its cooldown window is still open.
    # NOTE(review): rejected_at comes from the DB; assumed timezone-aware for the
    # comparison against the aware cutoff — confirm column type.
    if existing is not None:
        rejected_mmsi = existing.get('rejected_candidate_mmsi')
        rejected_at = existing.get('rejected_at')
        if rejected_mmsi and rejected_at is not None:
            cutoff = datetime.now(timezone.utc) - timedelta(hours=_REJECT_COOLDOWN_HOURS)
            if rejected_at >= cutoff and rejected_mmsi in candidates:
                candidates.pop(rejected_mmsi, None)

    # Drop explicitly excluded candidates (global + group scope).
    for excluded_mmsi in excluded_candidate_mmsis:
        candidates.pop(excluded_mmsi, None)

    scored: list[CandidateScore] = []
    for candidate_mmsi, meta in candidates.items():
        registry = registry_by_mmsi.get(candidate_mmsi)
        score_row = score_lookup.get(candidate_mmsi, {})
        raw = raw_metrics.get((group_key, sub_cluster_id, candidate_mmsi), {})
        vessel_track = _get_vessel_track(vessel_store, candidate_mmsi, hours=6)
        raw_track_similarity = 0.0
        if center_track_latlon and vessel_track:
            raw_track_similarity = compute_track_similarity(
                center_track_latlon,
                [(point['lat'], point['lon']) for point in vessel_track],
            )

        base_corr_score = float(score_row.get('current_score', 0.0) or 0.0)
        streak_count = int(score_row.get('streak_count', 0) or 0)
        stability_score = _clamp(streak_count / 6.0)  # saturates at a 6-cycle streak
        candidate_name = _candidate_name(candidate_mmsi, all_positions, registry)
        name_match_score = _name_match_score(group_key, candidate_name, registry)
        registry_bonus = 0.05 if registry is not None else 0.0
        raw_visit_score = float(raw.get('visit_score_6h', 0.0) or 0.0)
        raw_proximity_score = float(raw.get('proximity_score_6h', 0.0) or 0.0)
        raw_activity_score = float(raw.get('activity_sync_score_6h', 0.0) or 0.0)
        # Discount raw metrics by how much supporting data actually exists.
        coverage_metrics = _build_track_coverage_metrics(
            center_track=center_track,
            vessel_track=vessel_track,
            gear_center_lat=gear_center_lat,
            gear_center_lon=gear_center_lon,
        )
        track_coverage_factor = float(coverage_metrics['trackCoverageFactor'])
        visit_coverage_factor = float(coverage_metrics['visitCoverageFactor'])
        activity_coverage_factor = float(coverage_metrics['activityCoverageFactor'])
        track_similarity = _clamp(raw_track_similarity * track_coverage_factor)
        visit_score = _clamp(raw_visit_score * visit_coverage_factor)
        proximity_score = _clamp(raw_proximity_score * track_coverage_factor)
        activity_score = _clamp(raw_activity_score * activity_coverage_factor)

        # Fixed-weight linear blend (weights sum to 1.0 before the registry bonus).
        weighted_score = (
            0.40 * base_corr_score
            + 0.15 * name_match_score
            + 0.15 * track_similarity
            + 0.10 * visit_score
            + 0.05 * proximity_score
            + 0.05 * activity_score
            + 0.10 * stability_score
            + registry_bonus
        )
        pre_bonus_score, china_mmsi_bonus, final_score = _apply_final_score_bonus(
            candidate_mmsi,
            weighted_score,
        )
        prior_bonus = compute_prior_bonus_components(
            observed_at=observed_at,
            normalized_parent_name=normalized_parent_name,
            episode_id=episode_assignment.episode_id,
            candidate_mmsi=candidate_mmsi,
            episode_prior_stats=episode_prior_stats,
            lineage_prior_stats=lineage_prior_stats,
            label_prior_stats=label_prior_stats,
        )
        final_score = _clamp(final_score + prior_bonus['priorBonusTotal'])

        evidence = {
            'normalizedParentName': normalized_parent_name,
            'episodeId': episode_assignment.episode_id,
            'continuitySource': episode_assignment.continuity_source,
            'continuityScore': round(float(episode_assignment.continuity_score or 0.0), 6),
            'sources': sorted(meta['sources']),
            'trackAvailable': bool(vessel_track),
            'registryMatched': registry is not None,
            'coverage': coverage_metrics,
            'evidenceConfidence': coverage_metrics['coverageFactor'],
            'scoreBreakdown': {
                'baseCorrScore': round(base_corr_score, 4),
                'nameMatchScore': round(name_match_score, 4),
                'trackSimilarityScore': round(track_similarity, 4),
                'visitScore6h': round(visit_score, 4),
                'proximityScore6h': round(proximity_score, 4),
                'activitySyncScore6h': round(activity_score, 4),
                'stabilityScore': round(stability_score, 4),
                'registryBonus': round(registry_bonus, 4),
                'preBonusScore': round(pre_bonus_score, 4),
                'chinaMmsiBonus': round(china_mmsi_bonus, 4),
                'episodePriorBonus': round(prior_bonus['episodePriorBonus'], 4),
                'lineagePriorBonus': round(prior_bonus['lineagePriorBonus'], 4),
                'labelPriorBonus': round(prior_bonus['labelPriorBonus'], 4),
                'priorBonusTotal': round(prior_bonus['priorBonusTotal'], 4),
            },
            # Raw (pre-coverage-discount) values kept for review-side comparison.
            'scoreBreakdownRaw': {
                'trackSimilarityScore': round(raw_track_similarity, 4),
                'visitScore6h': round(raw_visit_score, 4),
                'proximityScore6h': round(raw_proximity_score, 4),
                'activitySyncScore6h': round(raw_activity_score, 4),
            },
            'chinaMmsiBonusApplied': china_mmsi_bonus > 0.0,
        }
        scored.append(CandidateScore(
            mmsi=candidate_mmsi,
            name=candidate_name,
            vessel_id=registry.vessel_id if registry is not None else None,
            target_type='VESSEL',
            candidate_source=','.join(sorted(meta['sources'])),
            base_corr_score=round(base_corr_score, 6),
            name_match_score=round(name_match_score, 6),
            track_similarity_score=round(track_similarity, 6),
            visit_score_6h=round(visit_score, 6),
            proximity_score_6h=round(proximity_score, 6),
            activity_sync_score_6h=round(activity_score, 6),
            stability_score=round(stability_score, 6),
            registry_bonus=round(registry_bonus, 6),
            episode_prior_bonus=round(prior_bonus['episodePriorBonus'], 6),
            lineage_prior_bonus=round(prior_bonus['lineagePriorBonus'], 6),
            label_prior_bonus=round(prior_bonus['labelPriorBonus'], 6),
            final_score=round(final_score, 6),
            streak_count=streak_count,
            model_id=default_model_id,
            model_name=default_model_name,
            evidence=evidence,
        ))

    # Rank best-first; MMSI is the final tiebreaker for deterministic ordering.
    scored.sort(
        key=lambda item: (
            item.final_score,
            item.base_corr_score,
            item.stability_score,
            item.name_match_score,
            item.mmsi,
        ),
        reverse=True,
    )
    return scored


def _insert_candidate_snapshots(conn, observed_at: datetime, rows: list[tuple]) -> int:
    """Bulk-insert candidate snapshot rows; returns the number of rows inserted."""
    if not rows:
        return 0
    cur = conn.cursor()
    try:
        from psycopg2.extras import execute_values
        execute_values(
            cur,
            f"""
            INSERT INTO {GEAR_GROUP_PARENT_CANDIDATE_SNAPSHOTS} (
                observed_at, group_key, sub_cluster_id, parent_name, normalized_parent_name, episode_id, candidate_mmsi,
                candidate_name, candidate_vessel_id, rank, candidate_source,
                model_id, model_name, base_corr_score, name_match_score,
                track_similarity_score, visit_score_6h, proximity_score_6h,
                activity_sync_score_6h, stability_score, registry_bonus,
                episode_prior_bonus, lineage_prior_bonus, label_prior_bonus,
                final_score, margin_from_top, evidence
            ) VALUES %s
            """,
            rows,
            page_size=200,
        )
        return len(rows)
    finally:
        cur.close()


def _insert_label_tracking_rows(conn, rows: list[tuple]) -> int:
    """Bulk-insert label-session tracking rows, skipping duplicates per (session, observed_at)."""
    if not rows:
        return 0
    cur = conn.cursor()
    try:
        from psycopg2.extras import execute_values
        execute_values(
            cur,
            f"""
            INSERT INTO {GEAR_PARENT_LABEL_TRACKING_CYCLES} (
                label_session_id, observed_at, candidate_snapshot_observed_at, auto_status,
                top_candidate_mmsi, top_candidate_name, top_candidate_score,
                top_candidate_margin, candidate_count, labeled_candidate_present,
                labeled_candidate_rank, labeled_candidate_score,
                labeled_candidate_pre_bonus_score, labeled_candidate_margin_from_top,
                matched_top1, matched_top3, evidence_summary
            ) VALUES %s
            ON CONFLICT (label_session_id, observed_at) DO NOTHING
            """,
            rows,
            page_size=200,
        )
        return len(rows)
    finally:
        cur.close()


def _upsert_resolution(conn, row: tuple) -> None:
    """Insert-or-update one resolution row keyed on (group_key, sub_cluster_id)."""
    cur = conn.cursor()
    try:
        cur.execute(
            f"""
            INSERT INTO {GEAR_GROUP_PARENT_RESOLUTION} (
                group_key, sub_cluster_id, parent_name, normalized_parent_name,
                episode_id, continuity_source, continuity_score, prior_bonus_total,
                status, selected_parent_mmsi, selected_parent_name, selected_vessel_id,
                confidence, decision_source, top_score, second_score, score_margin,
                stable_cycles, last_evaluated_at, last_promoted_at, approved_by,
                approved_at, manual_comment, rejected_candidate_mmsi, rejected_at,
                evidence_summary, updated_at
            ) VALUES (
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s, %s, %s, %s,
                %s::jsonb, %s
            )
            ON CONFLICT (group_key, sub_cluster_id)
            DO UPDATE SET
                parent_name = EXCLUDED.parent_name,
                normalized_parent_name = EXCLUDED.normalized_parent_name,
                episode_id = EXCLUDED.episode_id,
                continuity_source = EXCLUDED.continuity_source,
                continuity_score = EXCLUDED.continuity_score,
                prior_bonus_total = EXCLUDED.prior_bonus_total,
                status = EXCLUDED.status,
                selected_parent_mmsi = EXCLUDED.selected_parent_mmsi,
                selected_parent_name = EXCLUDED.selected_parent_name,
                selected_vessel_id = EXCLUDED.selected_vessel_id,
                confidence = EXCLUDED.confidence,
                decision_source = EXCLUDED.decision_source,
                top_score = EXCLUDED.top_score,
                second_score = EXCLUDED.second_score,
                score_margin = EXCLUDED.score_margin,
                stable_cycles = EXCLUDED.stable_cycles,
                last_evaluated_at = EXCLUDED.last_evaluated_at,
                last_promoted_at = EXCLUDED.last_promoted_at,
                approved_by = EXCLUDED.approved_by,
                approved_at = EXCLUDED.approved_at,
                manual_comment = EXCLUDED.manual_comment,
                rejected_candidate_mmsi = EXCLUDED.rejected_candidate_mmsi,
                rejected_at = EXCLUDED.rejected_at,
                evidence_summary = EXCLUDED.evidence_summary,
                updated_at = EXCLUDED.updated_at
            """,
            row,
        )
    finally:
        cur.close()


def _label_tracking_row(
    observed_at: datetime,
    label_session: dict[str, Any],
    auto_status: str,
    top_candidate: Optional[CandidateScore],
    margin: float,
    candidates: list[CandidateScore],
) -> tuple:
    """Build one insert tuple comparing this cycle's auto result against a human label.

    Records whether the labeled MMSI appeared among the candidates, its rank/score,
    and whether it matched the automatic top-1 / top-3.
    """
    labeled_candidate = next(
        (candidate for candidate in candidates if candidate.mmsi == label_session['label_parent_mmsi']),
        None,
    )
    labeled_rank = None
    labeled_pre_bonus_score = None
    labeled_margin_from_top = None
    if labeled_candidate is not None:
        # Rank is 1-based position in the (already sorted) candidate list.
        for index, candidate in enumerate(candidates, start=1):
            if candidate.mmsi == labeled_candidate.mmsi:
                labeled_rank = index
                break
        labeled_pre_bonus_score = (
            labeled_candidate.evidence.get('scoreBreakdown', {}).get('preBonusScore')
            if isinstance(labeled_candidate.evidence.get('scoreBreakdown'), dict)
            else None
        )
        labeled_margin_from_top = round(
            (top_candidate.final_score - labeled_candidate.final_score) if top_candidate else 0.0,
            6,
        )

    evidence_summary = {
        'labelParentMmsi': label_session['label_parent_mmsi'],
        'labelParentName': label_session.get('label_parent_name'),
        'topCandidateSources': sorted(_candidate_sources(top_candidate)),
        'candidateMmsis': [candidate.mmsi for candidate in candidates[:5]],
    }

    # Column order must match _insert_label_tracking_rows' INSERT column list.
    return (
        label_session['id'],
        observed_at,
        observed_at,
        auto_status,
        top_candidate.mmsi if top_candidate else None,
        top_candidate.name if top_candidate else None,
        top_candidate.final_score if top_candidate else None,
        margin if top_candidate else 0.0,
        len(candidates),
        labeled_candidate is not None,
        labeled_rank,
        labeled_candidate.final_score if labeled_candidate else None,
        labeled_pre_bonus_score,
        labeled_margin_from_top,
        top_candidate is not None and label_session['label_parent_mmsi'] == top_candidate.mmsi,
        labeled_rank is not None and labeled_rank <= 3,
        json.dumps(evidence_summary, ensure_ascii=False),
    )


def run_gear_parent_inference(vessel_store, gear_groups: list[dict], conn) -> dict[str, int]:
    """Run representative parent (mother-ship) inference for unresolved gear groups.

    Entry point called by the scheduler. Builds episode continuity, loads priors,
    resolves direct-parent groups immediately, and scores candidates for the rest.
    Returns a counter dict summarizing the run.
    """
    observed_at = datetime.now(timezone.utc)
    active_groups = [group for group in gear_groups if group.get('parent_name')]
    if not active_groups:
        return {'groups': 0, 'candidates': 0, 'promoted': 0, 'review_required': 0, 'skipped': 0, 'no_candidate': 0, 'direct_matched': 0, 'episode_snapshots': 0}

    group_keys = sorted({group['parent_name'] for group in active_groups})
    # Episode continuity: map each group onto a persistent episode before scoring.
    episode_inputs = [
        group_to_episode_input(group, normalize_parent_name(group['parent_name']))
        for group in active_groups
    ]
    lineage_keys = sorted({item.normalized_parent_name for item in episode_inputs if item.normalized_parent_name})
    previous_episodes = load_active_episode_states(conn, lineage_keys)
    episode_plan = build_episode_plan(episode_inputs, previous_episodes)
    episode_prior_stats = load_episode_prior_stats(conn, [assignment.episode_id for assignment in episode_plan.assignments.values()])
    lineage_prior_stats = load_lineage_prior_stats(conn, lineage_keys)
    label_prior_stats = load_label_prior_stats(conn, lineage_keys)
    registry_by_mmsi, registry_by_name = _load_registry(conn)
    _expire_label_sessions(conn)
    existing_resolution = _load_existing_resolution(conn, group_keys)
    all_positions = vessel_store.get_all_latest_positions()
    # Split groups: those with a directly observed parent skip candidate scoring.
    direct_parent_groups = [
        group for group in active_groups
        if _direct_parent_member(group, all_positions) is not None
    ]
    unresolved_groups = [
        group for group in active_groups
        if _direct_parent_member(group, all_positions) is None
    ]

    default_model_id, default_model_name = _load_default_model(conn)
    correlation_scores = _load_correlation_scores(conn, default_model_id, group_keys)
    raw_metric_averages = _load_raw_metric_averages(conn, group_keys)
    center_tracks = _load_group_center_tracks(conn, group_keys)
    active_exclusions = _load_active_candidate_exclusions(conn, group_keys)
    active_label_sessions = _load_active_label_sessions(conn, group_keys)

    snapshot_rows: list[tuple] = []
    label_tracking_rows: list[tuple] = []
    episode_snapshot_payloads: dict[tuple[str, int], dict[str, Any]] = {}
    promoted = 0
    review_required = 0
    skipped = 0
    no_candidate = 0
    direct_matched = 0

    # Pass 1: groups whose parent is directly present — resolve without scoring.
    for group in direct_parent_groups:
        group_key = group['parent_name']
        sub_cluster_id = int(group.get('sub_cluster_id', 0))
        key = (group_key, sub_cluster_id)
        episode_assignment = episode_plan.assignments.get(key)
        if episode_assignment is None:
            continue
        existing = existing_resolution.get(key)
        direct_parent = _direct_parent_member(group, all_positions)
        if direct_parent is None:
            continue
        normalized_parent_name = normalize_parent_name(group_key)
        direct_parent_mmsi = str(direct_parent.get('mmsi') or '')
        direct_parent_name = str(direct_parent.get('name') or group_key or direct_parent_mmsi)
        stable_cycles = _direct_parent_stable_cycles(existing, direct_parent_mmsi)
        status_reason = _status_reason(_DIRECT_PARENT_MATCH_STATUS)
        evidence_summary = {
            'episodeId': episode_assignment.episode_id,
            'continuitySource': episode_assignment.continuity_source,
            'continuityScore': episode_assignment.continuity_score,
            'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids,
            'splitFromEpisodeId': episode_assignment.split_from_episode_id,
            'normalizedParentName': normalized_parent_name,
            'candidateCount': 0,
            'directParentMmsi': direct_parent_mmsi,
            'directParentName': direct_parent_name,
            'statusReason': status_reason,
            'trackable': is_trackable_parent_name(group_key),
        }

        status = _DIRECT_PARENT_MATCH_STATUS
        decision_source = 'DIRECT_PARENT_MATCH'
        selected_parent_mmsi = direct_parent_mmsi
        selected_parent_name = direct_parent_name
        selected_vessel_id = registry_by_mmsi.get(direct_parent_mmsi).vessel_id if direct_parent_mmsi in registry_by_mmsi else None
        confidence = 1.0
        last_promoted_at = observed_at

        # Never overwrite a manual confirmation: keep the operator's selection.
        if existing is not None and existing.get('status') == _MANUAL_CONFIRMED_STATUS:
            status = _MANUAL_CONFIRMED_STATUS
            decision_source = existing.get('decision_source') or 'MANUAL'
            selected_parent_mmsi = existing.get('selected_parent_mmsi') or selected_parent_mmsi
            selected_parent_name = existing.get('selected_parent_name') or selected_parent_name
            selected_vessel_id = existing.get('selected_vessel_id') if existing.get('selected_vessel_id') is not None else selected_vessel_id
            confidence = existing.get('confidence') or confidence
            last_promoted_at = existing.get('approved_at') or last_promoted_at
            evidence_summary['statusReason'] = existing.get('evidence_summary', {}).get('statusReason') or status_reason

        _upsert_resolution(
            conn,
            (
                group_key,
                sub_cluster_id,
                group_key,
                normalized_parent_name,
                episode_assignment.episode_id,
                episode_assignment.continuity_source,
                episode_assignment.continuity_score,
                0.0,
                status,
                selected_parent_mmsi,
                selected_parent_name,
                selected_vessel_id,
                confidence,
                decision_source,
                confidence or 0.0,
                0.0,
                confidence or 0.0,
                stable_cycles,
                observed_at,
                last_promoted_at,
                (existing or {}).get('approved_by'),
                (existing or {}).get('approved_at'),
                (existing or {}).get('manual_comment'),
                (existing or {}).get('rejected_candidate_mmsi'),
                (existing or {}).get('rejected_at'),
                json.dumps(evidence_summary,
                           ensure_ascii=False),
                observed_at,
            ),
        )
        episode_snapshot_payloads[key] = {
            'parentEpisodeIds': episode_assignment.merged_from_episode_ids,
            'topCandidateMmsi': selected_parent_mmsi,
            'topCandidateScore': confidence or 1.0,
            'resolutionStatus': status,
            'metadata': {
                'splitFromEpisodeId': episode_assignment.split_from_episode_id,
                'directParentMmsi': direct_parent_mmsi,
            },
        }
        direct_matched += 1

    # Pass 2: groups without a direct parent — score candidates (continues below).
    for group in unresolved_groups:
        group_key = group['parent_name']
        sub_cluster_id = int(group.get('sub_cluster_id', 0))
        key = (group_key, sub_cluster_id)
        episode_assignment = episode_plan.assignments.get(key)
        if episode_assignment is None:
            continue
        existing = existing_resolution.get(key)
        normalized_parent_name = normalize_parent_name(group_key)
        excluded_candidate_mmsis = set(active_exclusions['global'])
        excluded_candidate_mmsis.update(active_exclusions['group'].get(key, set()))
        active_label_session = active_label_sessions.get(key)

        # Short/untrackable names are skipped unless an operator already confirmed one.
        if not is_trackable_parent_name(group_key) and (existing or {}).get('status') != _MANUAL_CONFIRMED_STATUS:
            skipped += 1
            status_reason = _status_reason(_SHORT_NAME_STATUS)
            evidence_summary = {
                'episodeId': episode_assignment.episode_id,
                'continuitySource': episode_assignment.continuity_source,
                'continuityScore': episode_assignment.continuity_score,
                'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids,
                'splitFromEpisodeId': episode_assignment.split_from_episode_id,
                'skipReason': status_reason,
                'statusReason': status_reason,
                'normalizedParentName': normalized_parent_name,
            }
            _upsert_resolution(
                conn,
                (
                    group_key,
                    sub_cluster_id,
                    group_key,
                    normalized_parent_name,
                    episode_assignment.episode_id,
                    episode_assignment.continuity_source,
                    episode_assignment.continuity_score,
                    0.0,
                    _SHORT_NAME_STATUS,
                    None,
                    None,
                    None,
                    None,
                    'AUTO_SKIP',
                    0.0,
                    0.0,
                    0.0,
                    0,
                    observed_at,
                    None,
                    None,
                    None,
                    (existing or {}).get('manual_comment'),
+ (existing or {}).get('rejected_candidate_mmsi'), + (existing or {}).get('rejected_at'), + json.dumps(evidence_summary, ensure_ascii=False), + observed_at, + ), + ) + episode_snapshot_payloads[key] = { + 'parentEpisodeIds': episode_assignment.merged_from_episode_ids, + 'topCandidateMmsi': None, + 'topCandidateScore': 0.0, + 'resolutionStatus': _SHORT_NAME_STATUS, + 'metadata': {'skipReason': status_reason}, + } + continue + + candidates = _build_candidate_scores( + vessel_store=vessel_store, + observed_at=observed_at, + group=group, + episode_assignment=episode_assignment, + default_model_id=default_model_id, + default_model_name=default_model_name, + score_rows=correlation_scores.get(key, []), + raw_metrics=raw_metric_averages, + center_track=center_tracks.get(key, []), + all_positions=all_positions, + registry_by_mmsi=registry_by_mmsi, + registry_by_name=registry_by_name, + existing=existing, + excluded_candidate_mmsis=excluded_candidate_mmsis, + episode_prior_stats=episode_prior_stats, + lineage_prior_stats=lineage_prior_stats, + label_prior_stats=label_prior_stats, + ) + + top_candidate = candidates[0] if candidates else None + second_score = candidates[1].final_score if len(candidates) > 1 else 0.0 + margin = round((top_candidate.final_score - second_score), 6) if top_candidate else 0.0 + stable_cycles = _top_candidate_stable_cycles(existing, top_candidate) + for rank, candidate in enumerate(candidates, start=1): + snapshot_rows.append(( + observed_at, + group_key, + sub_cluster_id, + group_key, + normalized_parent_name, + episode_assignment.episode_id, + candidate.mmsi, + candidate.name, + candidate.vessel_id, + rank, + candidate.candidate_source, + candidate.model_id, + candidate.model_name, + candidate.base_corr_score, + candidate.name_match_score, + candidate.track_similarity_score, + candidate.visit_score_6h, + candidate.proximity_score_6h, + candidate.activity_sync_score_6h, + candidate.stability_score, + candidate.registry_bonus, + 
candidate.episode_prior_bonus, + candidate.lineage_prior_bonus, + candidate.label_prior_bonus, + candidate.final_score, + round(top_candidate.final_score - candidate.final_score, 6) if top_candidate else 0.0, + json.dumps(candidate.evidence, ensure_ascii=False), + )) + + status, decision_source = _select_status(top_candidate, margin, stable_cycles) + auto_status = status + selected_parent_mmsi: Optional[str] = None + selected_parent_name: Optional[str] = None + selected_vessel_id: Optional[int] = None + confidence: Optional[float] = None + last_promoted_at: Optional[datetime] = None + + if top_candidate is not None: + if status == _AUTO_PROMOTED_STATUS: + selected_parent_mmsi = top_candidate.mmsi + selected_parent_name = top_candidate.name + selected_vessel_id = top_candidate.vessel_id + confidence = top_candidate.final_score + last_promoted_at = observed_at + promoted += 1 + elif status == _REVIEW_REQUIRED_STATUS: + selected_parent_mmsi = top_candidate.mmsi + selected_parent_name = top_candidate.name + selected_vessel_id = top_candidate.vessel_id + confidence = top_candidate.final_score + review_required += 1 + elif status == _NO_CANDIDATE_STATUS: + no_candidate += 1 + + status_reason = _status_reason(status) + evidence_summary = { + 'episodeId': episode_assignment.episode_id, + 'continuitySource': episode_assignment.continuity_source, + 'continuityScore': episode_assignment.continuity_score, + 'mergedFromEpisodeIds': episode_assignment.merged_from_episode_ids, + 'splitFromEpisodeId': episode_assignment.split_from_episode_id, + 'normalizedParentName': normalized_parent_name, + 'candidateCount': len(candidates), + 'topCandidateMmsi': top_candidate.mmsi if top_candidate else None, + 'topCandidateName': top_candidate.name if top_candidate else None, + 'topCandidateSources': sorted(_candidate_sources(top_candidate)), + 'hasCorrelationCandidate': 'CORRELATION' in _candidate_sources(top_candidate), + 'recentTopCandidateStableCycles': stable_cycles, + 'skipReason': 
_status_reason(_SHORT_NAME_STATUS) if status == _SHORT_NAME_STATUS else None, + 'statusReason': status_reason, + 'trackable': is_trackable_parent_name(group_key), + 'priorBonusTotal': top_candidate.evidence.get('scoreBreakdown', {}).get('priorBonusTotal') if top_candidate else 0.0, + } + if excluded_candidate_mmsis: + evidence_summary['excludedCandidateMmsis'] = sorted(excluded_candidate_mmsis) + if active_label_session is not None: + evidence_summary['activeLabelSessionId'] = active_label_session['id'] + evidence_summary['activeLabelParentMmsi'] = active_label_session['label_parent_mmsi'] + + if existing is not None and existing.get('status') == _MANUAL_CONFIRMED_STATUS: + status = _MANUAL_CONFIRMED_STATUS + decision_source = existing.get('decision_source') or 'MANUAL' + selected_parent_mmsi = existing.get('selected_parent_mmsi') + selected_parent_name = existing.get('selected_parent_name') + selected_vessel_id = existing.get('selected_vessel_id') + confidence = existing.get('confidence') or confidence + last_promoted_at = existing.get('approved_at') or existing.get('rejected_at') or last_promoted_at + + _upsert_resolution( + conn, + ( + group_key, + sub_cluster_id, + group_key, + normalized_parent_name, + episode_assignment.episode_id, + episode_assignment.continuity_source, + episode_assignment.continuity_score, + top_candidate.evidence.get('scoreBreakdown', {}).get('priorBonusTotal', 0.0) if top_candidate else 0.0, + status, + selected_parent_mmsi, + selected_parent_name, + selected_vessel_id, + confidence, + decision_source, + top_candidate.final_score if top_candidate else 0.0, + second_score, + margin, + stable_cycles, + observed_at, + last_promoted_at, + (existing or {}).get('approved_by'), + (existing or {}).get('approved_at'), + (existing or {}).get('manual_comment'), + (existing or {}).get('rejected_candidate_mmsi'), + (existing or {}).get('rejected_at'), + json.dumps(evidence_summary, ensure_ascii=False), + observed_at, + ), + ) + 
episode_snapshot_payloads[key] = { + 'parentEpisodeIds': episode_assignment.merged_from_episode_ids, + 'topCandidateMmsi': top_candidate.mmsi if top_candidate else None, + 'topCandidateScore': top_candidate.final_score if top_candidate else 0.0, + 'resolutionStatus': status, + 'metadata': { + 'splitFromEpisodeId': episode_assignment.split_from_episode_id, + 'candidateCount': len(candidates), + 'topCandidateSources': sorted(_candidate_sources(top_candidate)), + }, + } + if active_label_session is not None: + label_tracking_rows.append( + _label_tracking_row( + observed_at=observed_at, + label_session=active_label_session, + auto_status=auto_status, + top_candidate=top_candidate, + margin=margin, + candidates=candidates, + ) + ) + + sync_episode_states(conn, observed_at, episode_plan) + inserted = _insert_candidate_snapshots(conn, observed_at, snapshot_rows) + episode_snapshots_inserted = insert_episode_snapshots(conn, observed_at, episode_plan, episode_snapshot_payloads) + tracking_inserted = _insert_label_tracking_rows(conn, label_tracking_rows) + conn.commit() + logger.info( + 'gear parent inference: %d groups, %d direct-match, %d candidates, %d promoted, %d review, %d skipped, %d no-candidate, %d episode-snapshots, %d label-tracking', + len(active_groups), + direct_matched, + inserted, + promoted, + review_required, + skipped, + no_candidate, + episode_snapshots_inserted, + tracking_inserted, + ) + return { + 'groups': len(active_groups), + 'candidates': inserted, + 'promoted': promoted, + 'review_required': review_required, + 'skipped': skipped, + 'no_candidate': no_candidate, + 'direct_matched': direct_matched, + 'episode_snapshots': episode_snapshots_inserted, + 'label_tracking': tracking_inserted, + } diff --git a/prediction/config.py b/prediction/config.py index 7d823c3..37baa13 100644 --- a/prediction/config.py +++ b/prediction/config.py @@ -1,3 +1,6 @@ +import re +from typing import Optional + from pydantic_settings import BaseSettings @@ -50,3 +53,14 @@ 
class Settings(BaseSettings): settings = Settings() + +_SQL_IDENTIFIER = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*$') + + +def qualified_table(table_name: str, schema: Optional[str] = None) -> str: + resolved_schema = schema or settings.KCGDB_SCHEMA + if not _SQL_IDENTIFIER.fullmatch(resolved_schema): + raise ValueError(f'Invalid schema name: {resolved_schema!r}') + if not _SQL_IDENTIFIER.fullmatch(table_name): + raise ValueError(f'Invalid table name: {table_name!r}') + return f'{resolved_schema}.{table_name}' diff --git a/prediction/db/kcgdb.py b/prediction/db/kcgdb.py index db55152..6654744 100644 --- a/prediction/db/kcgdb.py +++ b/prediction/db/kcgdb.py @@ -7,7 +7,7 @@ import psycopg2 from psycopg2 import pool from psycopg2.extras import execute_values -from config import settings +from config import qualified_table, settings if TYPE_CHECKING: from models.result import AnalysisResult @@ -15,6 +15,7 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) _pool: Optional[pool.ThreadedConnectionPool] = None +GROUP_POLYGON_SNAPSHOTS = qualified_table('group_polygon_snapshots') def init_pool(): @@ -152,8 +153,8 @@ def save_group_snapshots(snapshots: list[dict]) -> int: if not snapshots: return 0 - insert_sql = """ - INSERT INTO kcg.group_polygon_snapshots ( + insert_sql = f""" + INSERT INTO {GROUP_POLYGON_SNAPSHOTS} ( group_type, group_key, group_label, sub_cluster_id, resolution, snapshot_time, polygon, center_point, area_sq_nm, member_count, zone_id, zone_name, members, color @@ -280,11 +281,11 @@ def fetch_polygon_summary() -> dict: try: with get_conn() as conn: with conn.cursor() as cur: - cur.execute(""" + cur.execute(f""" SELECT group_type, COUNT(*), SUM(member_count) - FROM kcg.group_polygon_snapshots + FROM {GROUP_POLYGON_SNAPSHOTS} WHERE snapshot_time = ( - SELECT MAX(snapshot_time) FROM kcg.group_polygon_snapshots + SELECT MAX(snapshot_time) FROM {GROUP_POLYGON_SNAPSHOTS} ) GROUP BY group_type """) @@ -315,7 +316,9 @@ def cleanup_group_snapshots(days: int = 7) 
-> int: with get_conn() as conn: with conn.cursor() as cur: cur.execute( - f"DELETE FROM kcg.group_polygon_snapshots WHERE snapshot_time < NOW() - INTERVAL '{days} days'", + f"DELETE FROM {GROUP_POLYGON_SNAPSHOTS} " + "WHERE snapshot_time < NOW() - (%s * INTERVAL '1 day')", + (days,), ) deleted = cur.rowcount conn.commit() diff --git a/prediction/fleet_tracker.py b/prediction/fleet_tracker.py index db85628..ba4f959 100644 --- a/prediction/fleet_tracker.py +++ b/prediction/fleet_tracker.py @@ -7,6 +7,9 @@ from typing import Optional import pandas as pd +from algorithms.gear_name_rules import is_trackable_parent_name +from config import qualified_table + logger = logging.getLogger(__name__) # 어구 이름 패턴 — 공백/영숫자 인덱스/끝_ 허용 @@ -14,6 +17,11 @@ GEAR_PATTERN = re.compile(r'^(.+?)_(?=\S*\d)\S+(?:[_ ]\S*)*[_ ]*$|^(\d+)$') GEAR_PATTERN_PCT = re.compile(r'^(.+?)%$') _REGISTRY_CACHE_SEC = 3600 +FLEET_COMPANIES = qualified_table('fleet_companies') +FLEET_VESSELS = qualified_table('fleet_vessels') +GEAR_IDENTITY_LOG = qualified_table('gear_identity_log') +GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') +FLEET_TRACKING_SNAPSHOT = qualified_table('fleet_tracking_snapshot') class FleetTracker: @@ -32,13 +40,13 @@ class FleetTracker: return cur = conn.cursor() - cur.execute('SELECT id, name_cn, name_en FROM kcg.fleet_companies') + cur.execute(f'SELECT id, name_cn, name_en FROM {FLEET_COMPANIES}') self._companies = {r[0]: {'name_cn': r[1], 'name_en': r[2]} for r in cur.fetchall()} cur.execute( - """SELECT id, company_id, permit_no, name_cn, name_en, tonnage, - gear_code, fleet_role, pair_vessel_id, mmsi - FROM kcg.fleet_vessels""" + f"""SELECT id, company_id, permit_no, name_cn, name_en, tonnage, + gear_code, fleet_role, pair_vessel_id, mmsi + FROM {FLEET_VESSELS}""" ) self._vessels = {} self._name_cn_map = {} @@ -92,7 +100,7 @@ class FleetTracker: # 이미 매칭됨 → last_seen_at 업데이트 if mmsi in self._mmsi_to_vid: cur.execute( - 'UPDATE kcg.fleet_vessels SET last_seen_at = 
NOW() WHERE id = %s', + f'UPDATE {FLEET_VESSELS} SET last_seen_at = NOW() WHERE id = %s', (self._mmsi_to_vid[mmsi],), ) continue @@ -104,7 +112,7 @@ class FleetTracker: if vid: cur.execute( - """UPDATE kcg.fleet_vessels + f"""UPDATE {FLEET_VESSELS} SET mmsi = %s, match_confidence = 0.95, match_method = 'NAME_EXACT', last_seen_at = NOW(), updated_at = NOW() WHERE id = %s AND (mmsi IS NULL OR mmsi = %s)""", @@ -154,6 +162,10 @@ class FleetTracker: if m2: parent_name = m2.group(1).strip() + effective_parent_name = parent_name or name + if not is_trackable_parent_name(effective_parent_name): + continue + # 모선 매칭 parent_mmsi: Optional[str] = None parent_vid: Optional[int] = None @@ -170,7 +182,7 @@ class FleetTracker: # 기존 활성 행 조회 cur.execute( - """SELECT id, name FROM kcg.gear_identity_log + f"""SELECT id, name FROM {GEAR_IDENTITY_LOG} WHERE mmsi = %s AND is_active = TRUE""", (mmsi,), ) @@ -180,7 +192,7 @@ class FleetTracker: if existing[1] == name: # 같은 MMSI + 같은 이름 → 위치/시간 업데이트 cur.execute( - """UPDATE kcg.gear_identity_log + f"""UPDATE {GEAR_IDENTITY_LOG} SET last_seen_at = %s, lat = %s, lon = %s WHERE id = %s""", (now, lat, lon, existing[0]), @@ -188,11 +200,11 @@ class FleetTracker: else: # 같은 MMSI + 다른 이름 → 이전 비활성화 + 새 행 cur.execute( - 'UPDATE kcg.gear_identity_log SET is_active = FALSE WHERE id = %s', + f'UPDATE {GEAR_IDENTITY_LOG} SET is_active = FALSE WHERE id = %s', (existing[0],), ) cur.execute( - """INSERT INTO kcg.gear_identity_log + f"""INSERT INTO {GEAR_IDENTITY_LOG} (mmsi, name, parent_name, parent_mmsi, parent_vessel_id, gear_index_1, gear_index_2, lat, lon, match_method, match_confidence, first_seen_at, last_seen_at) @@ -204,7 +216,7 @@ class FleetTracker: else: # 새 MMSI → 같은 이름이 다른 MMSI로 있는지 확인 cur.execute( - """SELECT id, mmsi FROM kcg.gear_identity_log + f"""SELECT id, mmsi FROM {GEAR_IDENTITY_LOG} WHERE name = %s AND is_active = TRUE AND mmsi != %s""", (name, mmsi), ) @@ -212,7 +224,7 @@ class FleetTracker: if old_mmsi_row: # 같은 이름 + 다른 MMSI → 
MMSI 변경 cur.execute( - 'UPDATE kcg.gear_identity_log SET is_active = FALSE WHERE id = %s', + f'UPDATE {GEAR_IDENTITY_LOG} SET is_active = FALSE WHERE id = %s', (old_mmsi_row[0],), ) logger.info('gear MMSI change: %s → %s (name=%s)', old_mmsi_row[1], mmsi, name) @@ -220,7 +232,7 @@ class FleetTracker: # 어피니티 점수 이전 (이전 MMSI → 새 MMSI) try: cur.execute( - "UPDATE kcg.gear_correlation_scores " + f"UPDATE {GEAR_CORRELATION_SCORES} " "SET target_mmsi = %s, updated_at = NOW() " "WHERE target_mmsi = %s", (mmsi, old_mmsi_row[1]), @@ -234,7 +246,7 @@ class FleetTracker: logger.warning('affinity score transfer failed: %s', e) cur.execute( - """INSERT INTO kcg.gear_identity_log + f"""INSERT INTO {GEAR_IDENTITY_LOG} (mmsi, name, parent_name, parent_mmsi, parent_vessel_id, gear_index_1, gear_index_2, lat, lon, match_method, match_confidence, first_seen_at, last_seen_at) @@ -329,7 +341,7 @@ class FleetTracker: center_lon = sum(lons) / len(lons) if lons else None cur.execute( - """INSERT INTO kcg.fleet_tracking_snapshot + f"""INSERT INTO {FLEET_TRACKING_SNAPSHOT} (company_id, snapshot_time, total_vessels, active_vessels, center_lat, center_lon) VALUES (%s, %s, %s, %s, %s, %s)""", diff --git a/prediction/main.py b/prediction/main.py index 139912f..e16283a 100644 --- a/prediction/main.py +++ b/prediction/main.py @@ -4,7 +4,7 @@ from contextlib import asynccontextmanager from fastapi import BackgroundTasks, FastAPI -from config import settings +from config import qualified_table, settings from db import kcgdb, snpdb from scheduler import get_last_run, run_analysis_cycle, start_scheduler, stop_scheduler @@ -14,6 +14,8 @@ logging.basicConfig( stream=sys.stdout, ) logger = logging.getLogger(__name__) +GEAR_CORRELATION_SCORES = qualified_table('gear_correlation_scores') +CORRELATION_PARAM_MODELS = qualified_table('correlation_param_models') @asynccontextmanager @@ -89,11 +91,11 @@ def get_correlation_tracks( cur = conn.cursor() # Get correlated vessels from ALL active models - 
cur.execute(""" + cur.execute(f""" SELECT s.target_mmsi, s.target_type, s.target_name, s.current_score, m.name AS model_name - FROM kcg.gear_correlation_scores s - JOIN kcg.correlation_param_models m ON s.model_id = m.id + FROM {GEAR_CORRELATION_SCORES} s + JOIN {CORRELATION_PARAM_MODELS} m ON s.model_id = m.id WHERE s.group_key = %s AND s.current_score >= %s AND m.is_active = TRUE diff --git a/prediction/scheduler.py b/prediction/scheduler.py index 10eba03..8bae5c7 100644 --- a/prediction/scheduler.py +++ b/prediction/scheduler.py @@ -135,6 +135,27 @@ def run_analysis_cycle(): except Exception as e: logger.warning('gear correlation failed: %s', e) + # 4.8 어구 모선 추론 (episode continuity + 다층 점수 모델) + try: + from algorithms.gear_parent_inference import run_gear_parent_inference + + inference_result = run_gear_parent_inference( + vessel_store=vessel_store, + gear_groups=gear_groups, + conn=kcg_conn, + ) + logger.info( + 'gear parent inference: %d groups, %d direct-match, %d candidates, %d promoted, %d review, %d skipped', + inference_result['groups'], + inference_result.get('direct_matched', 0), + inference_result['candidates'], + inference_result['promoted'], + inference_result['review_required'], + inference_result['skipped'], + ) + except Exception as e: + logger.warning('gear parent inference failed: %s', e) + # 5. 
선박별 추가 알고리즘 → AnalysisResult 생성 results = [] for c in classifications: diff --git a/prediction/tests/test_gear_parent_episode.py b/prediction/tests/test_gear_parent_episode.py new file mode 100644 index 0000000..1ffeaaa --- /dev/null +++ b/prediction/tests/test_gear_parent_episode.py @@ -0,0 +1,177 @@ +import unittest +import sys +import types +from datetime import datetime, timedelta, timezone + +stub = types.ModuleType('pydantic_settings') + + +class BaseSettings: + def __init__(self, **kwargs): + for name, value in self.__class__.__dict__.items(): + if name.isupper(): + setattr(self, name, kwargs.get(name, value)) + + +stub.BaseSettings = BaseSettings +sys.modules.setdefault('pydantic_settings', stub) + +from algorithms.gear_parent_episode import ( + GroupEpisodeInput, + EpisodeState, + build_episode_plan, + compute_prior_bonus_components, + continuity_score, +) + + +class GearParentEpisodeTest(unittest.TestCase): + def test_continuity_score_prefers_member_overlap_and_near_center(self): + current = GroupEpisodeInput( + group_key='ZHEDAIYU02394', + normalized_parent_name='ZHEDAIYU02394', + sub_cluster_id=1, + member_mmsis=['100', '200', '300'], + member_count=3, + center_lat=35.0, + center_lon=129.0, + ) + previous = EpisodeState( + episode_id='ep-prev', + lineage_key='ZHEDAIYU02394', + group_key='ZHEDAIYU02394', + normalized_parent_name='ZHEDAIYU02394', + current_sub_cluster_id=0, + member_mmsis=['100', '200', '400'], + member_count=3, + center_lat=35.02, + center_lon=129.01, + last_snapshot_time=datetime.now(timezone.utc), + status='ACTIVE', + ) + score, overlap_count, distance_nm = continuity_score(current, previous) + self.assertGreaterEqual(overlap_count, 2) + self.assertGreater(score, 0.45) + self.assertLess(distance_nm, 12.0) + + def test_build_episode_plan_creates_merge_episode(self): + now = datetime.now(timezone.utc) + current = GroupEpisodeInput( + group_key='JINSHI', + normalized_parent_name='JINSHI', + sub_cluster_id=0, + member_mmsis=['a', 'b', 'c', 
'd'], + member_count=4, + center_lat=35.0, + center_lon=129.0, + ) + previous_a = EpisodeState( + episode_id='ep-a', + lineage_key='JINSHI', + group_key='JINSHI', + normalized_parent_name='JINSHI', + current_sub_cluster_id=1, + member_mmsis=['a', 'b'], + member_count=2, + center_lat=35.0, + center_lon=129.0, + last_snapshot_time=now - timedelta(minutes=5), + status='ACTIVE', + ) + previous_b = EpisodeState( + episode_id='ep-b', + lineage_key='JINSHI', + group_key='JINSHI', + normalized_parent_name='JINSHI', + current_sub_cluster_id=2, + member_mmsis=['c', 'd'], + member_count=2, + center_lat=35.01, + center_lon=129.01, + last_snapshot_time=now - timedelta(minutes=5), + status='ACTIVE', + ) + plan = build_episode_plan([current], {'JINSHI': [previous_a, previous_b]}) + assignment = plan.assignments[current.key] + self.assertEqual(assignment.continuity_source, 'MERGE_NEW') + self.assertEqual(set(assignment.merged_from_episode_ids), {'ep-a', 'ep-b'}) + self.assertEqual(plan.merged_episode_targets['ep-a'], assignment.episode_id) + self.assertEqual(plan.merged_episode_targets['ep-b'], assignment.episode_id) + + def test_build_episode_plan_marks_split_continue_and_split_new(self): + now = datetime.now(timezone.utc) + previous = EpisodeState( + episode_id='ep-prev', + lineage_key='A01859', + group_key='A01859', + normalized_parent_name='A01859', + current_sub_cluster_id=0, + member_mmsis=['a', 'b', 'c', 'd'], + member_count=4, + center_lat=35.0, + center_lon=129.0, + last_snapshot_time=now - timedelta(minutes=5), + status='ACTIVE', + ) + current_a = GroupEpisodeInput( + group_key='A01859', + normalized_parent_name='A01859', + sub_cluster_id=1, + member_mmsis=['a', 'b', 'c'], + member_count=3, + center_lat=35.0, + center_lon=129.0, + ) + current_b = GroupEpisodeInput( + group_key='A01859', + normalized_parent_name='A01859', + sub_cluster_id=2, + member_mmsis=['c', 'd'], + member_count=2, + center_lat=35.02, + center_lon=129.02, + ) + plan = build_episode_plan([current_a, 
current_b], {'A01859': [previous]}) + sources = {plan.assignments[current_a.key].continuity_source, plan.assignments[current_b.key].continuity_source} + self.assertIn('SPLIT_CONTINUE', sources) + self.assertIn('SPLIT_NEW', sources) + + def test_compute_prior_bonus_components_caps_total_bonus(self): + observed_at = datetime.now(timezone.utc) + bonuses = compute_prior_bonus_components( + observed_at=observed_at, + normalized_parent_name='JINSHI', + episode_id='ep-1', + candidate_mmsi='412333326', + episode_prior_stats={ + ('ep-1', '412333326'): { + 'seen_count': 12, + 'top1_count': 5, + 'avg_score': 0.88, + 'last_seen_at': observed_at - timedelta(hours=1), + }, + }, + lineage_prior_stats={ + ('JINSHI', '412333326'): { + 'seen_count': 24, + 'top1_count': 6, + 'top3_count': 10, + 'avg_score': 0.82, + 'last_seen_at': observed_at - timedelta(hours=3), + }, + }, + label_prior_stats={ + ('JINSHI', '412333326'): { + 'session_count': 4, + 'last_labeled_at': observed_at - timedelta(days=1), + }, + }, + ) + self.assertGreater(bonuses['episodePriorBonus'], 0.0) + self.assertGreater(bonuses['lineagePriorBonus'], 0.0) + self.assertGreater(bonuses['labelPriorBonus'], 0.0) + self.assertLessEqual(bonuses['priorBonusTotal'], 0.20) + + +if __name__ == '__main__': + unittest.main() diff --git a/prediction/tests/test_gear_parent_inference.py b/prediction/tests/test_gear_parent_inference.py new file mode 100644 index 0000000..fdee2af --- /dev/null +++ b/prediction/tests/test_gear_parent_inference.py @@ -0,0 +1,279 @@ +import unittest +import sys +import types +from datetime import datetime, timedelta, timezone + +stub = types.ModuleType('pydantic_settings') + + +class BaseSettings: + def __init__(self, **kwargs): + for name, value in self.__class__.__dict__.items(): + if name.isupper(): + setattr(self, name, kwargs.get(name, value)) + + +stub.BaseSettings = BaseSettings +sys.modules.setdefault('pydantic_settings', stub) + +from algorithms.gear_parent_inference import ( + RegistryVessel, 
+ CandidateScore, + _AUTO_PROMOTED_STATUS, + _apply_final_score_bonus, + _build_track_coverage_metrics, + _build_candidate_scores, + _china_mmsi_prefix_bonus, + _direct_parent_member, + _direct_parent_stable_cycles, + _label_tracking_row, + _NO_CANDIDATE_STATUS, + _REVIEW_REQUIRED_STATUS, + _UNRESOLVED_STATUS, + _name_match_score, + _select_status, + _top_candidate_stable_cycles, + is_trackable_parent_name, + normalize_parent_name, +) + + +class GearParentInferenceRuleTest(unittest.TestCase): + def _candidate(self, *, mmsi='123456789', score=0.8, sources=None): + return CandidateScore( + mmsi=mmsi, + name='TEST', + vessel_id=1, + target_type='VESSEL', + candidate_source=','.join(sources or ['CORRELATION']), + base_corr_score=0.7, + name_match_score=0.1, + track_similarity_score=0.8, + visit_score_6h=0.4, + proximity_score_6h=0.3, + activity_sync_score_6h=0.2, + stability_score=0.9, + registry_bonus=0.05, + episode_prior_bonus=0.0, + lineage_prior_bonus=0.0, + label_prior_bonus=0.0, + final_score=score, + streak_count=6, + model_id=1, + model_name='default', + evidence={'sources': sources or ['CORRELATION']}, + ) + + def test_normalize_parent_name_removes_space_symbols(self): + self.assertEqual(normalize_parent_name(' A_B-C% 12 '), 'ABC12') + + def test_trackable_parent_name_requires_length_four_after_normalize(self): + self.assertFalse(is_trackable_parent_name('A-1%')) + self.assertFalse(is_trackable_parent_name('ZSY')) + self.assertFalse(is_trackable_parent_name('991')) + self.assertTrue(is_trackable_parent_name(' AB_12 ')) + + def test_name_match_score_prefers_raw_exact(self): + self.assertEqual(_name_match_score('LUWENYU 53265', 'LUWENYU 53265', None), 1.0) + + def test_name_match_score_supports_compact_exact_and_prefix(self): + registry = RegistryVessel( + vessel_id=1, + mmsi='412327765', + name_cn='LUWENYU53265', + name_en='LUWENYU 53265', + ) + self.assertEqual(_name_match_score('LUWENYU 53265', 'LUWENYU53265', None), 0.8) + 
        # NOTE(review): these three asserts are the tail of a _name_match_score
        # test whose opening lines precede this chunk. Expected tiers (1.0 /
        # 0.5 / 0.3 / 0.0) presumably encode exact-registry / partial / weak /
        # no match — confirm against _name_match_score's implementation.
        self.assertEqual(_name_match_score('LUWENYU 532', 'LUWENYU53265', None), 0.5)
        self.assertEqual(_name_match_score('LUWENYU 53265', 'DIFFERENT', registry), 1.0)
        self.assertEqual(_name_match_score('ZHEDAIYU02433', 'ZHEDAIYU06178', None), 0.3)

    def test_name_match_score_does_not_use_candidate_registry_self_match(self):
        """A registry entry matching only the candidate's own name must not inflate the score.

        The group name 'JINSHI' shares nothing with the candidate name, so the
        score is 0.0 even though the registry row equals the candidate name.
        """
        registry = RegistryVessel(
            vessel_id=1,
            mmsi='412413545',
            name_cn='ZHEXIANGYU55005',
            name_en='ZHEXIANGYU55005',
        )
        self.assertEqual(_name_match_score('JINSHI', 'ZHEXIANGYU55005', registry), 0.0)

    def test_direct_parent_member_prefers_parent_member_then_parent_mmsi(self):
        """_direct_parent_member resolves the parent from members first, then falls back to parent_mmsi.

        First call: a member flagged ``isParent`` wins. Second call: with an
        empty member list, the group's ``parent_mmsi`` is used and the name is
        looked up from ``all_positions``.
        """
        all_positions = {'412420673': {'name': 'ZHEDAIYU02433'}}
        from_members = _direct_parent_member(
            {
                'parent_name': 'ZHEDAIYU02433',
                'members': [
                    {'mmsi': '412420673', 'name': 'ZHEDAIYU02433', 'isParent': True},
                    {'mmsi': '24330082', 'name': 'ZHEDAIYU02433_82_99_', 'isParent': False},
                ],
            },
            all_positions,
        )
        self.assertEqual(from_members['mmsi'], '412420673')

        from_parent_mmsi = _direct_parent_member(
            {
                'parent_name': 'ZHEDAIYU02433',
                'parent_mmsi': '412420673',
                'members': [],
            },
            all_positions,
        )
        self.assertEqual(from_parent_mmsi['mmsi'], '412420673')
        # Name comes from the all_positions lookup, not from the group dict.
        self.assertEqual(from_parent_mmsi['name'], 'ZHEDAIYU02433')

    def test_direct_parent_stable_cycles_reuses_same_parent(self):
        """Same parent MMSI increments the stable-cycle counter; a new one resets it to 1."""
        existing = {
            'selected_parent_mmsi': '412420673',
            'stable_cycles': 4,
            'evidence_summary': {'directParentMmsi': '412420673'},
        }
        self.assertEqual(_direct_parent_stable_cycles(existing, '412420673'), 5)
        self.assertEqual(_direct_parent_stable_cycles(existing, '412000000'), 1)

    def test_china_prefix_bonus_requires_threshold(self):
        """The 0.15 bonus applies only to 41x-prefixed MMSIs at or above a score threshold.

        Threshold appears to be 0.30 (0.30 passes, 0.29 does not) — confirm
        against _china_mmsi_prefix_bonus. A Korean-prefix MMSI (440...) never
        receives the bonus regardless of score.
        """
        self.assertEqual(_china_mmsi_prefix_bonus('412327765', 0.30), 0.15)
        self.assertEqual(_china_mmsi_prefix_bonus('413987654', 0.65), 0.15)
        self.assertEqual(_china_mmsi_prefix_bonus('412327765', 0.29), 0.0)
        self.assertEqual(_china_mmsi_prefix_bonus('440123456', 0.75), 0.0)

    def test_apply_final_score_bonus_adds_bonus_after_weighted_score(self):
        """Final score = pre-bonus weighted score + prefix bonus, all returned as floats."""
        pre_bonus_score, china_bonus, final_score = _apply_final_score_bonus('412333326', 0.66)
        self.assertIsInstance(pre_bonus_score, float)
        self.assertIsInstance(china_bonus, float)
        self.assertIsInstance(final_score, float)
        self.assertEqual(pre_bonus_score, 0.66)
        self.assertEqual(china_bonus, 0.15)
        # 0.66 + 0.15; exact equality implies the implementation avoids float
        # drift here (or these particular operands happen to be exact).
        self.assertEqual(final_score, 0.81)

    def test_top_candidate_stable_cycles_resets_on_candidate_change(self):
        """Top-candidate continuity: same MMSI -> previous count + 1; different MMSI -> 1."""
        existing = {
            'stable_cycles': 5,
            'evidence_summary': {'topCandidateMmsi': '111111111'},
        }
        self.assertEqual(_top_candidate_stable_cycles(existing, self._candidate(mmsi='111111111')), 6)
        self.assertEqual(_top_candidate_stable_cycles(existing, self._candidate(mmsi='222222222')), 1)

    def test_select_status_requires_recent_stability_and_correlation_for_auto(self):
        """Auto-promotion needs all three: high score, CORRELATION source, and >= 3 stable cycles.

        Dropping either the CORRELATION source or the third stable cycle
        demotes the result to review-required.
        """
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['CORRELATION']), margin=0.2, stable_cycles=3),
            (_AUTO_PROMOTED_STATUS, 'AUTO_PROMOTION'),
        )
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['PREVIOUS_SELECTION']), margin=0.2, stable_cycles=3),
            (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW'),
        )
        self.assertEqual(
            _select_status(self._candidate(score=0.8, sources=['CORRELATION']), margin=0.2, stable_cycles=2),
            (_REVIEW_REQUIRED_STATUS, 'AUTO_REVIEW'),
        )

    def test_select_status_marks_candidate_gaps_explicitly(self):
        """No candidate at all and low-score candidates map to distinct explicit statuses."""
        self.assertEqual(_select_status(None, margin=0.0, stable_cycles=0), (_NO_CANDIDATE_STATUS, 'AUTO_NO_CANDIDATE'))
        self.assertEqual(
            _select_status(self._candidate(score=0.45, sources=['CORRELATION']), margin=0.1, stable_cycles=1),
            (_UNRESOLVED_STATUS, 'AUTO_SCORE'),
        )

    def test_build_candidate_scores_applies_active_exclusions_before_scoring(self):
        """An MMSI present in excluded_candidate_mmsis is dropped even with the highest raw score."""
        # Minimal stand-in for the vessel store; _build_candidate_scores only
        # touches _tracks here (empty, so no track-based scoring occurs).
        class FakeStore:
            _tracks = {}

        candidates = _build_candidate_scores(
            vessel_store=FakeStore(),
            observed_at=datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc),
            group={'parent_name': 'AB1234', 'sub_cluster_id': 1},
            episode_assignment=types.SimpleNamespace(
                episode_id='ep-test',
                continuity_source='NEW',
                continuity_score=0.0,
            ),
            default_model_id=1,
            default_model_name='default',
            score_rows=[
                {
                    # Higher score (0.8) but excluded below — must not appear.
                    'target_mmsi': '412111111',
                    'target_type': 'VESSEL',
                    'target_name': 'AB1234',
                    'current_score': 0.8,
                    'streak_count': 4,
                },
                {
                    'target_mmsi': '440222222',
                    'target_type': 'VESSEL',
                    'target_name': 'AB1234',
                    'current_score': 0.7,
                    'streak_count': 3,
                },
            ],
            raw_metrics={},
            center_track=[],
            all_positions={},
            registry_by_mmsi={},
            registry_by_name={},
            existing=None,
            excluded_candidate_mmsis={'412111111'},
            episode_prior_stats={},
            lineage_prior_stats={},
            label_prior_stats={},
        )
        self.assertEqual([candidate.mmsi for candidate in candidates], ['440222222'])

    def test_track_coverage_metrics_penalize_short_track_support(self):
        """A single-point candidate track gets zero coverage; a dense 10-point track gets positive coverage."""
        now = datetime(2026, 4, 3, 0, 0, tzinfo=timezone.utc)
        # Group-center track spanning the last 5 hours.
        center_track = [
            {'timestamp': now - timedelta(hours=5), 'lat': 35.0, 'lon': 129.0},
            {'timestamp': now - timedelta(hours=1), 'lat': 35.1, 'lon': 129.1},
        ]
        short_track = [
            {'timestamp': now - timedelta(minutes=10), 'lat': 35.1, 'lon': 129.1, 'sog': 0.5},
        ]
        # 10 points, 10-minute spacing, drifting east slowly.
        long_track = [
            {'timestamp': now - timedelta(minutes=90) + timedelta(minutes=10 * idx), 'lat': 35.0, 'lon': 129.0 + (0.01 * idx), 'sog': 0.5}
            for idx in range(10)
        ]

        short_metrics = _build_track_coverage_metrics(center_track, short_track, 35.05, 129.05)
        long_metrics = _build_track_coverage_metrics(center_track, long_track, 35.05, 129.05)

        self.assertEqual(short_metrics['trackPointCount'], 1)
        self.assertEqual(short_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['trackCoverageFactor'], 0.0)
        self.assertGreater(long_metrics['coverageFactor'], short_metrics['coverageFactor'])

    def test_label_tracking_row_tracks_rank_and_match_flags(self):
        """_label_tracking_row records the labeled candidate's rank, scores, and match flags.

        The row is positional; the index meanings asserted below are inferred
        from the values used here — verify against _label_tracking_row's
        column order before relying on them:
        row[0] label-session id, row[8]/row[10] rank-like fields (2 = the
        labeled candidate ranked second), row[11]/row[12] labeled candidate's
        scores, row[14]/row[15] boolean match flags.
        """
        top_candidate = self._candidate(mmsi='412333326', score=0.81, sources=['CORRELATION'])
        top_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.66},
        }
        labeled_candidate = self._candidate(mmsi='440123456', score=0.62, sources=['CORRELATION'])
        labeled_candidate.evidence = {
            'sources': ['CORRELATION'],
            'scoreBreakdown': {'preBonusScore': 0.62},
        }

        row = _label_tracking_row(
            observed_at='2026-04-03T00:00:00Z',
            label_session={
                'id': 10,
                'label_parent_mmsi': '440123456',
                'label_parent_name': 'TARGET',
            },
            auto_status='REVIEW_REQUIRED',
            top_candidate=top_candidate,
            margin=0.19,
            candidates=[top_candidate, labeled_candidate],
        )
        self.assertEqual(row[0], 10)
        self.assertEqual(row[8], 2)
        self.assertTrue(row[9])
        self.assertEqual(row[10], 2)
        self.assertEqual(row[11], 0.62)
        self.assertEqual(row[12], 0.62)
        self.assertFalse(row[14])
        self.assertTrue(row[15])


if __name__ == '__main__':
    unittest.main()
# === New file in this patch: prediction/tests/test_time_bucket.py ===
import unittest
import sys
import types
from datetime import datetime, timezone
from zoneinfo import ZoneInfo

import pandas as pd

# Stub out pydantic_settings BEFORE the project imports below, so that
# importing config (transitively via cache.vessel_store / time_bucket,
# presumably) does not require the real dependency in the test environment.
stub = types.ModuleType('pydantic_settings')


class BaseSettings:
    # Minimal stand-in: copies UPPERCASE class attributes into the instance,
    # allowing keyword overrides. NOTE(review): only the immediate class
    # __dict__ is scanned, so uppercase settings inherited from a base class
    # would be skipped — acceptable for this stub, but worth confirming the
    # project's settings classes don't rely on inheritance.
    def __init__(self, **kwargs):
        for name, value in self.__class__.__dict__.items():
            if name.isupper():
                setattr(self, name, kwargs.get(name, value))


stub.BaseSettings = BaseSettings
# setdefault: keep the real module if it is already importable/registered.
sys.modules.setdefault('pydantic_settings', stub)

from cache.vessel_store import VesselStore
from time_bucket import compute_incremental_window_start, compute_initial_window_start, compute_safe_bucket


class TimeBucketRuleTest(unittest.TestCase):
    """Rules for safe-bucket computation and incremental track merging.

    NOTE(review): ``timezone`` is imported above but only ``ZoneInfo`` is used
    in this file — likely a leftover import.
    """

    def test_safe_bucket_uses_delay_then_floors_to_5m(self):
        """15:14 KST maps to a 15:00 bucket — consistent with a safety delay then 5-minute flooring.

        The expected value is naive (no tzinfo), so compute_safe_bucket
        apparently drops/normalizes the timezone — confirm in time_bucket.
        """
        now = datetime(2026, 4, 2, 15, 14, 0, tzinfo=ZoneInfo('Asia/Seoul'))
        self.assertEqual(compute_safe_bucket(now), datetime(2026, 4, 2, 15, 0, 0))

    def test_incremental_window_includes_overlap_buckets(self):
        """The incremental window starts 15 minutes before the last bucket (overlap re-read)."""
        last_bucket = datetime(2026, 4, 2, 15, 0, 0)
        self.assertEqual(compute_incremental_window_start(last_bucket), datetime(2026, 4, 2, 14, 45, 0))

    def test_initial_window_start_anchors_to_safe_bucket(self):
        """A 24-hour initial window counts back from the safe bucket, not from wall-clock now."""
        safe_bucket = datetime(2026, 4, 2, 15, 0, 0)
        self.assertEqual(compute_initial_window_start(24, safe_bucket), datetime(2026, 4, 1, 15, 0, 0))

    def test_merge_incremental_prefers_newer_overlap_rows(self):
        """On timestamp collision, merge_incremental keeps the incoming row's values.

        Seeds two rows, merges two new rows where one timestamp (00:02:00Z)
        collides: the result has 3 rows and the colliding row carries the new
        lat/lon (30.2/120.2), proving replace-not-duplicate semantics.
        """
        store = VesselStore()
        # Seed the private cache directly — bypasses whatever loading path
        # VesselStore normally uses.
        store._tracks = {
            '412000001': pd.DataFrame([
                {
                    'mmsi': '412000001',
                    'timestamp': pd.Timestamp('2026-04-02T00:01:00Z'),
                    'time_bucket': datetime(2026, 4, 2, 9, 0, 0),
                    'lat': 30.0,
                    'lon': 120.0,
                    'raw_sog': 1.0,
                },
                {
                    'mmsi': '412000001',
                    'timestamp': pd.Timestamp('2026-04-02T00:02:00Z'),
                    'time_bucket': datetime(2026, 4, 2, 9, 0, 0),
                    'lat': 30.1,
                    'lon': 120.1,
                    'raw_sog': 1.0,
                },
            ])
        }
        df_new = pd.DataFrame([
            {
                # Same timestamp as an existing row — must replace it.
                'mmsi': '412000001',
                'timestamp': pd.Timestamp('2026-04-02T00:02:00Z'),
                'time_bucket': datetime(2026, 4, 2, 9, 0, 0),
                'lat': 30.2,
                'lon': 120.2,
                'raw_sog': 2.0,
            },
            {
                'mmsi': '412000001',
                'timestamp': pd.Timestamp('2026-04-02T00:03:00Z'),
                'time_bucket': datetime(2026, 4, 2, 9, 5, 0),
                'lat': 30.3,
                'lon': 120.3,
                'raw_sog': 2.0,
            },
        ])

        store.merge_incremental(df_new)

        merged = store._tracks['412000001']
        self.assertEqual(len(merged), 3)
        replacement = merged.loc[merged['timestamp'] == pd.Timestamp('2026-04-02T00:02:00Z')].iloc[0]
        self.assertEqual(float(replacement['lat']), 30.2)
        self.assertEqual(float(replacement['lon']), 120.2)


if __name__ == '__main__':
    unittest.main()