From 37ae1bfa484253973019e308255eabfc5b870898 Mon Sep 17 00:00:00 2001
From: htlee
Date: Thu, 16 Apr 2026 09:50:42 +0900
Subject: [PATCH] =?UTF-8?q?feat(prediction):=20=EC=84=9C=EB=B2=84=20?=
 =?UTF-8?q?=EC=8A=A4=ED=81=AC=EB=A6=BD=ED=8A=B8=EC=97=90=20tier/match=5Fme?=
 =?UTF-8?q?thod/G-02/G-03=20=EC=B6=94=EC=A0=81=20=EC=B6=94=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

scheduler.py features write 확장:
- pair_tier (STRONG/PROBABLE/SUSPECT)
- pair_type, pair_reject_reason
- similarity, confidence
- registered_fishery_code
→ SUSPECT tier 까지 raw_pair 로 보존하여 통계 집계 가능

diagnostic-snapshot.sh (5분 주기):
- 4-4.1 pair_trawl tier 분포 + avg_sync_min
- 4-4.2 reject 사유 journal 로그 tail
- 4-4.3 G-02 금어기 상세 (observed_at, fishery_code)
- 4-4.4 G-03 미등록 어구 상세 (detected/registered/allowed)
- 7.5-2b match_method 분포 (EXACT vs FUZZY)
- 7.5-2c fishery_code × match_method 교차

hourly-analysis-snapshot.sh (1시간 주기):
- P3.5 match_method 분포 + avg_confidence
- P3.6 fishery_code × match_method 교차
- D3.6 pair_tier 분포 + avg_sync_min + avg_sep_nm
- D3.7 G-02/G-03 건수 + gear_judgment 분포
- D3.8 reject 사유 1시간 journal 집계
---
Review notes (ignored by git am; not part of the commit message):
- 4-4.1 / D3.6: scheduler.py writes pair_tier as an empty string when no tier
  exists, so coalesce() alone never produced the "(none)" label; the value is
  now passed through nullif() first.
- 4-4.1 / D3.6: rows with a non-empty pair_tier are included even when
  pair_trawl_detected is false, so SUSPECT pairs show up in the distribution.
- D3.8: the awk field reference inside the double-quoted ssh command was
  expanded by the local shell started by psql before awk could see it; the
  reject suffix is now extracted with sed, which needs no dollar sign.
- Code comments in the scheduler.py hunk are translated to English.
- The blob ids on the index lines were computed before these edits.

 prediction/scheduler.py                       |  9 +++
 prediction/scripts/diagnostic-snapshot.sh     | 71 ++++++++++++++++++-
 .../scripts/hourly-analysis-snapshot.sh       | 46 ++++++++++++
 3 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/prediction/scheduler.py b/prediction/scheduler.py
index 001165d..892c8aa 100644
--- a/prediction/scheduler.py
+++ b/prediction/scheduler.py
@@ -437,6 +437,9 @@ def run_analysis_cycle():
             elif final_risk >= 30:
                 final_risk_level = 'MEDIUM'
 
+            # pair_result is filtered to STRONG/PROBABLE, so it is None for SUSPECT.
+            # Also look up the raw pair result so SUSPECT-tier pairs are kept for statistics.
+            raw_pair = pair_results.get(mmsi) or {}
             merged_features = {
                 **(c.get('features', {}) or {}),
                 **dark_features,
@@ -445,6 +448,12 @@ def run_analysis_cycle():
                 'gear_violation_evidence': gear_violation_evidence,
                 'pair_trawl_detected': bool(pair_result and pair_result.get('pair_detected')),
                 'pair_trawl_pair_mmsi': (pair_result or {}).get('pair_mmsi', ''),
+                'pair_tier': raw_pair.get('tier') or '',
+                'pair_type': raw_pair.get('pair_type') or '',
+                'pair_reject_reason': raw_pair.get('reject_reason') or '',
+                'similarity': raw_pair.get('similarity', 0),
+                'confidence': raw_pair.get('confidence', 0),
+                'registered_fishery_code': registered_fishery_code or '',
             }
 
             results.append(AnalysisResult(
diff --git a/prediction/scripts/diagnostic-snapshot.sh b/prediction/scripts/diagnostic-snapshot.sh
index 6bc7da7..2403925 100644
--- a/prediction/scripts/diagnostic-snapshot.sh
+++ b/prediction/scripts/diagnostic-snapshot.sh
@@ -203,9 +203,10 @@ ORDER BY risk_score DESC LIMIT 20;
 SQL
 
 echo ""
-echo "--- 4-4. G-06 쌍끌이 공조 탐지 ---"
+echo "--- 4-4. G-06 쌍끌이 공조 탐지 (tier 포함) ---"
 $PSQL_TABLE << 'SQL'
 SELECT mmsi, zone_code, vessel_type, risk_score,
+       features->>'pair_tier' tier,
        (features->'gear_violation_evidence'->'G-06'->>'sync_duration_min') sync_min,
        (features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm') sep_nm,
        (features->'gear_violation_evidence'->'G-06'->>'pair_mmsi') pair_mmsi,
@@ -216,6 +217,50 @@ WHERE analyzed_at > now() - interval '5 minutes'
 ORDER BY risk_score DESC LIMIT 20;
 SQL
 
+echo ""
+echo "--- 4-4.1 pair_trawl tier 분포 (DAR-03 신호 강도별) ---"
+$PSQL_TABLE << 'SQL'
+SELECT coalesce(nullif(features->>'pair_tier', ''), '(none)') tier,  -- scheduler writes an empty string, not null, when no tier exists
+       count(*) cnt,
+       round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
+       round(avg((features->'gear_violation_evidence'->'G-06'->>'sync_duration_min')::numeric)::numeric, 1) avg_sync_min
+FROM kcg.vessel_analysis_results
+WHERE analyzed_at > now() - interval '5 minutes'
+  AND (features->>'pair_trawl_detected' = 'true' OR nullif(features->>'pair_tier', '') IS NOT NULL)  -- keep SUSPECT rows, which are never flagged as detected
+GROUP BY tier ORDER BY cnt DESC;
+SQL
+
+echo ""
+echo "--- 4-4.2 pair detection reject 사유 (최근 5분 로그) ---"
+ssh redis-211 "sudo journalctl -u kcg-ai-prediction --no-pager --since '5 minutes ago' | grep -oE 'pair detection:[^$]+reject=\{[^}]+\}' | tail -5" 2>/dev/null || true
+
+echo ""
+echo "--- 4-4.3 G-02 금어기 조업 탐지 ---"
+$PSQL_TABLE << 'SQL'
+SELECT mmsi, zone_code, vessel_type, risk_score,
+       features->>'g_codes' g_codes,
+       (features->'gear_violation_evidence'->'G-02'->>'observed_at') observed_at,
+       features->>'registered_fishery_code' fishery_code
+FROM kcg.vessel_analysis_results
+WHERE analyzed_at > now() - interval '5 minutes'
+  AND features->>'g_codes' LIKE '%G-02%'
+ORDER BY risk_score DESC LIMIT 15;
+SQL
+
+echo ""
+echo "--- 4-4.4 G-03 미등록/허가외 어구 탐지 ---"
+$PSQL_TABLE << 'SQL'
+SELECT mmsi, zone_code, vessel_type, risk_score,
+       features->>'g_codes' g_codes,
+       (features->'gear_violation_evidence'->'G-03'->>'detected_gear') detected,
+       (features->'gear_violation_evidence'->'G-03'->>'registered_fishery_code') registered,
+       (features->'gear_violation_evidence'->'G-03'->>'allowed_gears') allowed
+FROM kcg.vessel_analysis_results
+WHERE analyzed_at > now() - interval '5 minutes'
+  AND features->>'g_codes' LIKE '%G-03%'
+ORDER BY risk_score DESC LIMIT 15;
+SQL
+
 echo ""
 echo "--- 4-5. G-04 MMSI 조작 + G-05 어구 이동 ---"
 $PSQL_TABLE << 'SQL'
@@ -341,6 +386,30 @@ WHERE permit_year = EXTRACT(YEAR FROM now())::int
 GROUP BY fishery_code ORDER BY total DESC;
 SQL
 
+echo ""
+echo "--- 7.5-2b. match_method 분포 (NAME_EXACT vs NAME_FUZZY) ---"
+$PSQL_TABLE << 'SQL'
+SELECT coalesce(match_method, '(unmatched)') method,
+       count(*) cnt,
+       round(avg(match_confidence)::numeric, 3) avg_conf
+FROM kcg.fleet_vessels
+WHERE permit_year = EXTRACT(YEAR FROM now())::int
+GROUP BY method ORDER BY cnt DESC;
+SQL
+
+echo ""
+echo "--- 7.5-2c. fishery_code × match_method 교차 ---"
+$PSQL_TABLE << 'SQL'
+SELECT fishery_code,
+       count(*) FILTER (WHERE match_method = 'NAME_EXACT') exact,
+       count(*) FILTER (WHERE match_method = 'NAME_FUZZY') fuzzy,
+       count(*) FILTER (WHERE mmsi IS NULL) unmatched,
+       count(*) total
+FROM kcg.fleet_vessels
+WHERE permit_year = EXTRACT(YEAR FROM now())::int
+GROUP BY fishery_code ORDER BY total DESC;
+SQL
+
 echo ""
 echo "--- 7.5-3. vessel_analysis_results.gear_code 분포 (last 5min) ---"
 $PSQL_TABLE << 'SQL'
diff --git a/prediction/scripts/hourly-analysis-snapshot.sh b/prediction/scripts/hourly-analysis-snapshot.sh
index 4f7b450..cb0fa72 100755
--- a/prediction/scripts/hourly-analysis-snapshot.sh
+++ b/prediction/scripts/hourly-analysis-snapshot.sh
@@ -163,6 +163,26 @@ FROM kcg.fleet_vessels
 WHERE permit_year = EXTRACT(YEAR FROM now())::int
 GROUP BY fishery_code ORDER BY total DESC;
 
+\echo
+\echo === P3.5 match_method distribution (NAME_EXACT vs NAME_FUZZY) ===
+SELECT coalesce(match_method, '(unmatched)') method,
+       count(*) cnt,
+       round(avg(match_confidence)::numeric, 3) avg_conf
+FROM kcg.fleet_vessels
+WHERE permit_year = EXTRACT(YEAR FROM now())::int
+GROUP BY method ORDER BY cnt DESC;
+
+\echo
+\echo === P3.6 fishery_code × match_method cross ===
+SELECT fishery_code,
+       count(*) FILTER (WHERE match_method = 'NAME_EXACT') exact,
+       count(*) FILTER (WHERE match_method = 'NAME_FUZZY') fuzzy,
+       count(*) FILTER (WHERE mmsi IS NULL) unmatched,
+       count(*) total
+FROM kcg.fleet_vessels
+WHERE permit_year = EXTRACT(YEAR FROM now())::int
+GROUP BY fishery_code ORDER BY total DESC;
+
 \echo
 \echo === P4. vessel_analysis_results.gear_code distribution (last 1h) ===
 SELECT coalesce(gear_code, '(null)') gear_code,
@@ -258,6 +278,32 @@ WHERE analyzed_at > now() - interval '1 hour'
   AND features->>'pair_trawl_detected' = 'true'
 GROUP BY pair_type ORDER BY cnt DESC;
 
+\echo
+\echo === D3.6 pair_trawl tier distribution (signal-strength tier) ===
+SELECT coalesce(nullif(features->>'pair_tier', ''), '(none)') tier,  -- scheduler writes an empty string, not null, when no tier exists
+       count(*) cnt,
+       round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
+       round(avg((features->'gear_violation_evidence'->'G-06'->>'sync_duration_min')::numeric)::numeric, 1) avg_sync_min,
+       round(avg((features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm')::numeric)::numeric, 3) avg_sep_nm
+FROM kcg.vessel_analysis_results
+WHERE analyzed_at > now() - interval '1 hour'
+  AND (features->>'pair_trawl_detected' = 'true' OR nullif(features->>'pair_tier', '') IS NOT NULL)  -- keep SUSPECT rows, which are never flagged as detected
+GROUP BY tier ORDER BY cnt DESC;
+
+\echo
+\echo === D3.7 G-02 closed-season + G-03 unregistered-gear counts ===
+SELECT
+  count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-02%') g02_count,
+  count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-03%') g03_count,
+  count(*) FILTER (WHERE features->>'gear_judgment' = 'CLOSED_SEASON_FISHING') judg_closed,
+  count(*) FILTER (WHERE features->>'gear_judgment' = 'UNREGISTERED_GEAR') judg_unreg
+FROM kcg.vessel_analysis_results
+WHERE analyzed_at > now() - interval '1 hour';
+
+\echo
+\echo === D3.8 pair detection reject breakdown (last 1h journal) ===
+\! ssh redis-211 "sudo journalctl -u kcg-ai-prediction --no-pager --since '1 hour ago' | grep -oE 'pair detection:[^$]+reject=\{[^}]+\}' | sed 's/^.*reject=//' | sort | uniq -c | sort -rn | head -10" 2>/dev/null || true
+
 \echo
 \echo === D4. G-06 pair trawl detections ===
 SELECT mmsi, zone_code, vessel_type, risk_score,