kcg-ai-monitoring/prediction/scripts/diagnostic-snapshot.sh
htlee 37ae1bfa48 feat(prediction): 서버 스크립트에 tier/match_method/G-02/G-03 추적 추가
scheduler.py features write 확장:
- pair_tier (STRONG/PROBABLE/SUSPECT)
- pair_type, pair_reject_reason
- similarity, confidence
- registered_fishery_code
→ SUSPECT tier 까지 raw_pair 로 보존하여 통계 집계 가능

diagnostic-snapshot.sh (5분 주기):
- 4-4.1 pair_trawl tier 분포 + avg_sync_min
- 4-4.2 reject 사유 journal 로그 tail
- 4-4.3 G-02 금어기 상세 (observed_at, fishery_code)
- 4-4.4 G-03 미등록 어구 상세 (detected/registered/allowed)
- 7.5-2b match_method 분포 (EXACT vs FUZZY)
- 7.5-2c fishery_code × match_method 교차

hourly-analysis-snapshot.sh (1시간 주기):
- P3.5 match_method 분포 + avg_confidence
- P3.6 fishery_code × match_method 교차
- D3.6 pair_tier 분포 + avg_sync_min + avg_sep_nm
- D3.7 G-02/G-03 건수 + gear_judgment 분포
- D3.8 reject 사유 1시간 journal 집계
2026-04-16 09:50:42 +09:00

465 lines
18 KiB
Bash
Raw Blame 히스토리

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Prediction-algorithm diagnostic snapshot collector (5-minute cycle; runs
# continuously until it is stopped by hand).
#
# Purpose: verify algorithm behaviour, including DAR-03 G-codes, pair trawling
#          and gear violations.
# Run:     nohup bash /home/apps/kcg-ai-prediction/scripts/diagnostic-snapshot.sh &
# Stop:    kill $(cat /home/apps/kcg-ai-prediction/data/diag/diag.pid)
# Output:  /home/apps/kcg-ai-prediction/data/diag/YYYYMMDD-HHMM.txt
#
# Optional environment overrides:
#   PGPASSWORD         database password (falls back to the built-in value)
#   PGCONNECT_TIMEOUT  libpq connect timeout in seconds (default: 10)
set -u

OUTDIR=/home/apps/kcg-ai-prediction/data/diag
# Every snapshot is written below OUTDIR; without it the loop could only
# fail forever, so stop immediately instead.
if ! mkdir -p "$OUTDIR"; then
  echo "[diag] cannot create output directory: $OUTDIR" >&2
  exit 1
fi
# PID file used by the documented "Stop" command above.
echo $$ > "$OUTDIR/diag.pid"

# SECURITY(review): a database credential is committed in this script. The
# value is kept only as a fallback so existing deployments keep working;
# prefer supplying PGPASSWORD from the environment (or a ~/.pgpass file) and
# rotating this password.
export PGPASSWORD="${PGPASSWORD:-Kcg2026ai}"
# Bound the connection attempt so an unreachable database cannot stall every
# query of a snapshot for the full TCP timeout.
export PGCONNECT_TIMEOUT="${PGCONNECT_TIMEOUT:-10}"

# -w: never prompt for a password; this script runs unattended.
# PSQL uses expanded (-x) output, PSQL_TABLE the default table layout.
# Both are plain strings that are expanded unquoted at their call sites.
PSQL="psql -w -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off -x"
PSQL_TABLE="psql -w -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off"
INTERVAL_SEC=300 # 5 minutes
# Main loop: each pass writes one snapshot file; it repeats until the process
# is killed.
while true; do
# Minute-resolution timestamp that becomes the snapshot file name.
STAMP=$(date '+%Y%m%d-%H%M')
OUT="$OUTDIR/$STAMP.txt"
# Start of the command group whose stdout and stderr are redirected into $OUT
# at its closing brace.
{
# Snapshot banner: generation time, host name and the configured interval.
echo "###################################################################"
echo "# PREDICTION DIAGNOSTIC SNAPSHOT (DAR-03 enhanced)"
echo "# generated: $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "# host: $(hostname)"
echo "# interval: ${INTERVAL_SEC}s"
echo "###################################################################"
# -------------------------------------------------------------------
# PART 1: overall indicators
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 1: 종합 지표 (last 5min)" \
  "================================================================="
# Single summary row over vessel_analysis_results analysed in the last
# 5 minutes: row counts by vessel_type / dark / transship / gear judgment /
# risk level, plus average and maximum risk_score and the dark percentage.
$PSQL_TABLE <<'SQL'
SELECT count(*) total,
count(*) FILTER (WHERE vessel_type != 'UNKNOWN') pipeline,
count(*) FILTER (WHERE vessel_type = 'UNKNOWN') lightweight,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE transship_suspect) transship,
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_violation,
count(*) FILTER (WHERE risk_level='CRITICAL') crit,
count(*) FILTER (WHERE risk_level='HIGH') high,
round(avg(risk_score)::numeric, 1) avg_risk,
max(risk_score) max_risk,
round(count(*) FILTER (WHERE is_dark)::numeric / NULLIF(count(*), 0) * 100, 1) AS dark_pct
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes';
SQL
# -------------------------------------------------------------------
# PART 2: dark-vessel deep diagnostics
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 2: DARK VESSEL 심층 진단" \
  "================================================================="

# 2-1: dark rows bucketed by features.dark_suspicion_score, with the average
# gap duration and average risk score per bucket.
printf '\n%s\n' "--- 2-1. dark_suspicion_score 히스토그램 ---"
$PSQL_TABLE <<'SQL'
SELECT CASE
WHEN (features->>'dark_suspicion_score')::int >= 90 THEN 'a_90-100 (CRITICAL_HIGH)'
WHEN (features->>'dark_suspicion_score')::int >= 70 THEN 'b_70-89 (CRITICAL)'
WHEN (features->>'dark_suspicion_score')::int >= 50 THEN 'c_50-69 (HIGH)'
WHEN (features->>'dark_suspicion_score')::int >= 30 THEN 'd_30-49 (WATCH)'
WHEN (features->>'dark_suspicion_score')::int >= 1 THEN 'e_1-29 (NONE_SCORED)'
ELSE 'f_0 (NOT_DARK)'
END bucket,
count(*) cnt,
round(avg(gap_duration_min)::numeric, 0) avg_gap_min,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY bucket ORDER BY bucket;
SQL

# 2-2: how often each entry of features.dark_patterns occurs, as a count and
# as a percentage of all dark rows in the window.
printf '\n%s\n' "--- 2-2. dark_patterns 발동 빈도 ---"
$PSQL_TABLE <<'SQL'
SELECT pattern,
count(*) cnt,
round(count(*)::numeric / NULLIF((SELECT count(*) FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND is_dark), 0) * 100, 1) AS pct
FROM kcg.vessel_analysis_results,
LATERAL jsonb_array_elements_text(features->'dark_patterns') AS pattern
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY pattern ORDER BY cnt DESC;
SQL

# 2-3: dark rows whose dark_patterns text contains the pattern names grouped
# under the p9 / p10 / p11 / coverage columns.
printf '\n%s\n' "--- 2-3. P9/P10/P11 + coverage 요약 ---"
$PSQL_TABLE <<'SQL'
WITH dark AS (
SELECT features FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND is_dark = true
)
SELECT count(*) total_dark,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%fishing_vessel_dark%'
OR features->>'dark_patterns' LIKE '%cargo_natural_gap%') p9,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%underway_deliberate_off%'
OR features->>'dark_patterns' LIKE '%anchored_natural_gap%') p10,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%heading_cog_mismatch%') p11,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%out_of_coverage%') coverage
FROM dark;
SQL

# 2-4: the ten rows with dark_tier = 'CRITICAL' that have the highest
# dark_suspicion_score.
printf '\n%s\n' "--- 2-4. CRITICAL dark 상위 10건 ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, gap_duration_min, zone_code, activity_state,
(features->>'dark_suspicion_score')::int AS score,
features->>'dark_tier' AS tier,
features->>'dark_patterns' AS patterns,
risk_score, risk_level
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'dark_tier' = 'CRITICAL'
ORDER BY (features->>'dark_suspicion_score')::int DESC
LIMIT 10;
SQL
# -------------------------------------------------------------------
# PART 3: transshipment detection
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 3: TRANSSHIPMENT 진단" \
  "================================================================="
# Summary of rows flagged transship_suspect: total, score >= 70, score in
# [50, 70), average score and average duration.
$PSQL_TABLE <<'SQL'
SELECT count(*) suspects,
count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 70) critical,
count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 50
AND (features->>'transship_score')::numeric < 70) high,
round(avg((features->>'transship_score')::numeric)::numeric, 1) avg_score,
round(avg(transship_duration_min)::numeric, 0) avg_dur
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND transship_suspect = true;
SQL

# 3-1: every suspected transshipment row, highest transship_score first.
printf '\n%s\n' "--- 3-1. 환적 의심 개별 건 ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, transship_pair_mmsi pair, transship_duration_min dur,
(features->>'transship_score')::numeric score,
features->>'transship_tier' tier, zone_code
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND transship_suspect = true
ORDER BY (features->>'transship_score')::numeric DESC;
SQL
# -------------------------------------------------------------------
# PART 4: G-code gear-violation diagnostics (new with DAR-03)
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 4: G코드 어구 위반 진단 (DAR-03)" \
  "================================================================="

# 4-1: row count and average risk per gear_judgment value; empty or NULL
# judgments are reported as '(none)'.
printf '\n%s\n' "--- 4-1. gear_judgment 분포 ---"
$PSQL_TABLE <<'SQL'
SELECT coalesce(NULLIF(gear_judgment, ''), '(none)') AS judgment,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY judgment ORDER BY cnt DESC;
SQL

# 4-2: occurrences and average risk per element of features.g_codes.
printf '\n%s\n' "--- 4-2. G코드별 발동 빈도 ---"
$PSQL_TABLE <<'SQL'
SELECT gcode,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results,
LATERAL jsonb_array_elements_text(features->'g_codes') AS gcode
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY gcode ORDER BY cnt DESC;
SQL

# 4-3: top 20 rows (by risk_score) whose g_codes text contains G-01.
printf '\n%s\n' "--- 4-3. G-01 수역-어구 위반 상세 (상위 20건) ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, zone_code, vessel_type, risk_score, gear_judgment,
(features->>'gear_violation_score')::int AS gv_score,
(features->'gear_violation_evidence'->'G-01'->>'allowed')::text AS allowed
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'g_codes' LIKE '%G-01%'
ORDER BY risk_score DESC LIMIT 20;
SQL

# 4-4: top 20 rows with pair_trawl_detected = 'true' or a G-06 code, together
# with the pair tier and the G-06 evidence fields.
printf '\n%s\n' "--- 4-4. G-06 쌍끌이 공조 탐지 (tier 포함) ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, zone_code, vessel_type, risk_score,
features->>'pair_tier' tier,
(features->'gear_violation_evidence'->'G-06'->>'sync_duration_min') sync_min,
(features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm') sep_nm,
(features->'gear_violation_evidence'->'G-06'->>'pair_mmsi') pair_mmsi,
features->>'pair_trawl_detected' pt
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND (features->>'pair_trawl_detected' = 'true' OR features->>'g_codes' LIKE '%G-06%')
ORDER BY risk_score DESC LIMIT 20;
SQL

# 4-4.1: pair-trawl detections grouped by pair_tier, with average similarity
# and average G-06 sync duration.
printf '\n%s\n' "--- 4-4.1 pair_trawl tier 분포 (DAR-03 신호 강도별) ---"
$PSQL_TABLE <<'SQL'
SELECT coalesce(features->>'pair_tier', '(none)') tier,
count(*) cnt,
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
round(avg((features->'gear_violation_evidence'->'G-06'->>'sync_duration_min')::numeric)::numeric, 1) avg_sync_min
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'pair_trawl_detected' = 'true'
GROUP BY tier ORDER BY cnt DESC;
SQL
echo ""
echo "--- 4-4.2 pair detection reject 사유 (최근 5분 로그) ---"
# Last five "pair detection: ... reject={...}" fragments from the service
# journal on host redis-211 over the past 5 minutes.
#   -n                  do not read stdin
#   BatchMode=yes       fail instead of prompting for a password/passphrase
#   ConnectTimeout=10   give up on an unreachable host rather than blocking
#                       the whole snapshot loop
# Best-effort: a non-zero ssh status is written as a one-line note in the
# snapshot, so a failed lookup is not mistaken for "no rejects".
# NOTE(review): PART 7 runs journalctl locally while this section goes through
# ssh — confirm that redis-211 is the intended log source.
ssh -n -o BatchMode=yes -o ConnectTimeout=10 redis-211 "sudo journalctl -u kcg-ai-prediction --no-pager --since '5 minutes ago' | grep -oE 'pair detection:[^$]+reject=\{[^}]+\}' | tail -5" 2>/dev/null \
  || echo "(reject log unavailable: ssh redis-211 exited with status $?)"
# 4-4.3: top 15 rows (by risk_score) whose g_codes text contains G-02, with
# the G-02 observed_at evidence and the registered fishery code.
printf '\n%s\n' "--- 4-4.3 G-02 금어기 조업 탐지 ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, zone_code, vessel_type, risk_score,
features->>'g_codes' g_codes,
(features->'gear_violation_evidence'->'G-02'->>'observed_at') observed_at,
features->>'registered_fishery_code' fishery_code
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'g_codes' LIKE '%G-02%'
ORDER BY risk_score DESC LIMIT 15;
SQL

# 4-4.4: top 15 rows whose g_codes text contains G-03, with the detected gear,
# registered fishery code and allowed gears from the G-03 evidence.
printf '\n%s\n' "--- 4-4.4 G-03 미등록/허가외 어구 탐지 ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, zone_code, vessel_type, risk_score,
features->>'g_codes' g_codes,
(features->'gear_violation_evidence'->'G-03'->>'detected_gear') detected,
(features->'gear_violation_evidence'->'G-03'->>'registered_fishery_code') registered,
(features->'gear_violation_evidence'->'G-03'->>'allowed_gears') allowed
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'g_codes' LIKE '%G-03%'
ORDER BY risk_score DESC LIMIT 15;
SQL

# 4-5: top 10 rows carrying G-04 or G-05, with the G-04 cycling count and the
# G-05 drift distance.
printf '\n%s\n' "--- 4-5. G-04 MMSI 조작 + G-05 어구 이동 ---"
$PSQL_TABLE <<'SQL'
SELECT mmsi, zone_code, vessel_type, risk_score,
features->>'g_codes' g_codes,
(features->'gear_violation_evidence'->'G-04'->>'cycling_count') g04_cycle,
(features->'gear_violation_evidence'->'G-05'->>'drift_nm') g05_drift
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND (features->>'g_codes' LIKE '%G-04%' OR features->>'g_codes' LIKE '%G-05%')
ORDER BY risk_score DESC LIMIT 10;
SQL

# 4-5.5: pair-trawl detections grouped by pair_type, with average similarity.
printf '\n%s\n' "--- 4-5.5 pair_type 분포 (DAR-03 base-target 탐색, 5min) ---"
$PSQL_TABLE <<'SQL'
SELECT coalesce(features->>'pair_type', '(none)') pair_type,
count(*) cnt,
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'pair_trawl_detected' = 'true'
GROUP BY pair_type ORDER BY cnt DESC;
SQL

# 4-6: prediction_events of category GEAR_ILLEGAL or MMSI_TAMPERING created in
# the last 5 minutes, counted per category / level / title.
printf '\n%s\n' "--- 4-6. GEAR_ILLEGAL 이벤트 ---"
$PSQL_TABLE <<'SQL'
SELECT category, level, title, count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '5 minutes'
AND category IN ('GEAR_ILLEGAL', 'MMSI_TAMPERING')
GROUP BY category, level, title ORDER BY cnt DESC;
SQL

# 4-7: rows whose violation_categories include ILLEGAL_GEAR, split by three
# gear_judgment values.
printf '\n%s\n' "--- 4-7. violation_categories ILLEGAL_GEAR ---"
$PSQL_TABLE <<'SQL'
SELECT count(*) total,
count(*) FILTER (WHERE gear_judgment = 'ZONE_VIOLATION') zone_viol,
count(*) FILTER (WHERE gear_judgment = 'PAIR_TRAWL') pair_trawl,
count(*) FILTER (WHERE gear_judgment = 'GEAR_MISMATCH') mismatch
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND 'ILLEGAL_GEAR' = ANY(violation_categories);
SQL
# -------------------------------------------------------------------
# PART 5: zone x vessel-type cross table (core of the G-01 check)
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 5: 수역 x 어구 타입 교차 (G-01 검증)" \
  "================================================================="
# Per (zone_code, vessel_type): total rows and rows carrying G-01, limited to
# known vessel types and zone codes that start with ZONE_.
$PSQL_TABLE <<'SQL'
SELECT zone_code, vessel_type, count(*) total,
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-01%') g01
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND vessel_type != 'UNKNOWN' AND zone_code LIKE 'ZONE_%'
GROUP BY zone_code, vessel_type ORDER BY zone_code, vessel_type;
SQL
# -------------------------------------------------------------------
# PART 6: events and KPI
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 6: 이벤트 + KPI" \
  "================================================================="
# Events created in the last 5 minutes, counted per category and level.
$PSQL_TABLE <<'SQL'
SELECT category, level, count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '5 minutes'
GROUP BY category, level ORDER BY cnt DESC;
SQL
# Full contents of the realtime KPI table (no time filter).
$PSQL_TABLE <<'SQL'
SELECT kpi_key, value, trend, delta_pct, updated_at
FROM kcg.prediction_kpi_realtime ORDER BY kpi_key;
SQL
# -------------------------------------------------------------------
# PART 7: cycle log and errors
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 7: 사이클 로그 (최근 6분)" \
  "================================================================="
# Last 20 journal lines of the kcg-ai-prediction unit from the past 6 minutes
# that match one of the cycle / detection / error keywords. journalctl's own
# stderr is discarded.
journalctl --no-pager -u kcg-ai-prediction --since '6 minutes ago' 2>/dev/null \
  | grep -E 'analysis cycle:|lightweight|pipeline dark:|event_generator:|pair_trawl|gear_violation|GEAR_ILLEGAL|ERROR|Traceback' \
  | tail -20
# -------------------------------------------------------------------
# PART 7.5: Korea-China fishery agreement registry matching (V029)
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 7.5: FISHERY PERMIT CN REGISTRY 매칭 현황" \
  "================================================================="

# 7.5-1: per permit_year, total fleet_vessels rows, rows with an MMSI and the
# resulting match percentage.
# NOTE(review): the heading says "current year", but the query reports every
# non-null permit_year — confirm which of the two is intended.
printf '\n%s\n' "--- 7.5-1. fleet_vessels 매칭 현황 (현재 연도) ---"
$PSQL_TABLE <<'SQL'
SELECT permit_year,
count(*) total,
count(mmsi) with_mmsi,
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) match_pct
FROM kcg.fleet_vessels
WHERE permit_year IS NOT NULL
GROUP BY permit_year ORDER BY permit_year DESC;
SQL

# 7.5-2: match rate per fishery_code for the current calendar year.
printf '\n%s\n' "--- 7.5-2. fishery_code 별 매칭률 (현재 연도) ---"
$PSQL_TABLE <<'SQL'
SELECT fishery_code, count(*) total, count(mmsi) matched,
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) pct
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY fishery_code ORDER BY total DESC;
SQL

# 7.5-2b: row count and average match_confidence per match_method; a NULL
# method is reported as '(unmatched)'.
printf '\n%s\n' "--- 7.5-2b. match_method 분포 (NAME_EXACT vs NAME_FUZZY) ---"
$PSQL_TABLE <<'SQL'
SELECT coalesce(match_method, '(unmatched)') method,
count(*) cnt,
round(avg(match_confidence)::numeric, 3) avg_conf
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY method ORDER BY cnt DESC;
SQL

# 7.5-2c: per fishery_code, counts of NAME_EXACT, NAME_FUZZY and rows with no
# MMSI, plus the total.
printf '\n%s\n' "--- 7.5-2c. fishery_code × match_method 교차 ---"
$PSQL_TABLE <<'SQL'
SELECT fishery_code,
count(*) FILTER (WHERE match_method = 'NAME_EXACT') exact,
count(*) FILTER (WHERE match_method = 'NAME_FUZZY') fuzzy,
count(*) FILTER (WHERE mmsi IS NULL) unmatched,
count(*) total
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY fishery_code ORDER BY total DESC;
SQL

# 7.5-3: the 15 most frequent gear_code values among rows analysed in the last
# 5 minutes, with average risk.
printf '\n%s\n' "--- 7.5-3. vessel_analysis_results.gear_code 분포 (last 5min) ---"
$PSQL_TABLE <<'SQL'
SELECT coalesce(gear_code, '(null)') gear_code,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY gear_code ORDER BY cnt DESC LIMIT 15;
SQL

# 7.5-4: the ten current-year vessels with an MMSI that were seen most
# recently (NULL last_seen_at sorted last).
printf '\n%s\n' "--- 7.5-4. 최근 매칭된 선박 (top 10 by last_seen_at) ---"
$PSQL_TABLE <<'SQL'
SELECT permit_no, fishery_code, name_cn, mmsi, match_method,
match_confidence, last_seen_at
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int AND mmsi IS NOT NULL
ORDER BY last_seen_at DESC NULLS LAST LIMIT 10;
SQL
# -------------------------------------------------------------------
# PART 8: per-zone overall cross table
# -------------------------------------------------------------------
printf '%s\n' \
  "" \
  "=================================================================" \
  "PART 8: 해역별 종합 교차표" \
  "================================================================="
# Per zone_code: total rows, dark / transship / gear-violation counts,
# CRITICAL and HIGH counts and the average risk score, largest zone first.
$PSQL_TABLE <<'SQL'
SELECT zone_code, count(*) total,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE transship_suspect) transship,
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_viol,
count(*) FILTER (WHERE risk_level='CRITICAL') crit,
count(*) FILTER (WHERE risk_level='HIGH') high,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY zone_code ORDER BY total DESC;
SQL
# Snapshot footer.
echo ""
echo "================================================================="
echo "END OF SNAPSHOT $STAMP"
echo "================================================================="
# End of the command group: everything it printed (stdout and stderr) becomes
# the snapshot file.
} > "$OUT" 2>&1
# Progress line on the script's own stdout: time, file path and line count.
echo "[diag] $(date '+%H:%M:%S') saved: $OUT ($(wc -l < "$OUT") lines)"
# Sleep only for the remainder of the current interval. A fixed sleep after
# the work stretches the period to INTERVAL_SEC plus the snapshot runtime,
# while every query looks back exactly 5 minutes, so rows analysed in that
# gap would appear in no snapshot. SECONDS is bash's count of seconds since
# the shell started, so iterations begin on multiples of INTERVAL_SEC from
# start-up; an overrunning snapshot simply waits for the next multiple.
sleep "$(( INTERVAL_SEC - SECONDS % INTERVAL_SEC ))"
done