#!/bin/bash
# Diagnostic snapshot collector for the prediction algorithm
# (5-minute cycle, runs continuously until stopped manually).
#
# Purpose: verify behavior after the algorithm redesign. Traces individual
#          decision steps rather than producing plain aggregates.
# Run:     nohup bash /home/apps/kcg-ai-prediction/scripts/diagnostic-snapshot.sh &
# Stop:    kill $(cat /home/apps/kcg-ai-prediction/data/diag/diag.pid)
# Output:  /home/apps/kcg-ai-prediction/data/diag/YYYYMMDD-HHMM.txt
#
# Environment:
#   PGPASSWORD  optional; overrides the built-in default database password.
#
# `set -e` / `pipefail` are not enabled: a failed query or a grep that matches
# nothing must not terminate the collection loop. psql errors end up in the
# snapshot file because the snapshot's stderr is redirected into it.

set -u

readonly OUTDIR=/home/apps/kcg-ai-prediction/data/diag
readonly PID_FILE="$OUTDIR/diag.pid"
readonly INTERVAL_SEC=300 # 5 minutes; the SQL windows below are hard-coded separately

# Separator used around PART titles in the snapshot.
readonly RULE="================================================================="

if ! mkdir -p "$OUTDIR"; then
  printf 'diagnostic-snapshot: cannot create %s\n' "$OUTDIR" >&2
  exit 1
fi
echo $$ > "$PID_FILE"

# Removes the PID file, but only if it still holds this process's PID, so a
# newer instance that overwrote the file is not affected.
cleanup() {
  if [[ -f "$PID_FILE" ]] && [[ "$(< "$PID_FILE")" == "$$" ]]; then
    rm -f -- "$PID_FILE"
  fi
}
trap cleanup EXIT

# NOTE(review): the fallback password is kept only so existing deployments keep
# working; prefer supplying PGPASSWORD from the environment or a ~/.pgpass file
# and then dropping the literal from this script.
export PGPASSWORD="${PGPASSWORD:-Kcg2026ai}"

# psql invocation kept as an array so each argument stays a single word.
readonly PSQL_TABLE=(psql -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off)

# Runs the SQL read from stdin through psql (pager disabled).
run_sql() {
  "${PSQL_TABLE[@]}"
}

# Prints the prediction service's journal entries from the last 6 minutes.
# journalctl's own error output is discarded.
recent_journal() {
  journalctl -u kcg-ai-prediction --since '6 minutes ago' --no-pager 2>/dev/null
}

# Prints a blank line followed by a PART title framed by separator lines.
#   $1 - title text
banner() {
  echo ""
  echo "$RULE"
  printf '%s\n' "$1"
  echo "$RULE"
}

# Prints a blank line followed by a sub-section title.
#   $1 - title text
subsection() {
  echo ""
  printf '%s\n' "$1"
}

# Writes one complete snapshot to stdout.
#   $1 - timestamp label printed in the closing banner
write_snapshot() {
  local stamp=$1

  echo "###################################################################"
  echo "# PREDICTION DIAGNOSTIC SNAPSHOT"
  echo "# generated: $(date '+%Y-%m-%d %H:%M:%S %Z')"
  echo "# host: $(hostname)"
  echo "# interval: ${INTERVAL_SEC}s"
  echo "###################################################################"

  #===================================================================
  # PART 1: overall metrics
  #===================================================================
  banner "PART 1: 종합 지표 (last 5min)"
  run_sql << 'SQL'
SELECT count(*) total,
       count(*) FILTER (WHERE vessel_type != 'UNKNOWN') pipeline,
       count(*) FILTER (WHERE vessel_type = 'UNKNOWN') lightweight,
       count(*) FILTER (WHERE is_dark) dark,
       count(*) FILTER (WHERE transship_suspect) transship,
       count(*) FILTER (WHERE risk_level='CRITICAL') crit,
       count(*) FILTER (WHERE risk_level='HIGH') high,
       round(avg(risk_score)::numeric, 1) avg_risk,
       max(risk_score) max_risk,
       round(count(*) FILTER (WHERE is_dark)::numeric / NULLIF(count(*), 0) * 100, 1) AS dark_pct
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes';
SQL

  #===================================================================
  # PART 2: dark vessel deep diagnostics
  #===================================================================
  banner "PART 2: DARK VESSEL 심층 진단"

  subsection "--- 2-1. dark_suspicion_score 히스토그램 ---"
  run_sql << 'SQL'
SELECT CASE
         WHEN (features->>'dark_suspicion_score')::int >= 90 THEN 'a_90-100 (CRITICAL_HIGH)'
         WHEN (features->>'dark_suspicion_score')::int >= 70 THEN 'b_70-89 (CRITICAL)'
         WHEN (features->>'dark_suspicion_score')::int >= 50 THEN 'c_50-69 (HIGH)'
         WHEN (features->>'dark_suspicion_score')::int >= 30 THEN 'd_30-49 (WATCH)'
         WHEN (features->>'dark_suspicion_score')::int >= 1 THEN 'e_1-29 (NONE_SCORED)'
         ELSE 'f_0 (NOT_DARK)'
       END bucket,
       count(*) cnt,
       round(avg(gap_duration_min)::numeric, 0) avg_gap_min,
       round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND is_dark = true
GROUP BY bucket
ORDER BY bucket;
SQL

  subsection "--- 2-2. dark_patterns 발동 빈도 (어떤 규칙이 얼마나 적용되는지) ---"
  run_sql << 'SQL'
SELECT pattern,
       count(*) cnt,
       round(count(*)::numeric / NULLIF((SELECT count(*) FROM kcg.vessel_analysis_results WHERE analyzed_at > now() - interval '5 minutes' AND is_dark), 0) * 100, 1) AS pct
FROM kcg.vessel_analysis_results,
     LATERAL jsonb_array_elements_text(features->'dark_patterns') AS pattern
WHERE analyzed_at > now() - interval '5 minutes'
  AND is_dark = true
GROUP BY pattern
ORDER BY cnt DESC;
SQL

  subsection "--- 2-3. P9 선종별 dark 분포 (신규 패턴 검증) ---"
  run_sql << 'SQL'
SELECT CASE
         WHEN features->>'dark_patterns' LIKE '%fishing_vessel_dark%' THEN 'FISHING(+10)'
         WHEN features->>'dark_patterns' LIKE '%cargo_natural_gap%' THEN 'CARGO(-10)'
         ELSE 'NO_KIND_EFFECT'
       END AS p9_effect,
       count(*) cnt,
       round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score,
       round(avg(gap_duration_min)::numeric, 0) avg_gap
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND is_dark = true
GROUP BY p9_effect
ORDER BY cnt DESC;
SQL

  subsection "--- 2-4. P10 항해상태 dark 분포 (신규 패턴 검증) ---"
  run_sql << 'SQL'
SELECT CASE
         WHEN features->>'dark_patterns' LIKE '%underway_deliberate_off%' THEN 'UNDERWAY_OFF(+20)'
         WHEN features->>'dark_patterns' LIKE '%anchored_natural_gap%' THEN 'ANCHORED(-15)'
         ELSE 'NO_STATUS_EFFECT'
       END AS p10_effect,
       count(*) cnt,
       round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND is_dark = true
GROUP BY p10_effect
ORDER BY cnt DESC;
SQL

  subsection "--- 2-5. P11 heading/COG 불일치 (신규 패턴 검증) ---"
  run_sql << 'SQL'
SELECT CASE
         WHEN features->>'dark_patterns' LIKE '%heading_cog_mismatch%' THEN 'MISMATCH(+15)'
         ELSE 'NO_MISMATCH'
       END AS p11_effect,
       count(*) cnt,
       round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND is_dark = true
GROUP BY p11_effect
ORDER BY cnt DESC;
SQL

  subsection "--- 2-6. GAP 구간별 dark_tier 교차표 (임계값 100분 검증) ---"
  run_sql << 'SQL'
SELECT CASE
         WHEN gap_duration_min < 100 THEN 'a_lt100 (NOT_DARK 예상)'
         WHEN gap_duration_min < 180 THEN 'b_100-179'
         WHEN gap_duration_min < 360 THEN 'c_180-359'
         WHEN gap_duration_min < 720 THEN 'd_360-719'
         ELSE 'e_gte720'
       END gap_bucket,
       count(*) total,
       count(*) FILTER (WHERE is_dark) dark,
       count(*) FILTER (WHERE features->>'dark_tier' = 'CRITICAL') crit,
       count(*) FILTER (WHERE features->>'dark_tier' = 'HIGH') high,
       count(*) FILTER (WHERE features->>'dark_tier' = 'WATCH') watch,
       count(*) FILTER (WHERE features->>'dark_tier' = 'NONE') tier_none
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY gap_bucket
ORDER BY gap_bucket;
SQL

  subsection "--- 2-7. CRITICAL dark 상위 10건 (개별 판정 상세) ---"
  run_sql << 'SQL'
SELECT mmsi,
       gap_duration_min,
       zone_code,
       activity_state,
       (features->>'dark_suspicion_score')::int AS score,
       features->>'dark_tier' AS tier,
       features->>'dark_patterns' AS patterns,
       risk_score,
       risk_level
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND features->>'dark_tier' = 'CRITICAL'
ORDER BY (features->>'dark_suspicion_score')::int DESC
LIMIT 10;
SQL

  #===================================================================
  # PART 3: transshipment detection deep diagnostics
  #===================================================================
  banner "PART 3: TRANSSHIPMENT 심층 진단"

  subsection "--- 3-1. 환적 의심 건수 + 점수 분포 ---"
  run_sql << 'SQL'
SELECT count(*) total_suspects,
       count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 70) critical,
       count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 50 AND (features->>'transship_score')::numeric < 70) high,
       round(avg((features->>'transship_score')::numeric)::numeric, 1) avg_score,
       round(avg(transship_duration_min)::numeric, 0) avg_duration_min
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND transship_suspect = true;
SQL

  subsection "--- 3-2. 환적 의심 개별 건 상세 (전체) ---"
  run_sql << 'SQL'
SELECT mmsi,
       transship_pair_mmsi AS pair_mmsi,
       transship_duration_min AS dur_min,
       (features->>'transship_score')::numeric AS score,
       features->>'transship_tier' AS tier,
       zone_code,
       activity_state,
       risk_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
  AND transship_suspect = true
ORDER BY (features->>'transship_score')::numeric DESC;
SQL

  subsection "--- 3-3. 환적 후보 선종 분포 (Stage 1 이종 쌍 검증) ---"
  echo " (이 쿼리는 journalctl 로그에서 추출)"
  # Only the most recent matching log entry is shown.
  recent_journal \
    | grep -o 'transshipment:.*' \
    | tail -1

  #===================================================================
  # PART 4: events + KPI
  #===================================================================
  banner "PART 4: 이벤트 + KPI (시스템 출력 검증)"

  subsection "--- 4-1. prediction_events (last 5min) ---"
  run_sql << 'SQL'
SELECT category,
       level,
       count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '5 minutes'
GROUP BY category, level
ORDER BY cnt DESC;
SQL

  subsection "--- 4-2. KPI 실시간 ---"
  run_sql << 'SQL'
SELECT kpi_key,
       value,
       trend,
       delta_pct,
       updated_at
FROM kcg.prediction_kpi_realtime
ORDER BY kpi_key;
SQL

  #===================================================================
  # PART 5: signal-batch static-info enrichment check
  #===================================================================
  banner "PART 5: signal-batch 정적정보 보강 검증"

  subsection "--- 5-1. 직전 사이클 enrich 로그 ---"
  recent_journal \
    | grep -E 'signal-batch enrich|fetch_recent_detail' \
    | tail -2

  subsection "--- 5-2. features 내 신규 패턴(P9/P10/P11) 적용 비율 ---"
  run_sql << 'SQL'
WITH dark_vessels AS (
  SELECT features
  FROM kcg.vessel_analysis_results
  WHERE analyzed_at > now() - interval '5 minutes'
    AND is_dark = true
)
SELECT count(*) AS total_dark,
       count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%fishing_vessel_dark%' OR features->>'dark_patterns' LIKE '%cargo_natural_gap%') AS p9_applied,
       count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%underway_deliberate_off%' OR features->>'dark_patterns' LIKE '%anchored_natural_gap%') AS p10_applied,
       count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%heading_cog_mismatch%') AS p11_applied,
       round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%fishing_vessel_dark%' OR features->>'dark_patterns' LIKE '%cargo_natural_gap%')::numeric / NULLIF(count(*), 0) * 100, 1) AS p9_pct,
       round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%underway_deliberate_off%' OR features->>'dark_patterns' LIKE '%anchored_natural_gap%')::numeric / NULLIF(count(*), 0) * 100, 1) AS p10_pct,
       round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%heading_cog_mismatch%')::numeric / NULLIF(count(*), 0) * 100, 1) AS p11_pct
FROM dark_vessels;
SQL

  #===================================================================
  # PART 6: cycle log (previous 6 minutes)
  #===================================================================
  banner "PART 6: 사이클 로그 (최근 6분)"
  recent_journal \
    | grep -E 'analysis cycle:|lightweight analysis:|pipeline dark:|event_generator:|kpi_writer:|stats_aggregator|enrich|transship|ERROR|Traceback' \
    | tail -20

  #===================================================================
  # PART 7: zone x risk level cross table (operational metrics)
  #===================================================================
  banner "PART 7: 해역별 × 위험도 교차표"
  run_sql << 'SQL'
SELECT zone_code,
       count(*) total,
       count(*) FILTER (WHERE is_dark) dark,
       count(*) FILTER (WHERE risk_level='CRITICAL') crit,
       count(*) FILTER (WHERE risk_level='HIGH') high,
       round(avg(risk_score)::numeric, 1) avg_risk,
       count(*) FILTER (WHERE transship_suspect) transship
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY zone_code
ORDER BY total DESC;
SQL

  banner "END OF SNAPSHOT $stamp"
}

# Main loop: one snapshot file per cycle, named after the minute it started.
while true; do
  STAMP=$(date '+%Y%m%d-%H%M')
  OUT="$OUTDIR/$STAMP.txt"

  # stdout and stderr both go to the snapshot so query errors are preserved.
  write_snapshot "$STAMP" > "$OUT" 2>&1

  echo "[diag] $(date '+%H:%M:%S') saved: $OUT ($(wc -l < "$OUT") lines)"
  sleep "$INTERVAL_SEC"
done