kcg-ai-monitoring/prediction/scripts/diagnostic-snapshot.sh

327 lines
13 KiB
Bash
Raw Blame 히스토리

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Diagnostic snapshot collector for the prediction algorithm
# (5-minute cycle; runs continuously until it is stopped by hand).
#
# Purpose: verify behaviour after the algorithm redesign by tracing individual
#          verdicts, not just aggregate counts.
# Run:     nohup bash /home/apps/kcg-ai-prediction/scripts/diagnostic-snapshot.sh &
# Stop:    kill $(cat /home/apps/kcg-ai-prediction/data/diag/diag.pid)
# Output:  /home/apps/kcg-ai-prediction/data/diag/YYYYMMDD-HHMM.txt
# Env:     PGPASSWORD (optional) - database password; when set in the calling
#          environment it takes precedence over the built-in fallback.
set -u

OUTDIR=/home/apps/kcg-ai-prediction/data/diag
PIDFILE="$OUTDIR/diag.pid"
mkdir -p "$OUTDIR"
echo $$ > "$PIDFILE"

# Delete the pid file when this process exits so that the documented
# "kill $(cat .../diag.pid)" never targets a stale (possibly reused) pid.
# The file is removed only while it still records our own pid, so a newer
# collector instance that has overwritten it keeps its entry.
remove_own_pidfile() {
  local recorded=""
  [[ -r "$PIDFILE" ]] && read -r recorded < "$PIDFILE"
  if [[ "$recorded" == "$$" ]]; then
    rm -f -- "$PIDFILE"
  fi
}
trap remove_own_pidfile EXIT

# NOTE(security): a credential committed to the script is readable by anyone
# with access to the repository. A PGPASSWORD already present in the
# environment now wins; the literal below is kept only as a fallback so that
# existing deployments keep working. Prefer supplying it externally (for
# example via the environment or a ~/.pgpass file) and rotating this value.
export PGPASSWORD="${PGPASSWORD:-Kcg2026ai}"

# psql command lines are stored as plain strings and expanded unquoted by the
# callers, so they must not contain arguments with embedded whitespace.
PSQL_TABLE="psql -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off"
# Same connection with expanded (-x) output; the snapshot loop visible in this
# file only uses PSQL_TABLE.
PSQL="$PSQL_TABLE -x"

INTERVAL_SEC=300 # 5 minutes
# Main collection loop: every INTERVAL_SEC seconds write one snapshot file
# "$OUTDIR/<YYYYMMDD-HHMM>.txt" containing the output of all diagnostic
# queries and log extracts below (stdout and stderr of the whole group).
#
# Scheduling: the SQL sections look at "now() - interval '5 minutes'", so the
# loop period must not exceed the interval or rows analysed in between are
# covered by no snapshot. The time spent producing a snapshot is therefore
# subtracted from the sleep, using bash's SECONDS counter as the clock.
#
# $PSQL_TABLE is expanded unquoted on purpose: it holds a whole command line
# that has to be word-split into the program name and its arguments.
next_run=$SECONDS
while true; do
  STAMP=$(date '+%Y%m%d-%H%M')
  OUT="$OUTDIR/$STAMP.txt"
  {
    echo "###################################################################"
    echo "# PREDICTION DIAGNOSTIC SNAPSHOT"
    echo "# generated: $(date '+%Y-%m-%d %H:%M:%S %Z')"
    echo "# host: $(hostname)"
    echo "# interval: ${INTERVAL_SEC}s"
    echo "###################################################################"
    #===================================================================
    # PART 1: overall metrics
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 1: 종합 지표 (last 5min)"
    echo "================================================================="
    $PSQL_TABLE << 'SQL'
SELECT count(*) total,
count(*) FILTER (WHERE vessel_type != 'UNKNOWN') pipeline,
count(*) FILTER (WHERE vessel_type = 'UNKNOWN') lightweight,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE transship_suspect) transship,
count(*) FILTER (WHERE risk_level='CRITICAL') crit,
count(*) FILTER (WHERE risk_level='HIGH') high,
round(avg(risk_score)::numeric, 1) avg_risk,
max(risk_score) max_risk,
round(count(*) FILTER (WHERE is_dark)::numeric / NULLIF(count(*), 0) * 100, 1) AS dark_pct
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes';
SQL
    #===================================================================
    # PART 2: dark vessel in-depth diagnostics
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 2: DARK VESSEL 심층 진단"
    echo "================================================================="
    echo ""
    echo "--- 2-1. dark_suspicion_score 히스토그램 ---"
    # Histogram of dark_suspicion_score over the rows flagged is_dark.
    $PSQL_TABLE << 'SQL'
SELECT CASE
WHEN (features->>'dark_suspicion_score')::int >= 90 THEN 'a_90-100 (CRITICAL_HIGH)'
WHEN (features->>'dark_suspicion_score')::int >= 70 THEN 'b_70-89 (CRITICAL)'
WHEN (features->>'dark_suspicion_score')::int >= 50 THEN 'c_50-69 (HIGH)'
WHEN (features->>'dark_suspicion_score')::int >= 30 THEN 'd_30-49 (WATCH)'
WHEN (features->>'dark_suspicion_score')::int >= 1 THEN 'e_1-29 (NONE_SCORED)'
ELSE 'f_0 (NOT_DARK)'
END bucket,
count(*) cnt,
round(avg(gap_duration_min)::numeric, 0) avg_gap_min,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY bucket ORDER BY bucket;
SQL
    echo ""
    echo "--- 2-2. dark_patterns 발동 빈도 (어떤 규칙이 얼마나 적용되는지) ---"
    # How often each dark_patterns entry fires, as a share of all dark rows.
    $PSQL_TABLE << 'SQL'
SELECT pattern,
count(*) cnt,
round(count(*)::numeric / NULLIF((SELECT count(*) FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND is_dark), 0) * 100, 1) AS pct
FROM kcg.vessel_analysis_results,
LATERAL jsonb_array_elements_text(features->'dark_patterns') AS pattern
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY pattern ORDER BY cnt DESC;
SQL
    echo ""
    echo "--- 2-3. P9 선종별 dark 분포 (신규 패턴 검증) ---"
    # P9: dark rows grouped by the vessel-kind patterns (new pattern check).
    $PSQL_TABLE << 'SQL'
SELECT
CASE WHEN features->>'dark_patterns' LIKE '%fishing_vessel_dark%' THEN 'FISHING(+10)'
WHEN features->>'dark_patterns' LIKE '%cargo_natural_gap%' THEN 'CARGO(-10)'
ELSE 'NO_KIND_EFFECT' END AS p9_effect,
count(*) cnt,
round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score,
round(avg(gap_duration_min)::numeric, 0) avg_gap
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY p9_effect ORDER BY cnt DESC;
SQL
    echo ""
    echo "--- 2-4. P10 항해상태 dark 분포 (신규 패턴 검증) ---"
    # P10: dark rows grouped by the navigation-status patterns.
    $PSQL_TABLE << 'SQL'
SELECT
CASE WHEN features->>'dark_patterns' LIKE '%underway_deliberate_off%' THEN 'UNDERWAY_OFF(+20)'
WHEN features->>'dark_patterns' LIKE '%anchored_natural_gap%' THEN 'ANCHORED(-15)'
ELSE 'NO_STATUS_EFFECT' END AS p10_effect,
count(*) cnt,
round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY p10_effect ORDER BY cnt DESC;
SQL
    echo ""
    echo "--- 2-5. P11 heading/COG 불일치 (신규 패턴 검증) ---"
    # P11: dark rows with / without the heading_cog_mismatch pattern.
    $PSQL_TABLE << 'SQL'
SELECT
CASE WHEN features->>'dark_patterns' LIKE '%heading_cog_mismatch%' THEN 'MISMATCH(+15)'
ELSE 'NO_MISMATCH' END AS p11_effect,
count(*) cnt,
round(avg((features->>'dark_suspicion_score')::int)::numeric, 1) avg_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND is_dark = true
GROUP BY p11_effect ORDER BY cnt DESC;
SQL
    echo ""
    echo "--- 2-6. GAP 구간별 dark_tier 교차표 (임계값 100분 검증) ---"
    # Cross-tab of gap-duration bucket vs dark_tier (checks the 100-minute
    # threshold).
    # NOTE(review): a NULL gap_duration_min fails every "<" test and would be
    # counted in the ELSE bucket 'e_gte720' - confirm whether the column can
    # be NULL.
    $PSQL_TABLE << 'SQL'
SELECT CASE
WHEN gap_duration_min < 100 THEN 'a_lt100 (NOT_DARK 예상)'
WHEN gap_duration_min < 180 THEN 'b_100-179'
WHEN gap_duration_min < 360 THEN 'c_180-359'
WHEN gap_duration_min < 720 THEN 'd_360-719'
ELSE 'e_gte720' END gap_bucket,
count(*) total,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE features->>'dark_tier' = 'CRITICAL') crit,
count(*) FILTER (WHERE features->>'dark_tier' = 'HIGH') high,
count(*) FILTER (WHERE features->>'dark_tier' = 'WATCH') watch,
count(*) FILTER (WHERE features->>'dark_tier' = 'NONE') tier_none
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY gap_bucket ORDER BY gap_bucket;
SQL
    echo ""
    echo "--- 2-7. CRITICAL dark 상위 10건 (개별 판정 상세) ---"
    # Top 10 rows with dark_tier CRITICAL, highest score first.
    $PSQL_TABLE << 'SQL'
SELECT mmsi, gap_duration_min, zone_code, activity_state,
(features->>'dark_suspicion_score')::int AS score,
features->>'dark_tier' AS tier,
features->>'dark_patterns' AS patterns,
risk_score, risk_level
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND features->>'dark_tier' = 'CRITICAL'
ORDER BY (features->>'dark_suspicion_score')::int DESC
LIMIT 10;
SQL
    #===================================================================
    # PART 3: transshipment detection in-depth diagnostics
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 3: TRANSSHIPMENT 심층 진단"
    echo "================================================================="
    echo ""
    echo "--- 3-1. 환적 의심 건수 + 점수 분포 ---"
    # Count of transshipment suspects and their score distribution.
    $PSQL_TABLE << 'SQL'
SELECT count(*) total_suspects,
count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 70) critical,
count(*) FILTER (WHERE (features->>'transship_score')::numeric >= 50
AND (features->>'transship_score')::numeric < 70) high,
round(avg((features->>'transship_score')::numeric)::numeric, 1) avg_score,
round(avg(transship_duration_min)::numeric, 0) avg_duration_min
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND transship_suspect = true;
SQL
    echo ""
    echo "--- 3-2. 환적 의심 개별 건 상세 (전체) ---"
    # Every transshipment suspect row in detail, highest score first.
    $PSQL_TABLE << 'SQL'
SELECT mmsi, transship_pair_mmsi AS pair_mmsi,
transship_duration_min AS dur_min,
(features->>'transship_score')::numeric AS score,
features->>'transship_tier' AS tier,
zone_code,
activity_state,
risk_score
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
AND transship_suspect = true
ORDER BY (features->>'transship_score')::numeric DESC;
SQL
    echo ""
    echo "--- 3-3. 환적 후보 선종 분포 (Stage 1 이종 쌍 검증) ---"
    echo " (이 쿼리는 journalctl 로그에서 추출)"
    # Taken from the service journal rather than the database: the most
    # recent "transshipment:" log fragment of the last 6 minutes. journalctl
    # errors are discarded, so a missing journal just yields no line.
    journalctl -u kcg-ai-prediction --since '6 minutes ago' --no-pager 2>/dev/null | \
      grep -o 'transshipment:.*' | tail -1
    #===================================================================
    # PART 4: events + KPI
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 4: 이벤트 + KPI (시스템 출력 검증)"
    echo "================================================================="
    echo ""
    echo "--- 4-1. prediction_events (last 5min) ---"
    $PSQL_TABLE << 'SQL'
SELECT category, level, count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '5 minutes'
GROUP BY category, level ORDER BY cnt DESC;
SQL
    echo ""
    echo "--- 4-2. KPI 실시간 ---"
    # Current contents of the realtime KPI table (no time filter).
    $PSQL_TABLE << 'SQL'
SELECT kpi_key, value, trend, delta_pct, updated_at
FROM kcg.prediction_kpi_realtime ORDER BY kpi_key;
SQL
    #===================================================================
    # PART 5: signal-batch static-info enrichment verification
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 5: signal-batch 정적정보 보강 검증"
    echo "================================================================="
    echo ""
    echo "--- 5-1. 직전 사이클 enrich 로그 ---"
    # Last two enrich-related journal lines from the previous 6 minutes.
    journalctl -u kcg-ai-prediction --since '6 minutes ago' --no-pager 2>/dev/null | \
      grep -E 'signal-batch enrich|fetch_recent_detail' | tail -2
    echo ""
    echo "--- 5-2. features 내 신규 패턴(P9/P10/P11) 적용 비율 ---"
    # Share of dark rows to which each new pattern group (P9/P10/P11) applied.
    $PSQL_TABLE << 'SQL'
WITH dark_vessels AS (
SELECT features FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes' AND is_dark = true
)
SELECT
count(*) AS total_dark,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%fishing_vessel_dark%'
OR features->>'dark_patterns' LIKE '%cargo_natural_gap%') AS p9_applied,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%underway_deliberate_off%'
OR features->>'dark_patterns' LIKE '%anchored_natural_gap%') AS p10_applied,
count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%heading_cog_mismatch%') AS p11_applied,
round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%fishing_vessel_dark%'
OR features->>'dark_patterns' LIKE '%cargo_natural_gap%')::numeric
/ NULLIF(count(*), 0) * 100, 1) AS p9_pct,
round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%underway_deliberate_off%'
OR features->>'dark_patterns' LIKE '%anchored_natural_gap%')::numeric
/ NULLIF(count(*), 0) * 100, 1) AS p10_pct,
round(count(*) FILTER (WHERE features->>'dark_patterns' LIKE '%heading_cog_mismatch%')::numeric
/ NULLIF(count(*), 0) * 100, 1) AS p11_pct
FROM dark_vessels;
SQL
    #===================================================================
    # PART 6: cycle log (previous 6 minutes)
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 6: 사이클 로그 (최근 6분)"
    echo "================================================================="
    # Last 20 journal lines matching the cycle, stage, and error keywords.
    journalctl -u kcg-ai-prediction --since '6 minutes ago' --no-pager 2>/dev/null | \
      grep -E 'analysis cycle:|lightweight analysis:|pipeline dark:|event_generator:|kpi_writer:|stats_aggregator|enrich|transship|ERROR|Traceback' | \
      tail -20
    #===================================================================
    # PART 7: zone x risk-level cross-tab (operational metrics)
    #===================================================================
    echo ""
    echo "================================================================="
    echo "PART 7: 해역별 × 위험도 교차표"
    echo "================================================================="
    $PSQL_TABLE << 'SQL'
SELECT zone_code,
count(*) total,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE risk_level='CRITICAL') crit,
count(*) FILTER (WHERE risk_level='HIGH') high,
round(avg(risk_score)::numeric, 1) avg_risk,
count(*) FILTER (WHERE transship_suspect) transship
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '5 minutes'
GROUP BY zone_code ORDER BY total DESC;
SQL
    echo ""
    echo "================================================================="
    echo "END OF SNAPSHOT $STAMP"
    echo "================================================================="
  } > "$OUT" 2>&1
  # Progress line goes to the script's own stdout, not into the snapshot file.
  echo "[diag] $(date '+%H:%M:%S') saved: $OUT ($(wc -l < "$OUT") lines)"

  # Sleep only for the remainder of the interval so the period stays at
  # INTERVAL_SEC instead of INTERVAL_SEC plus the snapshot's run time. If a
  # snapshot overran the interval, start the next one immediately and
  # re-anchor the schedule to the current time.
  next_run=$(( next_run + INTERVAL_SEC ))
  wait_sec=$(( next_run - SECONDS ))
  if (( wait_sec > 0 )); then
    sleep "$wait_sec"
  else
    next_run=$SECONDS
  fi
done