kcg-ai-monitoring/prediction/scripts/hourly-analysis-snapshot.sh
htlee 37ae1bfa48 feat(prediction): 서버 스크립트에 tier/match_method/G-02/G-03 추적 추가
scheduler.py features write 확장:
- pair_tier (STRONG/PROBABLE/SUSPECT)
- pair_type, pair_reject_reason
- similarity, confidence
- registered_fishery_code
→ SUSPECT tier 까지 raw_pair 로 보존하여 통계 집계 가능

diagnostic-snapshot.sh (5분 주기):
- 4-4.1 pair_trawl tier 분포 + avg_sync_min
- 4-4.2 reject 사유 journal 로그 tail
- 4-4.3 G-02 금어기 상세 (observed_at, fishery_code)
- 4-4.4 G-03 미등록 어구 상세 (detected/registered/allowed)
- 7.5-2b match_method 분포 (EXACT vs FUZZY)
- 7.5-2c fishery_code × match_method 교차

hourly-analysis-snapshot.sh (1시간 주기):
- P3.5 match_method 분포 + avg_confidence
- P3.6 fishery_code × match_method 교차
- D3.6 pair_tier 분포 + avg_sync_min + avg_sep_nm
- D3.7 G-02/G-03 건수 + gear_judgment 분포
- D3.8 reject 사유 1시간 journal 집계
2026-04-16 09:50:42 +09:00

390 lines
15 KiB
Bash
Executable File
Raw Blame 히스토리

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# prediction 시간당 상태 스냅샷 수집기 (DAR-03 G코드 + 어구 위반 포함)
# 실행 환경: redis-211 서버 (prediction 서비스 호스트)
# cron: 0 * * * * /home/apps/kcg-ai-prediction/scripts/hourly-analysis-snapshot.sh
#
# 출력: /home/apps/kcg-ai-prediction/data/hourly-analysis/YYYYMMDD-HHMM.txt
set -u
OUTDIR=/home/apps/kcg-ai-prediction/data/hourly-analysis
mkdir -p "$OUTDIR"
STAMP=$(date '+%Y%m%d-%H%M')
OUT="$OUTDIR/$STAMP.txt"
export PGPASSWORD=Kcg2026ai
PSQL="psql -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off"
{
echo "# prediction hourly snapshot (DAR-03 enhanced)"
echo "# generated: $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "# host: $(hostname)"
echo ""
$PSQL << 'SQL'
\echo === 1. VESSEL_ANALYSIS overview (last 1h) ===
SELECT count(*) total,
count(*) FILTER (WHERE vessel_type != 'UNKNOWN') pipeline_path,
count(*) FILTER (WHERE vessel_type = 'UNKNOWN') lightweight_path,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE spoofing_score > 0.5) spoof_hi,
count(*) FILTER (WHERE transship_suspect) transship,
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_violation,
count(*) FILTER (WHERE risk_level='CRITICAL') crit_lvl,
count(*) FILTER (WHERE risk_level='HIGH') high_lvl,
max(risk_score) max_risk,
round(avg(risk_score)::numeric, 2) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour';
\echo
\echo === 2. ZONE x DARK x GEAR_VIOLATION distribution ===
SELECT zone_code,
count(*) total,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE transship_suspect) transship,
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_viol,
count(*) FILTER (WHERE risk_score >= 70) crit,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY zone_code ORDER BY total DESC;
\echo
\echo === 3. DARK GAP distribution ===
SELECT CASE
WHEN gap_duration_min < 30 THEN 'a_lt30'
WHEN gap_duration_min < 60 THEN 'b_30-59'
WHEN gap_duration_min < 120 THEN 'c_60-119'
WHEN gap_duration_min < 360 THEN 'd_120-359'
WHEN gap_duration_min < 1440 THEN 'e_360-1439'
ELSE 'f_gte1440' END gap_bucket,
count(*) total,
count(*) FILTER (WHERE is_dark) dark,
count(*) FILTER (WHERE is_dark AND vessel_type='UNKNOWN') dark_lw,
count(*) FILTER (WHERE is_dark AND vessel_type!='UNKNOWN') dark_pipeline
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY gap_bucket ORDER BY gap_bucket;
\echo
\echo === 4. DARK by activity_state ===
SELECT activity_state, count(*), round(avg(gap_duration_min)::numeric, 0) avg_gap
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour' AND is_dark
GROUP BY activity_state ORDER BY count DESC;
\echo
\echo === 5. DARK sample top 20 by gap ===
SELECT mmsi, zone_code, activity_state, gap_duration_min, risk_score
FROM (
SELECT DISTINCT ON (mmsi) mmsi, zone_code, activity_state, gap_duration_min,
risk_score, analyzed_at
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour' AND is_dark
ORDER BY mmsi, analyzed_at DESC
) latest ORDER BY gap_duration_min DESC LIMIT 20;
\echo
\echo === 6. EVENTS last 1h by category x level ===
SELECT category, level, count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '1 hour'
GROUP BY category, level ORDER BY cnt DESC;
\echo
\echo === 7. STATS_HOURLY latest 3 rows ===
SELECT stat_hour, total_detections, event_count, critical_count,
by_category::text, by_zone::text
FROM kcg.prediction_stats_hourly
ORDER BY stat_hour DESC LIMIT 3;
\echo
\echo === 8. KPI REALTIME ===
SELECT kpi_key, value, trend, delta_pct, updated_at
FROM kcg.prediction_kpi_realtime ORDER BY kpi_key;
\echo
\echo === 9. RISK_SCORE histogram ===
SELECT CASE
WHEN risk_score < 10 THEN 'a_0-9'
WHEN risk_score < 30 THEN 'b_10-29'
WHEN risk_score < 50 THEN 'c_30-49'
WHEN risk_score < 70 THEN 'd_50-69'
WHEN risk_score < 90 THEN 'e_70-89'
ELSE 'f_90-100' END bucket,
count(*) cnt,
count(*) FILTER (WHERE vessel_type='UNKNOWN') lightweight
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY bucket ORDER BY bucket;
\echo
\echo === 10. TRANSSHIP + SPOOF + FLEET ===
SELECT
count(*) FILTER (WHERE transship_suspect) transship_ct,
count(*) FILTER (WHERE spoofing_score > 0.7) spoof_gt070,
count(*) FILTER (WHERE speed_jump_count > 0) speed_jumps,
count(*) FILTER (WHERE fleet_is_leader) fleet_leader,
count(DISTINCT fleet_cluster_id) FILTER (WHERE fleet_cluster_id > 0) fleet_clusters
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour';
\echo
\echo ===================================================================
\echo === FISHERY PERMIT CN REGISTRY (V029 - 한중어업협정)
\echo ===================================================================
\echo
\echo === P1. fishery_permit_cn year-by-year ===
SELECT permit_year, count(*) permits,
count(DISTINCT fishery_code) codes,
count(DISTINCT applicant_cn) applicants,
max(loaded_at) loaded_at
FROM kcg.fishery_permit_cn
GROUP BY permit_year ORDER BY permit_year DESC;
\echo
\echo === P2. fleet_vessels matching (current year = registry) ===
SELECT permit_year,
count(*) total,
count(mmsi) with_mmsi,
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) match_pct,
max(last_seen_at) last_match
FROM kcg.fleet_vessels
WHERE permit_year IS NOT NULL
GROUP BY permit_year ORDER BY permit_year DESC;
\echo
\echo === P3. fleet_vessels breakdown by fishery_code (current year) ===
SELECT fishery_code, count(*) total, count(mmsi) matched,
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) pct
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY fishery_code ORDER BY total DESC;
\echo
\echo === P3.5 match_method distribution (NAME_EXACT vs NAME_FUZZY) ===
SELECT coalesce(match_method, '(unmatched)') method,
count(*) cnt,
round(avg(match_confidence)::numeric, 3) avg_conf
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY method ORDER BY cnt DESC;
\echo
\echo === P3.6 fishery_code × match_method cross ===
SELECT fishery_code,
count(*) FILTER (WHERE match_method = 'NAME_EXACT') exact,
count(*) FILTER (WHERE match_method = 'NAME_FUZZY') fuzzy,
count(*) FILTER (WHERE mmsi IS NULL) unmatched,
count(*) total
FROM kcg.fleet_vessels
WHERE permit_year = EXTRACT(YEAR FROM now())::int
GROUP BY fishery_code ORDER BY total DESC;
\echo
\echo === P4. vessel_analysis_results.gear_code distribution (last 1h) ===
SELECT coalesce(gear_code, '(null)') gear_code,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY gear_code ORDER BY cnt DESC LIMIT 15;
\echo
\echo === P5. fleet_role distribution (last 1h, from registry match) ===
SELECT fleet_role, count(*) cnt,
count(*) FILTER (WHERE fleet_is_leader) is_leader,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY fleet_role ORDER BY cnt DESC;
\echo
\echo === G1. PIPELINE vessel_type distribution ===
SELECT vessel_type, count(*),
round(avg(ucaf_score)::numeric, 3) avg_ucaf,
round(avg(ucft_score)::numeric, 3) avg_ucft,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY vessel_type ORDER BY count DESC;
\echo
\echo === G2. GEAR_GROUP_PARENT_RESOLUTION ===
SELECT status, count(*),
round(avg(confidence)::numeric, 3) avg_conf,
round(avg(top_score)::numeric, 3) avg_top,
round(avg(stable_cycles)::numeric, 1) avg_stable
FROM kcg.gear_group_parent_resolution
GROUP BY status ORDER BY count DESC;
\echo
\echo === G3. GEAR_CORRELATION_SCORES distribution ===
SELECT CASE
WHEN current_score < 0.3 THEN 'a_lt0.3'
WHEN current_score < 0.5 THEN 'b_0.3-0.5'
WHEN current_score < 0.7 THEN 'c_0.5-0.7'
WHEN current_score < 0.85 THEN 'd_0.7-0.85'
ELSE 'e_gte0.85' END bucket,
count(*),
count(DISTINCT group_key) uniq_groups,
count(DISTINCT target_mmsi) uniq_targets
FROM kcg.gear_correlation_scores
WHERE updated_at > now() - interval '1 hour'
GROUP BY bucket ORDER BY bucket;
\echo
\echo ===================================================================
\echo === DAR-03 G-CODE DIAGNOSTICS (last 1h)
\echo ===================================================================
\echo
\echo === D1. gear_judgment distribution ===
SELECT coalesce(NULLIF(gear_judgment, ''), '(none)') judgment,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY judgment ORDER BY cnt DESC;
\echo
\echo === D2. G-code frequency ===
SELECT gcode, count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results,
LATERAL jsonb_array_elements_text(features->'g_codes') AS gcode
WHERE analyzed_at > now() - interval '1 hour'
GROUP BY gcode ORDER BY cnt DESC;
\echo
\echo === D3. G-01 zone x gear cross-table (VIOLATION CHECK) ===
SELECT zone_code, vessel_type, count(*) total,
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-01%') g01_violation
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND vessel_type != 'UNKNOWN' AND zone_code LIKE 'ZONE_%'
GROUP BY zone_code, vessel_type ORDER BY zone_code, vessel_type;
\echo
\echo === D3.5 pair_type distribution (DAR-03 base-target 탐색) ===
SELECT coalesce(features->>'pair_type', '(none)') pair_type,
count(*) cnt,
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
round(avg((features->>'confidence')::numeric)::numeric, 3) avg_conf
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND features->>'pair_trawl_detected' = 'true'
GROUP BY pair_type ORDER BY cnt DESC;
\echo
\echo === D3.6 pair_trawl tier distribution (signal-strength tier) ===
SELECT coalesce(features->>'pair_tier', '(none)') tier,
count(*) cnt,
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
round(avg((features->'gear_violation_evidence'->'G-06'->>'sync_duration_min')::numeric)::numeric, 1) avg_sync_min,
round(avg((features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm')::numeric)::numeric, 3) avg_sep_nm
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND features->>'pair_trawl_detected' = 'true'
GROUP BY tier ORDER BY cnt DESC;
\echo
\echo === D3.7 G-02 closed-season + G-03 unregistered-gear counts ===
SELECT
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-02%') g02_count,
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-03%') g03_count,
count(*) FILTER (WHERE features->>'gear_judgment' = 'CLOSED_SEASON_FISHING') judg_closed,
count(*) FILTER (WHERE features->>'gear_judgment' = 'UNREGISTERED_GEAR') judg_unreg
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour';
\echo
\echo === D3.8 pair detection reject breakdown (last 1h journal) ===
\! ssh redis-211 "sudo journalctl -u kcg-ai-prediction --no-pager --since '1 hour ago' | grep -oE 'pair detection:[^$]+reject=\{[^}]+\}' | awk -F'reject=' '{print $2}' | sort | uniq -c | sort -rn | head -10" 2>/dev/null || true
\echo
\echo === D4. G-06 pair trawl detections ===
SELECT mmsi, zone_code, vessel_type, risk_score,
(features->'gear_violation_evidence'->'G-06'->>'sync_duration_min') sync_min,
(features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm') sep_nm,
(features->'gear_violation_evidence'->'G-06'->>'pair_mmsi') pair_mmsi,
features->>'pair_trawl_detected' pt
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND (features->>'pair_trawl_detected' = 'true' OR features->>'g_codes' LIKE '%G-06%')
ORDER BY risk_score DESC LIMIT 20;
\echo
\echo === D5. G-04 MMSI tampering + G-05 gear drift ===
SELECT mmsi, zone_code, vessel_type, risk_score,
features->>'g_codes' g_codes,
(features->'gear_violation_evidence'->'G-04'->>'cycling_count') g04_cycle,
(features->'gear_violation_evidence'->'G-05'->>'drift_nm') g05_drift
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND (features->>'g_codes' LIKE '%G-04%' OR features->>'g_codes' LIKE '%G-05%')
ORDER BY risk_score DESC LIMIT 20;
\echo
\echo === D6. GEAR_ILLEGAL events (last 1h) ===
SELECT category, level, title, count(*) cnt
FROM kcg.prediction_events
WHERE created_at > now() - interval '1 hour'
AND category IN ('GEAR_ILLEGAL', 'MMSI_TAMPERING')
GROUP BY category, level, title ORDER BY cnt DESC;
\echo
\echo === D7. violation_categories ILLEGAL_GEAR breakdown ===
SELECT count(*) total,
count(*) FILTER (WHERE gear_judgment = 'ZONE_VIOLATION') zone_viol,
count(*) FILTER (WHERE gear_judgment = 'PAIR_TRAWL') pair_trawl,
count(*) FILTER (WHERE gear_judgment = 'GEAR_MISMATCH') mismatch
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND 'ILLEGAL_GEAR' = ANY(violation_categories);
\echo
\echo === D8. gear_violation_score histogram (pipeline vessels) ===
SELECT CASE
WHEN (features->>'gear_violation_score')::int = 0 THEN 'a_0 (no violation)'
WHEN (features->>'gear_violation_score')::int <= 15 THEN 'b_1-15 (G-01 or G-04/G-05)'
WHEN (features->>'gear_violation_score')::int <= 25 THEN 'c_16-25 (G-06 or combo)'
ELSE 'd_gt25 (multiple G-codes)' END bucket,
count(*) cnt,
round(avg(risk_score)::numeric, 1) avg_risk
FROM kcg.vessel_analysis_results
WHERE analyzed_at > now() - interval '1 hour'
AND vessel_type != 'UNKNOWN'
AND features->>'gear_violation_score' IS NOT NULL
GROUP BY bucket ORDER BY bucket;
\echo
\echo === D9. EVENTS 24h hourly trend (with GEAR_ILLEGAL) ===
SELECT date_trunc('hour', occurred_at AT TIME ZONE 'Asia/Seoul') hr,
count(*) tot,
count(*) FILTER (WHERE category='DARK_VESSEL') dark,
count(*) FILTER (WHERE category='ILLEGAL_TRANSSHIP') transship,
count(*) FILTER (WHERE category='EEZ_INTRUSION') eez,
count(*) FILTER (WHERE category='GEAR_ILLEGAL') gear_illegal,
count(*) FILTER (WHERE category='HIGH_RISK_VESSEL') high_risk,
count(*) FILTER (WHERE level='CRITICAL') critical
FROM kcg.prediction_events
WHERE created_at > now() - interval '24 hours'
GROUP BY hr ORDER BY hr DESC LIMIT 25;
SQL
echo ""
echo "=== 13. CYCLE LOG (last 65 min) ==="
journalctl -u kcg-ai-prediction --since '65 minutes ago' --no-pager 2>/dev/null | \
grep -E 'lightweight|event_generator:|stats_aggregator hourly|kpi_writer:|analysis cycle:|pair_trawl|gear_violation|GEAR_ILLEGAL|ERROR|Traceback' | \
tail -60
echo ""
echo "=== END ==="
} > "$OUT" 2>&1
echo "[snapshot] saved: $OUT"