scheduler.py features write 확장: - pair_tier (STRONG/PROBABLE/SUSPECT) - pair_type, pair_reject_reason - similarity, confidence - registered_fishery_code → SUSPECT tier 까지 raw_pair 로 보존하여 통계 집계 가능 diagnostic-snapshot.sh (5분 주기): - 4-4.1 pair_trawl tier 분포 + avg_sync_min - 4-4.2 reject 사유 journal 로그 tail - 4-4.3 G-02 금어기 상세 (observed_at, fishery_code) - 4-4.4 G-03 미등록 어구 상세 (detected/registered/allowed) - 7.5-2b match_method 분포 (EXACT vs FUZZY) - 7.5-2c fishery_code × match_method 교차 hourly-analysis-snapshot.sh (1시간 주기): - P3.5 match_method 분포 + avg_confidence - P3.6 fishery_code × match_method 교차 - D3.6 pair_tier 분포 + avg_sync_min + avg_sep_nm - D3.7 G-02/G-03 건수 + gear_judgment 분포 - D3.8 reject 사유 1시간 journal 집계
390 lines
15 KiB
Bash
Executable File
390 lines
15 KiB
Bash
Executable File
#!/bin/bash
|
||
# prediction 시간당 상태 스냅샷 수집기 (DAR-03 G코드 + 어구 위반 포함)
|
||
# 실행 환경: redis-211 서버 (prediction 서비스 호스트)
|
||
# cron: 0 * * * * /home/apps/kcg-ai-prediction/scripts/hourly-analysis-snapshot.sh
|
||
#
|
||
# 출력: /home/apps/kcg-ai-prediction/data/hourly-analysis/YYYYMMDD-HHMM.txt
|
||
|
||
set -u
|
||
|
||
OUTDIR=/home/apps/kcg-ai-prediction/data/hourly-analysis
|
||
mkdir -p "$OUTDIR"
|
||
STAMP=$(date '+%Y%m%d-%H%M')
|
||
OUT="$OUTDIR/$STAMP.txt"
|
||
|
||
export PGPASSWORD=Kcg2026ai
|
||
PSQL="psql -U kcg-app -d kcgaidb -h 211.208.115.83 -P pager=off"
|
||
|
||
{
|
||
echo "# prediction hourly snapshot (DAR-03 enhanced)"
|
||
echo "# generated: $(date '+%Y-%m-%d %H:%M:%S %Z')"
|
||
echo "# host: $(hostname)"
|
||
echo ""
|
||
|
||
$PSQL << 'SQL'
|
||
\echo === 1. VESSEL_ANALYSIS overview (last 1h) ===
|
||
SELECT count(*) total,
|
||
count(*) FILTER (WHERE vessel_type != 'UNKNOWN') pipeline_path,
|
||
count(*) FILTER (WHERE vessel_type = 'UNKNOWN') lightweight_path,
|
||
count(*) FILTER (WHERE is_dark) dark,
|
||
count(*) FILTER (WHERE spoofing_score > 0.5) spoof_hi,
|
||
count(*) FILTER (WHERE transship_suspect) transship,
|
||
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_violation,
|
||
count(*) FILTER (WHERE risk_level='CRITICAL') crit_lvl,
|
||
count(*) FILTER (WHERE risk_level='HIGH') high_lvl,
|
||
max(risk_score) max_risk,
|
||
round(avg(risk_score)::numeric, 2) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour';
|
||
|
||
\echo
|
||
\echo === 2. ZONE x DARK x GEAR_VIOLATION distribution ===
|
||
SELECT zone_code,
|
||
count(*) total,
|
||
count(*) FILTER (WHERE is_dark) dark,
|
||
count(*) FILTER (WHERE transship_suspect) transship,
|
||
count(*) FILTER (WHERE gear_judgment IS NOT NULL AND gear_judgment != '') gear_viol,
|
||
count(*) FILTER (WHERE risk_score >= 70) crit,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY zone_code ORDER BY total DESC;
|
||
|
||
\echo
|
||
\echo === 3. DARK GAP distribution ===
|
||
SELECT CASE
|
||
WHEN gap_duration_min < 30 THEN 'a_lt30'
|
||
WHEN gap_duration_min < 60 THEN 'b_30-59'
|
||
WHEN gap_duration_min < 120 THEN 'c_60-119'
|
||
WHEN gap_duration_min < 360 THEN 'd_120-359'
|
||
WHEN gap_duration_min < 1440 THEN 'e_360-1439'
|
||
ELSE 'f_gte1440' END gap_bucket,
|
||
count(*) total,
|
||
count(*) FILTER (WHERE is_dark) dark,
|
||
count(*) FILTER (WHERE is_dark AND vessel_type='UNKNOWN') dark_lw,
|
||
count(*) FILTER (WHERE is_dark AND vessel_type!='UNKNOWN') dark_pipeline
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY gap_bucket ORDER BY gap_bucket;
|
||
|
||
\echo
|
||
\echo === 4. DARK by activity_state ===
|
||
SELECT activity_state, count(*), round(avg(gap_duration_min)::numeric, 0) avg_gap
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour' AND is_dark
|
||
GROUP BY activity_state ORDER BY count DESC;
|
||
|
||
\echo
|
||
\echo === 5. DARK sample top 20 by gap ===
|
||
SELECT mmsi, zone_code, activity_state, gap_duration_min, risk_score
|
||
FROM (
|
||
SELECT DISTINCT ON (mmsi) mmsi, zone_code, activity_state, gap_duration_min,
|
||
risk_score, analyzed_at
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour' AND is_dark
|
||
ORDER BY mmsi, analyzed_at DESC
|
||
) latest ORDER BY gap_duration_min DESC LIMIT 20;
|
||
|
||
\echo
|
||
\echo === 6. EVENTS last 1h by category x level ===
|
||
SELECT category, level, count(*) cnt
|
||
FROM kcg.prediction_events
|
||
WHERE created_at > now() - interval '1 hour'
|
||
GROUP BY category, level ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === 7. STATS_HOURLY latest 3 rows ===
|
||
SELECT stat_hour, total_detections, event_count, critical_count,
|
||
by_category::text, by_zone::text
|
||
FROM kcg.prediction_stats_hourly
|
||
ORDER BY stat_hour DESC LIMIT 3;
|
||
|
||
\echo
|
||
\echo === 8. KPI REALTIME ===
|
||
SELECT kpi_key, value, trend, delta_pct, updated_at
|
||
FROM kcg.prediction_kpi_realtime ORDER BY kpi_key;
|
||
|
||
\echo
|
||
\echo === 9. RISK_SCORE histogram ===
|
||
SELECT CASE
|
||
WHEN risk_score < 10 THEN 'a_0-9'
|
||
WHEN risk_score < 30 THEN 'b_10-29'
|
||
WHEN risk_score < 50 THEN 'c_30-49'
|
||
WHEN risk_score < 70 THEN 'd_50-69'
|
||
WHEN risk_score < 90 THEN 'e_70-89'
|
||
ELSE 'f_90-100' END bucket,
|
||
count(*) cnt,
|
||
count(*) FILTER (WHERE vessel_type='UNKNOWN') lightweight
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY bucket ORDER BY bucket;
|
||
|
||
\echo
|
||
\echo === 10. TRANSSHIP + SPOOF + FLEET ===
|
||
SELECT
|
||
count(*) FILTER (WHERE transship_suspect) transship_ct,
|
||
count(*) FILTER (WHERE spoofing_score > 0.7) spoof_gt070,
|
||
count(*) FILTER (WHERE speed_jump_count > 0) speed_jumps,
|
||
count(*) FILTER (WHERE fleet_is_leader) fleet_leader,
|
||
count(DISTINCT fleet_cluster_id) FILTER (WHERE fleet_cluster_id > 0) fleet_clusters
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour';
|
||
|
||
\echo
|
||
\echo ===================================================================
|
||
\echo === FISHERY PERMIT CN REGISTRY (V029 - 한중어업협정)
|
||
\echo ===================================================================
|
||
|
||
\echo
|
||
\echo === P1. fishery_permit_cn year-by-year ===
|
||
SELECT permit_year, count(*) permits,
|
||
count(DISTINCT fishery_code) codes,
|
||
count(DISTINCT applicant_cn) applicants,
|
||
max(loaded_at) loaded_at
|
||
FROM kcg.fishery_permit_cn
|
||
GROUP BY permit_year ORDER BY permit_year DESC;
|
||
|
||
\echo
|
||
\echo === P2. fleet_vessels matching (current year = registry) ===
|
||
SELECT permit_year,
|
||
count(*) total,
|
||
count(mmsi) with_mmsi,
|
||
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) match_pct,
|
||
max(last_seen_at) last_match
|
||
FROM kcg.fleet_vessels
|
||
WHERE permit_year IS NOT NULL
|
||
GROUP BY permit_year ORDER BY permit_year DESC;
|
||
|
||
\echo
|
||
\echo === P3. fleet_vessels breakdown by fishery_code (current year) ===
|
||
SELECT fishery_code, count(*) total, count(mmsi) matched,
|
||
round(count(mmsi)::numeric / NULLIF(count(*),0) * 100, 1) pct
|
||
FROM kcg.fleet_vessels
|
||
WHERE permit_year = EXTRACT(YEAR FROM now())::int
|
||
GROUP BY fishery_code ORDER BY total DESC;
|
||
|
||
\echo
|
||
\echo === P3.5 match_method distribution (NAME_EXACT vs NAME_FUZZY) ===
|
||
SELECT coalesce(match_method, '(unmatched)') method,
|
||
count(*) cnt,
|
||
round(avg(match_confidence)::numeric, 3) avg_conf
|
||
FROM kcg.fleet_vessels
|
||
WHERE permit_year = EXTRACT(YEAR FROM now())::int
|
||
GROUP BY method ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === P3.6 fishery_code × match_method cross ===
|
||
SELECT fishery_code,
|
||
count(*) FILTER (WHERE match_method = 'NAME_EXACT') exact,
|
||
count(*) FILTER (WHERE match_method = 'NAME_FUZZY') fuzzy,
|
||
count(*) FILTER (WHERE mmsi IS NULL) unmatched,
|
||
count(*) total
|
||
FROM kcg.fleet_vessels
|
||
WHERE permit_year = EXTRACT(YEAR FROM now())::int
|
||
GROUP BY fishery_code ORDER BY total DESC;
|
||
|
||
\echo
|
||
\echo === P4. vessel_analysis_results.gear_code distribution (last 1h) ===
|
||
SELECT coalesce(gear_code, '(null)') gear_code,
|
||
count(*) cnt,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY gear_code ORDER BY cnt DESC LIMIT 15;
|
||
|
||
\echo
|
||
\echo === P5. fleet_role distribution (last 1h, from registry match) ===
|
||
SELECT fleet_role, count(*) cnt,
|
||
count(*) FILTER (WHERE fleet_is_leader) is_leader,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY fleet_role ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === G1. PIPELINE vessel_type distribution ===
|
||
SELECT vessel_type, count(*),
|
||
round(avg(ucaf_score)::numeric, 3) avg_ucaf,
|
||
round(avg(ucft_score)::numeric, 3) avg_ucft,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY vessel_type ORDER BY count DESC;
|
||
|
||
\echo
|
||
\echo === G2. GEAR_GROUP_PARENT_RESOLUTION ===
|
||
SELECT status, count(*),
|
||
round(avg(confidence)::numeric, 3) avg_conf,
|
||
round(avg(top_score)::numeric, 3) avg_top,
|
||
round(avg(stable_cycles)::numeric, 1) avg_stable
|
||
FROM kcg.gear_group_parent_resolution
|
||
GROUP BY status ORDER BY count DESC;
|
||
|
||
\echo
|
||
\echo === G3. GEAR_CORRELATION_SCORES distribution ===
|
||
SELECT CASE
|
||
WHEN current_score < 0.3 THEN 'a_lt0.3'
|
||
WHEN current_score < 0.5 THEN 'b_0.3-0.5'
|
||
WHEN current_score < 0.7 THEN 'c_0.5-0.7'
|
||
WHEN current_score < 0.85 THEN 'd_0.7-0.85'
|
||
ELSE 'e_gte0.85' END bucket,
|
||
count(*),
|
||
count(DISTINCT group_key) uniq_groups,
|
||
count(DISTINCT target_mmsi) uniq_targets
|
||
FROM kcg.gear_correlation_scores
|
||
WHERE updated_at > now() - interval '1 hour'
|
||
GROUP BY bucket ORDER BY bucket;
|
||
|
||
\echo
|
||
\echo ===================================================================
|
||
\echo === DAR-03 G-CODE DIAGNOSTICS (last 1h)
|
||
\echo ===================================================================
|
||
|
||
\echo
|
||
\echo === D1. gear_judgment distribution ===
|
||
SELECT coalesce(NULLIF(gear_judgment, ''), '(none)') judgment,
|
||
count(*) cnt,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY judgment ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === D2. G-code frequency ===
|
||
SELECT gcode, count(*) cnt,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results,
|
||
LATERAL jsonb_array_elements_text(features->'g_codes') AS gcode
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
GROUP BY gcode ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === D3. G-01 zone x gear cross-table (VIOLATION CHECK) ===
|
||
SELECT zone_code, vessel_type, count(*) total,
|
||
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-01%') g01_violation
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND vessel_type != 'UNKNOWN' AND zone_code LIKE 'ZONE_%'
|
||
GROUP BY zone_code, vessel_type ORDER BY zone_code, vessel_type;
|
||
|
||
\echo
|
||
\echo === D3.5 pair_type distribution (DAR-03 base-target 탐색) ===
|
||
SELECT coalesce(features->>'pair_type', '(none)') pair_type,
|
||
count(*) cnt,
|
||
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
|
||
round(avg((features->>'confidence')::numeric)::numeric, 3) avg_conf
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND features->>'pair_trawl_detected' = 'true'
|
||
GROUP BY pair_type ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === D3.6 pair_trawl tier distribution (signal-strength tier) ===
|
||
SELECT coalesce(features->>'pair_tier', '(none)') tier,
|
||
count(*) cnt,
|
||
round(avg((features->>'similarity')::numeric)::numeric, 3) avg_sim,
|
||
round(avg((features->'gear_violation_evidence'->'G-06'->>'sync_duration_min')::numeric)::numeric, 1) avg_sync_min,
|
||
round(avg((features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm')::numeric)::numeric, 3) avg_sep_nm
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND features->>'pair_trawl_detected' = 'true'
|
||
GROUP BY tier ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === D3.7 G-02 closed-season + G-03 unregistered-gear counts ===
|
||
SELECT
|
||
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-02%') g02_count,
|
||
count(*) FILTER (WHERE features->>'g_codes' LIKE '%G-03%') g03_count,
|
||
count(*) FILTER (WHERE features->>'gear_judgment' = 'CLOSED_SEASON_FISHING') judg_closed,
|
||
count(*) FILTER (WHERE features->>'gear_judgment' = 'UNREGISTERED_GEAR') judg_unreg
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour';
|
||
|
||
\echo
|
||
\echo === D3.8 pair detection reject breakdown (last 1h journal) ===
|
||
\! ssh redis-211 "sudo journalctl -u kcg-ai-prediction --no-pager --since '1 hour ago' | grep -oE 'pair detection:[^$]+reject=\{[^}]+\}' | awk -F'reject=' '{print $2}' | sort | uniq -c | sort -rn | head -10" 2>/dev/null || true
|
||
|
||
\echo
|
||
\echo === D4. G-06 pair trawl detections ===
|
||
SELECT mmsi, zone_code, vessel_type, risk_score,
|
||
(features->'gear_violation_evidence'->'G-06'->>'sync_duration_min') sync_min,
|
||
(features->'gear_violation_evidence'->'G-06'->>'mean_separation_nm') sep_nm,
|
||
(features->'gear_violation_evidence'->'G-06'->>'pair_mmsi') pair_mmsi,
|
||
features->>'pair_trawl_detected' pt
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND (features->>'pair_trawl_detected' = 'true' OR features->>'g_codes' LIKE '%G-06%')
|
||
ORDER BY risk_score DESC LIMIT 20;
|
||
|
||
\echo
|
||
\echo === D5. G-04 MMSI tampering + G-05 gear drift ===
|
||
SELECT mmsi, zone_code, vessel_type, risk_score,
|
||
features->>'g_codes' g_codes,
|
||
(features->'gear_violation_evidence'->'G-04'->>'cycling_count') g04_cycle,
|
||
(features->'gear_violation_evidence'->'G-05'->>'drift_nm') g05_drift
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND (features->>'g_codes' LIKE '%G-04%' OR features->>'g_codes' LIKE '%G-05%')
|
||
ORDER BY risk_score DESC LIMIT 20;
|
||
|
||
\echo
|
||
\echo === D6. GEAR_ILLEGAL events (last 1h) ===
|
||
SELECT category, level, title, count(*) cnt
|
||
FROM kcg.prediction_events
|
||
WHERE created_at > now() - interval '1 hour'
|
||
AND category IN ('GEAR_ILLEGAL', 'MMSI_TAMPERING')
|
||
GROUP BY category, level, title ORDER BY cnt DESC;
|
||
|
||
\echo
|
||
\echo === D7. violation_categories ILLEGAL_GEAR breakdown ===
|
||
SELECT count(*) total,
|
||
count(*) FILTER (WHERE gear_judgment = 'ZONE_VIOLATION') zone_viol,
|
||
count(*) FILTER (WHERE gear_judgment = 'PAIR_TRAWL') pair_trawl,
|
||
count(*) FILTER (WHERE gear_judgment = 'GEAR_MISMATCH') mismatch
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND 'ILLEGAL_GEAR' = ANY(violation_categories);
|
||
|
||
\echo
|
||
\echo === D8. gear_violation_score histogram (pipeline vessels) ===
|
||
SELECT CASE
|
||
WHEN (features->>'gear_violation_score')::int = 0 THEN 'a_0 (no violation)'
|
||
WHEN (features->>'gear_violation_score')::int <= 15 THEN 'b_1-15 (G-01 or G-04/G-05)'
|
||
WHEN (features->>'gear_violation_score')::int <= 25 THEN 'c_16-25 (G-06 or combo)'
|
||
ELSE 'd_gt25 (multiple G-codes)' END bucket,
|
||
count(*) cnt,
|
||
round(avg(risk_score)::numeric, 1) avg_risk
|
||
FROM kcg.vessel_analysis_results
|
||
WHERE analyzed_at > now() - interval '1 hour'
|
||
AND vessel_type != 'UNKNOWN'
|
||
AND features->>'gear_violation_score' IS NOT NULL
|
||
GROUP BY bucket ORDER BY bucket;
|
||
|
||
\echo
|
||
\echo === D9. EVENTS 24h hourly trend (with GEAR_ILLEGAL) ===
|
||
SELECT date_trunc('hour', occurred_at AT TIME ZONE 'Asia/Seoul') hr,
|
||
count(*) tot,
|
||
count(*) FILTER (WHERE category='DARK_VESSEL') dark,
|
||
count(*) FILTER (WHERE category='ILLEGAL_TRANSSHIP') transship,
|
||
count(*) FILTER (WHERE category='EEZ_INTRUSION') eez,
|
||
count(*) FILTER (WHERE category='GEAR_ILLEGAL') gear_illegal,
|
||
count(*) FILTER (WHERE category='HIGH_RISK_VESSEL') high_risk,
|
||
count(*) FILTER (WHERE level='CRITICAL') critical
|
||
FROM kcg.prediction_events
|
||
WHERE created_at > now() - interval '24 hours'
|
||
GROUP BY hr ORDER BY hr DESC LIMIT 25;
|
||
|
||
SQL
|
||
|
||
echo ""
|
||
echo "=== 13. CYCLE LOG (last 65 min) ==="
|
||
journalctl -u kcg-ai-prediction --since '65 minutes ago' --no-pager 2>/dev/null | \
|
||
grep -E 'lightweight|event_generator:|stats_aggregator hourly|kpi_writer:|analysis cycle:|pair_trawl|gear_violation|GEAR_ILLEGAL|ERROR|Traceback' | \
|
||
tail -60
|
||
|
||
echo ""
|
||
echo "=== END ==="
|
||
} > "$OUT" 2>&1
|
||
|
||
echo "[snapshot] saved: $OUT"
|