# kcg-ai-monitoring/prediction/pipeline/resampler.py

import pandas as pd
from pipeline.constants import RESAMPLE_INTERVAL_MIN
from pipeline.behavior import BehaviorDetector


class TrajectoryResampler:
    """Resample one vessel's unevenly spaced AIS fixes onto a uniform time grid.

    Purpose: normalise the input vectors fed to BIRCH clustering.
    Method: time-weighted linear interpolation of latitude, longitude and
    SOG; COG is interpolated along the shortest arc so that a track crossing
    north (e.g. 350 -> 10 degrees) does not swing through 180 degrees.
    Interval: defaults to ``RESAMPLE_INTERVAL_MIN`` (the original note cites a
    4-minute interval, Shepperson et al. 2017).

    Columns other than ``lat``/``lon``/``sog``/``cog`` are not interpolated:
    on the resampled grid they hold a value only where a grid timestamp
    coincides with an original fix, and NaN elsewhere.
    """

    # Columns that can be interpolated as plain scalars. 'cog' is an angle
    # and is handled separately by _interpolate_course.
    _LINEAR_COLUMNS = ('lat', 'lon', 'sog')

    def __init__(self, interval_min: int = RESAMPLE_INTERVAL_MIN):
        """Store the grid spacing.

        Args:
            interval_min: Spacing of the output grid, in minutes.
        """
        self.interval = pd.Timedelta(minutes=interval_min)

    @staticmethod
    def _interpolate_course(cog: pd.Series) -> pd.Series:
        """Interpolate a course series in degrees along the shortest arc.

        The known values are first unwrapped (each step is reduced to the
        range [-180, 180) and accumulated), interpolated in time, and then
        wrapped back into [0, 360).

        Args:
            cog: Course values indexed by timestamp, with NaN at the rows to
                be filled.

        Returns:
            A series on the same index with the gaps filled.
        """
        known = cog.dropna()
        if known.empty:
            return cog
        # Shortest signed angular step between consecutive known fixes.
        step = known.diff().fillna(0.0)
        shortest = (step + 180.0) % 360.0 - 180.0
        unwrapped = known.iloc[0] + shortest.cumsum()
        filled = unwrapped.reindex(cog.index).interpolate(method='time')
        return filled % 360.0

    def resample(self, df_vessel: pd.DataFrame) -> pd.DataFrame:
        """Return ``df_vessel`` resampled at ``self.interval`` spacing.

        Args:
            df_vessel: Fixes of a single vessel. Must contain a ``timestamp``
                column; ``lat``, ``lon``, ``sog`` and ``cog`` are interpolated
                when present. ``sog`` is required whenever at least two
                distinct timestamps exist, because the ``state`` column is
                derived from it.

        Returns:
            A new frame with one row per grid timestamp from the first to the
            last fix, plus a ``state`` column produced by
            ``BehaviorDetector.classify_point`` applied to ``sog``. With fewer
            than two distinct timestamps the (deduplicated) input rows are
            returned unchanged, with ``state`` added when ``sog`` is present.

        Raises:
            KeyError: If ``timestamp`` is missing, or ``sog`` is missing on
                the interpolation path.
        """
        # A stable sort keeps the input order among equal timestamps, so
        # keep='last' deterministically retains the last-listed duplicate.
        # Duplicates must go: reindex() raises on a non-unique index.
        track = (
            df_vessel.sort_values('timestamp', kind='mergesort')
            .drop_duplicates(subset='timestamp', keep='last')
            .copy()
        )

        if len(track) < 2:
            # Nothing to interpolate. Still attach 'state' so that callers
            # get the same columns as on the main path.
            if 'sog' in track.columns:
                track['state'] = track['sog'].apply(
                    BehaviorDetector.classify_point
                )
            return track

        grid = pd.date_range(
            track['timestamp'].iloc[0],
            track['timestamp'].iloc[-1],
            freq=self.interval,
        )

        # Insert the grid timestamps among the original fixes so that the
        # time-weighted interpolation can use the real observations.
        track = track.set_index('timestamp')
        track = track.reindex(track.index.union(grid))

        for col in self._LINEAR_COLUMNS:
            if col in track.columns:
                track[col] = track[col].interpolate(method='time')
        if 'cog' in track.columns:
            track['cog'] = self._interpolate_course(track['cog'])

        # Keep only the grid rows. Naming the axis explicitly makes the
        # restored column 'timestamp' regardless of what union() did to the
        # index name.
        track = track.loc[grid].rename_axis('timestamp').reset_index()
        track['state'] = track['sog'].apply(BehaviorDetector.classify_point)
        return track