kcg-monitoring/prediction/chat/router.py
htlee e797beaac6 feat(chat): Ollama Qwen3 기반 AI 해양분석 채팅 구축
- Ollama Docker(14b/32b) + Redis 컨텍스트 캐싱 + 대화 히스토리
- Python SSE 채팅 엔드포인트 + 사전 쿼리 + Tool Calling
- 도메인 지식(해양법/어업협정/알고리즘) + DB 스키마 가이드
- Frontend SSE 스트리밍 + 타이머 + thinking 접기 + 확장 UI
2026-03-26 09:03:05 +09:00

237 lines
8.5 KiB
Python

"""AI 해양분석 채팅 엔드포인트 — 사전 쿼리 + SSE 스트리밍 + Tool Calling."""
import json
import logging
import httpx
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from chat.cache import load_chat_history, save_chat_history, clear_chat_history
from chat.context_builder import MaritimeContextBuilder
from chat.tools import detect_prequery, execute_prequery, parse_tool_calls, execute_tool_call
from config import settings
logger = logging.getLogger(__name__)
router = APIRouter(prefix='/api/v1/chat', tags=['chat'])
class ChatRequest(BaseModel):
    """Incoming chat request body."""

    message: str  # user's natural-language question
    user_id: str = 'anonymous'  # key for the per-user history cache
    stream: bool = True  # SSE streaming (True) vs single JSON response (False)
class ChatResponse(BaseModel):
    """Non-streaming chat reply (streaming responses use raw SSE frames instead)."""

    role: str = 'assistant'
    content: str  # final assistant answer, or an error message on failure
@router.post('')
async def chat(req: ChatRequest):
    """Maritime-analysis chat — pre-query + analysis context + Ollama SSE streaming."""
    history = load_chat_history(req.user_id)
    system_prompt = MaritimeContextBuilder().build_system_prompt(user_message=req.message)

    # ── Pre-query: keyword pattern matching triggers a DB lookup that enriches the context ──
    prequery_params = detect_prequery(req.message)
    if prequery_params:
        prequery_result = execute_prequery(prequery_params)
        logger.info('prequery: params=%s, results=%d chars', prequery_params, len(prequery_result))
        if prequery_result:
            # Append the pre-query rows to the system prompt
            system_prompt = system_prompt + '\n\n' + prequery_result

    messages = [{'role': 'system', 'content': system_prompt}]
    messages.extend(history[-10:])  # cap context at the last 10 turns
    messages.append({'role': 'user', 'content': req.message})

    ollama_payload = {
        'model': settings.OLLAMA_MODEL,
        'messages': messages,
        'stream': req.stream,
        'options': {
            'temperature': 0.3,
            'num_predict': 1024,
            'num_ctx': 2048,
        },
    }

    if not req.stream:
        return await _call_with_tools(ollama_payload, req.user_id, history, req.message)

    return StreamingResponse(
        _stream_with_tools(ollama_payload, req.user_id, history, req.message),
        media_type='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'X-Accel-Buffering': 'no',  # disable nginx buffering so SSE chunks flush
        },
    )
async def _stream_with_tools(payload: dict, user_id: str, history: list[dict], user_message: str):
    """SSE streaming — first LLM pass, then a second call when a Tool Call is detected.

    Yields ``data: {...}\\n\\n`` SSE frames and always finishes with a ``data: [DONE]``
    sentinel. The final assistant answer (second pass if tools ran, otherwise the
    first) is persisted to the per-user chat history.
    """
    accumulated = ''
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(settings.OLLAMA_TIMEOUT_SEC)) as client:
            # ── First LLM pass: suppress `done` since a tool call may extend the turn ──
            parts: list[str] = []
            async for frame, content in _proxy_ollama_stream(client, payload, emit_done=False):
                parts.append(content)
                yield frame
            accumulated = ''.join(parts)

            # ── Tool Call detection ──
            tool_calls = parse_tool_calls(accumulated)
            if not tool_calls:
                # No tool call: close the turn explicitly. The tool path gets its
                # `done: true` from the second pass; without this the client would
                # only ever see `done: false` frames on the no-tool path.
                closing = json.dumps({'content': '', 'done': True}, ensure_ascii=False)
                yield f'data: {closing}\n\n'
            else:
                # Execute the requested tools
                tool_results = []
                for tc in tool_calls:
                    result = execute_tool_call(tc)
                    tool_results.append(result)
                    # separator added: the old '%s%d chars' format glued name and size together
                    logger.info('tool call: %s -> %d chars', tc.get('tool'), len(result))
                tool_context = '\n'.join(tool_results)

                # ── Second LLM pass (tool results folded back into the conversation) ──
                payload['messages'].append({'role': 'assistant', 'content': accumulated})
                payload['messages'].append({
                    'role': 'user',
                    'content': f'도구 조회 결과입니다. 이 데이터를 기반으로 사용자 질문에 답변하세요:\n{tool_context}',
                })
                # Visual separator so the client can tell the two passes apart
                separator = json.dumps({'content': '\n\n---\n_데이터 조회 완료. 분석 결과:_\n\n', 'done': False}, ensure_ascii=False)
                yield f'data: {separator}\n\n'

                second_parts: list[str] = []
                async for frame, content in _proxy_ollama_stream(client, payload, emit_done=True):
                    second_parts.append(content)
                    yield frame
                # History keeps only the final (post-tool) answer
                accumulated = ''.join(second_parts) or accumulated
    except httpx.TimeoutException:
        # ensure_ascii=False keeps the Korean error text readable, matching the success path
        err_msg = json.dumps({'content': '\n\n[응답 시간 초과]', 'done': True}, ensure_ascii=False)
        yield f'data: {err_msg}\n\n'
    except Exception as e:
        logger.error('ollama stream error: %s', e)
        err_msg = json.dumps({'content': f'[오류: {e}]', 'done': True}, ensure_ascii=False)
        yield f'data: {err_msg}\n\n'
    if accumulated:
        # Persist even a partial answer so the conversation context survives errors
        updated = history + [
            {'role': 'user', 'content': user_message},
            {'role': 'assistant', 'content': accumulated},
        ]
        save_chat_history(user_id, updated)
    yield 'data: [DONE]\n\n'


async def _proxy_ollama_stream(client: httpx.AsyncClient, payload: dict, *, emit_done: bool):
    """Stream one Ollama /api/chat call, yielding ``(sse_frame, content)`` per chunk.

    ``emit_done=False`` forces ``done: false`` in the outgoing frames — used on the
    first pass, where a tool call may still follow. Malformed JSON lines from the
    upstream stream are skipped.
    """
    async with client.stream(
        'POST',
        f'{settings.OLLAMA_BASE_URL}/api/chat',
        json=payload,
    ) as response:
        async for line in response.aiter_lines():
            if not line:
                continue
            try:
                chunk = json.loads(line)
            except json.JSONDecodeError:
                continue
            content = chunk.get('message', {}).get('content', '')
            done = chunk.get('done', False)
            frame = json.dumps(
                {'content': content, 'done': done if emit_done else False},
                ensure_ascii=False,
            )
            yield f'data: {frame}\n\n', content
            if done:
                break
async def _call_with_tools(
    payload: dict, user_id: str, history: list[dict], user_message: str,
) -> ChatResponse:
    """Non-streaming chat — includes the Tool Calling round trip.

    Returns a ChatResponse with the final answer, or an error-message ChatResponse
    on any transport/HTTP failure (this helper never raises to the caller).
    """
    try:
        async with httpx.AsyncClient(timeout=httpx.Timeout(settings.OLLAMA_TIMEOUT_SEC)) as client:
            # ── First pass ──
            response = await client.post(
                f'{settings.OLLAMA_BASE_URL}/api/chat',
                json=payload,
            )
            # Surface non-2xx replies as errors instead of parsing an error body
            # as if it were a chat answer
            response.raise_for_status()
            data = response.json()
            content = data.get('message', {}).get('content', '')

            # ── Tool Call detection ──
            tool_calls = parse_tool_calls(content)
            if tool_calls:
                tool_results = [execute_tool_call(tc) for tc in tool_calls]
                tool_context = '\n'.join(tool_results)
                payload['messages'].append({'role': 'assistant', 'content': content})
                payload['messages'].append({
                    'role': 'user',
                    'content': f'도구 조회 결과입니다. 이 데이터를 기반으로 답변하세요:\n{tool_context}',
                })
                # ── Second pass with tool results ──
                response2 = await client.post(
                    f'{settings.OLLAMA_BASE_URL}/api/chat',
                    json=payload,
                )
                response2.raise_for_status()
                data2 = response2.json()
                # Fall back to the first-pass content if the second pass is empty
                content = data2.get('message', {}).get('content', content)

            updated = history + [
                {'role': 'user', 'content': user_message},
                {'role': 'assistant', 'content': content},
            ]
            save_chat_history(user_id, updated)
            return ChatResponse(content=content)
    except Exception as e:
        logger.error('ollama sync error: %s', e)
        return ChatResponse(content=f'오류: AI 서버 연결 실패 ({e})')
@router.get('/history')
async def get_history(user_id: str = 'anonymous'):
    """Return the cached conversation history for *user_id* (empty if none)."""
    return load_chat_history(user_id)
@router.delete('/history')
async def delete_history(user_id: str = 'anonymous'):
    """Clear the cached conversation history for *user_id*."""
    clear_chat_history(user_id)
    return {'ok': True}