import xarray as xr
import numpy as np
import pandas as pd
import os
from datetime import datetime, timedelta
from shapely.geometry import Polygon, Point
from convex_hull import get_convex_hull_from_json as get_convex_hull
from createWindJson import extract_wind_data_json as create_wind_json
from calcCostlineLength import OilSpillCoastlineAnalyzer
from extractUvWithinBox import _compute_hydr_region, _extract_uv_at_time
import concurrent.futures
from config import STORAGE, SIM, COASTLINE, THREAD_POOL
from logger import get_logger
from utils import check_img_file_by_date, find_time_index

logger = get_logger("createJsonResult")


def extract_and_save_data_to_json(ncfile_path, wind_ncfile_path, ocean_ncfile_path, name, ac_lon, ac_lat):
    """
    Extract per-timestep position, remaining volume, weathered (evaporated)
    volume, beached volume and pollution area from an OpenDrift simulation
    NetCDF file, combined with wind and ocean (hydrodynamic) NetCDF data,
    and return one JSON-serializable dict per timestep.

    Parameters
    ----------
    ncfile_path : path to the OpenDrift simulation result NetCDF.
    wind_ncfile_path : path to the wind data NetCDF.
    ocean_ncfile_path : path to the ocean (current) data NetCDF.
    name : simulation/run identifier (not used in the visible body —
        presumably consumed by callers or reserved; TODO confirm).
    ac_lon, ac_lat : accident (spill) center longitude/latitude, used for
        the coastline analyzer and for wind/hydr extraction windows.

    Returns
    -------
    list[dict] ordered by timestep on success, or None on any error
    (errors are logged, not raised).
    """
    logger.info(f"Processing: {ncfile_path}, {wind_ncfile_path}, {ocean_ncfile_path}")
    try:
        # Fail fast with a specific message for each missing input file.
        if not os.path.exists(ncfile_path):
            raise FileNotFoundError(f"Simulation result file not found: {ncfile_path}")
        if not os.path.exists(wind_ncfile_path):
            raise FileNotFoundError(f"Wind data file not found: {wind_ncfile_path}")
        if not os.path.exists(ocean_ncfile_path):
            raise FileNotFoundError(f"Ocean data file not found: {ocean_ncfile_path}")

        # Coastline analyzer clipped to a radius around the accident point.
        analyzer = OilSpillCoastlineAnalyzer(
            str(STORAGE.COASTLINE),
            buffer_distance=COASTLINE.BUFFER_DISTANCE,
            simplify_tolerance=COASTLINE.SIMPLIFY_TOLERANCE,
            center_point=(ac_lon, ac_lat),
            radius=COASTLINE.DEFAULT_RADIUS
        )

        with xr.open_dataset(ncfile_path) as ds, \
                xr.open_dataset(wind_ncfile_path) as wind_ds, \
                xr.open_dataset(ocean_ncfile_path) as ocean_ds:
            # ------------------------------------------------------------------
            # (1) Bulk-load simulation variables — one xarray -> NumPy conversion
            # ------------------------------------------------------------------
            total_steps = len(ds.time)
            # OpenDrift NetCDF dimension order: (trajectory, time) = (N, T).
            # The .T transpose normalizes everything to (T, N) so that all
            # later operations use axis=0 = time, axis=1 = particle.
            status_all = ds.status.values.T if 'status' in ds else None  # (T, N)
            lon_all = ds.lon.values.T if 'lon' in ds else None
            lat_all = ds.lat.values.T if 'lat' in ds else None
            mass_all = ds.mass_oil.values.T if 'mass_oil' in ds else None
            density_all = ds.density.values.T if 'density' in ds else None
            evap_all = ds.mass_evaporated.values.T if 'mass_evaporated' in ds else None
            moving_all = ds.moving.values.T if 'moving' in ds else None
            viscosity_all = ds.viscosity.values.T if 'viscosity' in ds else None
            watertemp_all = ds.sea_water_temperature.values.T if 'sea_water_temperature' in ds else None
            # Film thickness is taken once from the first particle at t=0 and
            # reported unchanged for every timestep.
            oil_film_thick = (float(ds.oil_film_thickness.isel(time=0).values[0])
                              if 'oil_film_thickness' in ds else None)

            # Total initial oil mass (kg) — logged for reference only.
            initial_mass_total = None
            if mass_all is not None:
                try:
                    initial_mass_total = float(mass_all[0].sum())
                    logger.info(f"Initial oil mass: {initial_mass_total:.2f} kg")
                except Exception as e:
                    logger.warning(f"Error processing mass_oil: {e}")

            # ------------------------------------------------------------------
            # (2) Precompute all timestamps once
            # ------------------------------------------------------------------
            time_values_pd = pd.to_datetime(ds.time.values)
            formatted_times = [t.strftime('%Y-%m-%d %H:%M:%S') for t in time_values_pd]

            # ------------------------------------------------------------------
            # (3) Vectorized preprocessing (replaces a former per-step loop,
            #     lines 77-175 of the previous revision)
            # ------------------------------------------------------------------
            cumulative_evaporated_arr = np.zeros(total_steps)
            cumulative_beached_arr = np.zeros(total_steps)
            cumulative_pollution_area = {}
            first_strand_step = None
            last_valid_lon = None
            last_valid_lat = None

            if status_all is not None and lon_all is not None:
                N = status_all.shape[1]

                # First stranded timestep per particle.
                # NOTE(review): status == 1 is treated as "stranded" here —
                # matches OpenDrift's default status coding; confirm against
                # the model's status_categories if the model config changes.
                stranded_mask = (status_all == 1)                     # (T, N)
                has_stranded = stranded_mask.any(axis=0)              # (N,)
                # argmax on a boolean mask returns the first True index;
                # -1 marks particles that never strand.
                first_strand_step = np.where(has_stranded,
                                             stranded_mask.argmax(axis=0),
                                             -1).astype(np.int32)     # (N,)

                # Last valid (non-NaN longitude) position per particle —
                # found by argmax on the time-reversed validity mask.
                valid_lon_mask = ~np.isnan(lon_all)                   # (T, N)
                has_any_valid = valid_lon_mask.any(axis=0)            # (N,)
                last_valid_t = (total_steps - 1
                                - np.flip(valid_lon_mask, axis=0).argmax(axis=0))  # (N,)
                pix = np.arange(N)
                last_valid_lon = np.where(has_any_valid, lon_all[last_valid_t, pix], np.nan)
                last_valid_lat = np.where(has_any_valid, lat_all[last_valid_t, pix], np.nan)

                # Cumulative evaporated mass — expanding max per particle
                # (NaNs replaced by -inf so they never win the max; the clip
                # maps any all-NaN prefix back to 0).
                if evap_all is not None:
                    evap_clean = np.where(np.isnan(evap_all), -np.inf, evap_all)
                    run_evap = np.maximum.accumulate(evap_clean, axis=0)  # (T, N)
                    run_evap = np.clip(run_evap, 0, None)
                    cumulative_evaporated_arr = run_evap.sum(axis=1)      # (T,)

                # Cumulative beached volume — expanding max of per-particle
                # stranded volume (mass/density), counted only from each
                # particle's first stranded step onward.
                if mass_all is not None and density_all is not None:
                    # Invalid densities become +inf so mass/inf -> 0 volume.
                    safe_den = np.where((density_all > 0) & ~np.isnan(density_all),
                                        density_all, np.inf)
                    beach_vol = np.where(
                        stranded_mask & ~np.isnan(mass_all) & (mass_all > 0),
                        mass_all / safe_den, 0.0)                        # (T, N)
                    run_beach = np.maximum.accumulate(beach_vol, axis=0)  # (T, N)
                    # Never-stranded particles get threshold total_steps,
                    # so they are never "active" in the matrix below.
                    strand_threshold = np.where(first_strand_step >= 0,
                                                first_strand_step, total_steps)
                    active_matrix = (np.arange(total_steps)[:, None]
                                     >= strand_threshold[None, :])        # (T, N)
                    cumulative_beached_arr = (run_beach * active_matrix).sum(axis=1)  # (T,)

            # Cumulative pollution area — sequential loop retained (relies on
            # a growing set union of polluted grid cells); per-step inner
            # operations are vectorized. The grid bounds grow monotonically
            # as particles spread, and cell area is recomputed from the
            # current bounds each step.
            # NOTE(review): this branch guards on mass_all but indexes
            # lon_all/lat_all; if mass is present while positions are absent
            # it would fail — verify upstream guarantees both or neither.
            all_polluted_cells = set()
            grid_config = None
            for i in range(total_steps):
                if mass_all is not None:
                    lon_i = lon_all[i]
                    lat_i = lat_all[i]
                    mass_i = mass_all[i]
                    valid_mask = (~np.isnan(lon_i)) & (~np.isnan(lat_i)) & (mass_i > 0)
                    if np.any(valid_mask):
                        lon_active = lon_i[valid_mask]
                        lat_active = lat_i[valid_mask]
                        if grid_config is None:
                            # Initial grid bounds: particle extent + 0.01 deg margin.
                            grid_config = {
                                'min_lon': lon_active.min() - 0.01,
                                'max_lon': lon_active.max() + 0.01,
                                'min_lat': lat_active.min() - 0.01,
                                'max_lat': lat_active.max() + 0.01,
                                'num_lon_bins': SIM.POLLUTION_GRID_BINS,
                                'num_lat_bins': SIM.POLLUTION_GRID_BINS,
                            }
                        else:
                            # Expand bounds so the fixed bin count always
                            # covers the full particle extent.
                            grid_config['min_lon'] = min(grid_config['min_lon'],
                                                         lon_active.min() - 0.01)
                            grid_config['max_lon'] = max(grid_config['max_lon'],
                                                         lon_active.max() + 0.01)
                            grid_config['min_lat'] = min(grid_config['min_lat'],
                                                         lat_active.min() - 0.01)
                            grid_config['max_lat'] = max(grid_config['max_lat'],
                                                         lat_active.max() + 0.01)
                        lon_bins = np.linspace(grid_config['min_lon'],
                                               grid_config['max_lon'],
                                               grid_config['num_lon_bins'] + 1)
                        lat_bins = np.linspace(grid_config['min_lat'],
                                               grid_config['max_lat'],
                                               grid_config['num_lat_bins'] + 1)
                        lon_indices = np.digitize(lon_active, lon_bins) - 1
                        lat_indices = np.digitize(lat_active, lat_bins) - 1
                        for lon_idx, lat_idx in zip(lon_indices, lat_indices):
                            if (0 <= lon_idx < grid_config['num_lon_bins']
                                    and 0 <= lat_idx < grid_config['num_lat_bins']):
                                all_polluted_cells.add((lon_idx, lat_idx))
                        # Approximate cell area from degree->km conversion
                        # constants (flat-earth approximation around the site).
                        delta_lon_km = ((grid_config['max_lon'] - grid_config['min_lon'])
                                        * SIM.KM_PER_DEG_LON)
                        delta_lat_km = ((grid_config['max_lat'] - grid_config['min_lat'])
                                        * SIM.KM_PER_DEG_LAT)
                        area_of_cell_km2 = ((delta_lon_km / grid_config['num_lon_bins'])
                                            * (delta_lat_km / grid_config['num_lat_bins']))
                        cumulative_pollution_area[i] = len(all_polluted_cells) * area_of_cell_km2
                    else:
                        # No active particles this step: carry previous value forward.
                        cumulative_pollution_area[i] = cumulative_pollution_area.get(i - 1, 0.0) if i > 0 else 0.0
                else:
                    cumulative_pollution_area[i] = cumulative_pollution_area.get(i - 1, 0.0) if i > 0 else 0.0

            # ------------------------------------------------------------------
            # (4) Pre-extract wind/hydr data for every timestep up front
            #     (avoids reopening files inside the worker threads)
            # ------------------------------------------------------------------
            logger.info("Pre-extracting hydr data for all timesteps...")
            hydr_region = _compute_hydr_region(ocean_ds, ac_lon, ac_lat)
            hydr_time_indices = [find_time_index(ocean_ds, t)[0] for t in formatted_times]
            hydr_cache = [_extract_uv_at_time(ocean_ds, idx, hydr_region)
                          for idx in hydr_time_indices]

            logger.info("Pre-extracting wind data for all timesteps...")
            wind_cache = [create_wind_json(wind_ds, time_values_pd[i], ac_lon, ac_lat)
                          for i in range(total_steps)]

            # ------------------------------------------------------------------
            # (5) process_time_step — consumes only the pre-loaded arrays and
            #     caches above; no duplicated xarray access per step
            # ------------------------------------------------------------------
            def process_time_step(i):
                # Build the result record for timestep i.
                formatted_time = formatted_times[i]
                logger.debug(f"Processing time step: {formatted_time}")

                lon_t = lon_all[i] if lon_all is not None else None
                lat_t = lat_all[i] if lat_all is not None else None
                mass_t = mass_all[i] if mass_all is not None else None
                density_t = density_all[i] if density_all is not None else None
                status_t = status_all[i] if status_all is not None else None
                moving_t = moving_all[i] if moving_all is not None else None
                viscosity_t = viscosity_all[i] if viscosity_all is not None else None
                watertemp_t = watertemp_all[i] if watertemp_all is not None else None

                # Active (still moving) particles -> slick center of mass.
                active_mask = moving_t > 0 if moving_t is not None else None
                active_lon = lon_t[active_mask] if (active_mask is not None and lon_t is not None) else []
                active_lat = lat_t[active_mask] if (active_mask is not None and lat_t is not None) else []
                center_lon, center_lat = None, None
                if len(active_lon) > 0 and len(active_lat) > 0:
                    center_lon = float(np.mean(active_lon))
                    center_lat = float(np.mean(active_lat))

                # Build the particle list: stranded particles whose current
                # position is NaN fall back to their last valid position.
                particles = []
                if lon_t is not None and lat_t is not None and first_strand_step is not None:
                    stranded_flags = (first_strand_step >= 0) & (i >= first_strand_step)  # (N,)
                    for idx in range(len(lon_t)):
                        lon_val = lon_t[idx]
                        lat_val = lat_t[idx]
                        stranded_value = int(stranded_flags[idx])
                        if stranded_value == 1 and (np.isnan(lon_val) or np.isnan(lat_val)):
                            if last_valid_lon is not None and not np.isnan(last_valid_lon[idx]):
                                lon_val = last_valid_lon[idx]
                                lat_val = last_valid_lat[idx]
                        if np.isnan(lon_val) or np.isnan(lat_val):
                            continue  # drop particles with no usable position
                        particles.append({
                            "lon": float(lon_val),
                            "lat": float(lat_val),
                            "stranded": stranded_value,
                        })

                # Polluted coastline length (meters, per analyzer contract).
                length, info = analyzer.calculate_polluted_length(particles)

                # Convex hull of the particle cloud as WKT.
                # NOTE(review): a 2-point hull passed to Polygon() would raise;
                # only ValueError is caught here — confirm get_convex_hull
                # never returns exactly two coordinates.
                try:
                    hull_coords = get_convex_hull(particles)
                    wkt = Point(hull_coords[0]).wkt if len(hull_coords) == 1 else Polygon(hull_coords).wkt
                except ValueError as e:
                    logger.warning(f"Convex hull error: {e}")
                    wkt = ""

                # Remaining volume at sea (status == 0 particles):
                # total valid mass divided by the mean valid density.
                remaining_volume_m3 = 0.0
                if mass_t is not None and density_t is not None and status_t is not None:
                    sea_mask = (status_t == 0)
                    sea_masses = mass_t[sea_mask]
                    sea_densities = density_t[sea_mask]
                    valid_sea = ~np.isnan(sea_masses) & (sea_masses > 0) & ~np.isnan(sea_densities) & (sea_densities > 0)
                    if np.any(valid_sea):
                        avg_density = np.mean(sea_densities[valid_sea])
                        if avg_density > 0:
                            remaining_volume_m3 = float(np.sum(sea_masses[valid_sea]) / avg_density)

                # Cumulative evaporated mass -> volume via mean density.
                evaporated_mass = float(cumulative_evaporated_arr[i])
                weathered_volume_m3 = 0.0
                if evaporated_mass > 0 and density_t is not None:
                    valid_den = ~np.isnan(density_t) & (density_t > 0)
                    if np.any(valid_den):
                        avg_density = float(np.mean(density_t[valid_den]))
                        if avg_density > 0:
                            weathered_volume_m3 = evaporated_mass / avg_density

                beached_volume_m3 = float(cumulative_beached_arr[i])
                pollution_area = cumulative_pollution_area.get(i, 0.0)

                # NaN-safe averages; None when the variable is absent/empty.
                average_viscosity = float(np.nanmean(viscosity_t)) if (viscosity_t is not None and len(viscosity_t) > 0) else None
                average_water_temp = float(np.nanmean(watertemp_t)) if (watertemp_t is not None and len(watertemp_t) > 0) else None

                hydr_data = hydr_cache[i]

                return {
                    "time": formatted_time,
                    "center_lon": float(center_lon) if center_lon is not None else None,
                    "center_lat": float(center_lat) if center_lat is not None else None,
                    "remaining_volume_m3": float(remaining_volume_m3),
                    "weathered_volume_m3": float(weathered_volume_m3),
                    "pollution_area_km2": float(pollution_area),
                    "beached_volume_m3": float(beached_volume_m3),
                    "particles": particles,
                    "wkt": wkt,
                    "viscosity": average_viscosity,
                    "thickness": oil_film_thick,
                    "temperature": average_water_temp,
                    "wind_data": wind_cache[i],
                    "hydr_data": hydr_data['value'],
                    "hydr_grid": hydr_data['grid'],
                    "pollution_coast_length_m": length,
                }

            # ThreadPoolExecutor — all wind/hydr I/O was hoisted above, so the
            # workers do CPU-bound NumPy/shapely work only. Results are placed
            # by index so output order matches timestep order regardless of
            # completion order.
            with concurrent.futures.ThreadPoolExecutor(max_workers=THREAD_POOL.MAX_WORKERS) as executor:
                futures = {executor.submit(process_time_step, i): i for i in range(total_steps)}
                results = [None] * total_steps
                for future in concurrent.futures.as_completed(futures):
                    idx = futures[future]
                    results[idx] = future.result()
            return results

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        return None
    except Exception as e:
        # Top-level boundary: log full traceback, signal failure with None.
        logger.exception(f"An unexpected error occurred: {e}")
        return None