171 lines
5.6 KiB
Python
171 lines
5.6 KiB
Python
"""물질별 시트에서 메인 카드 이미지(100KB+) 추출.
|
|
|
|
엑셀 워크시트 → drawing → image 관계 체인을 추적해
|
|
각 물질 시트의 핵심 이미지만 out/images/{nameKr}.png 로 저장.
|
|
|
|
동시에 out/image-map.json 생성 (파일명 ↔ 시트명/국문명 매핑).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import io
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
from xml.etree import ElementTree as ET
|
|
|
|
# Emit UTF-8 on stdout so the Korean status messages can be printed on
# consoles whose default encoding cannot represent them.  reconfigure()
# changes the existing stream in place: it keeps the stream's buffering mode
# (so progress lines still appear as they are printed) and, thanks to the
# hasattr() guard, does nothing when stdout has been replaced by an object
# without that method (or is None).
if hasattr(sys.stdout, 'reconfigure'):
    sys.stdout.reconfigure(encoding='utf-8')

# Output locations, all relative to the directory containing this script.
SCRIPT_DIR = Path(__file__).parent.resolve()
OUT_DIR = SCRIPT_DIR / 'out'
IMG_DIR = OUT_DIR / 'images'
IMG_DIR.mkdir(parents=True, exist_ok=True)

# Workbook to read; override the default with the HNS_SOURCE_XLSX environment
# variable.
SOURCE_XLSX = Path(os.environ.get(
    'HNS_SOURCE_XLSX',
    r'C:\Projects\MeterialDB\유해물질 화물적부도 검색툴.xlsm',
))

# XML namespace prefixes used in the ElementTree queries.
NS = {
    'm': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',
}

# Meta (data) sheets are skipped.
SKIP_SHEETS = {
    '화물적부도 화물코드',
    '항구별 코드',
    '동의어',
    'IBC CODE',
    '경계선',
}
# Guide-number sheets (115-171): names consisting of exactly three digits.
SKIP_PATTERN = re.compile(r'^\d{3}$')

# Minimum image size in bytes (only the main cards are wanted; small icons
# are excluded).
MIN_IMAGE_SIZE = 50_000  # 50 KB
|
|
|
|
|
|
def safe_filename(name: str) -> str:
    """Turn a sheet name into a string suitable for use as a file name.

    Surrounding whitespace is trimmed, trailing commas are removed (followed
    by another whitespace trim), and each of the characters
    ``< > : " / \\ | ? *`` is replaced with an underscore.
    """
    trimmed = name.strip()
    without_commas = trimmed.rstrip(',')
    cleaned = without_commas.strip()
    # One translation table instead of a regex substitution.
    replacements = str.maketrans(dict.fromkeys('<>:"/\\|?*', '_'))
    return cleaned.translate(replacements)
|
|
|
|
|
|
def norm_path(p: str) -> str:
    """Normalise *p* with ``os.path.normpath`` and return it using ``/`` separators."""
    collapsed = os.path.normpath(p)
    # Swap the platform separator for the forward slash used by zip member names.
    return '/'.join(collapsed.split(os.sep))
|
|
|
|
|
|
def _rels_member(part: str) -> str:
    """Return the archive name of the ``.rels`` file that belongs to *part*."""
    folder, _, leaf = part.rpartition('/')
    return f'{folder}/_rels/{leaf}.rels'


def _resolve_target(owner_part: str, target: str) -> str:
    """Resolve a relationship *target* of *owner_part* to an archive member name.

    Targets beginning with ``/`` are treated as relative to the archive root;
    all other targets are resolved against the folder of the owning part.
    """
    if target.startswith('/'):
        return norm_path(target.lstrip('/'))
    folder = owner_part.rpartition('/')[0]
    return norm_path(f'{folder}/{target}' if folder else target)


def _parse_member(z: zipfile.ZipFile, member: str) -> ET.Element:
    """Parse the XML archive member *member*; raises KeyError when it is absent."""
    with z.open(member) as f:
        return ET.parse(f).getroot()


def _load_sheets(z: zipfile.ZipFile) -> list[dict]:
    """List the workbook's sheets as dicts with 'name', 'rid' and 'target' keys.

    'target' is the Target of the workbook relationship whose Id equals the
    sheet's r:id, or None when no such relationship exists.
    """
    wb_root = _parse_member(z, 'xl/workbook.xml')
    rels_root = _parse_member(z, _rels_member('xl/workbook.xml'))
    rid_target = {
        r.get('Id'): r.get('Target')
        for r in rels_root.findall('pr:Relationship', NS)
    }
    rid_attr = f'{{{NS["r"]}}}id'
    sheets = []
    for s in wb_root.findall('m:sheets/m:sheet', NS):
        rid = s.get(rid_attr)
        sheets.append({
            'name': s.get('name'),
            'rid': rid,
            'target': rid_target.get(rid),
        })
    return sheets


def _find_card_image(z: zipfile.ZipFile, sheet_part: str) -> tuple[int, str] | None:
    """Follow sheet → drawing → images and pick the largest image.

    Returns ``(size_in_bytes, member_name)`` of the largest image referenced
    by the sheet's drawing, or None when any link of the chain is missing or
    the largest image is smaller than MIN_IMAGE_SIZE.
    """
    try:
        sheet_rels = _parse_member(z, _rels_member(sheet_part))
    except KeyError:
        return None

    # sheet → drawing (the first matching relationship wins)
    drawing_part = None
    for r in sheet_rels.findall('pr:Relationship', NS):
        t = r.get('Target') or ''
        if 'drawing' in (r.get('Type') or '').lower() and 'drawings/' in t:
            drawing_part = _resolve_target(sheet_part, t)
            break
    if drawing_part is None:
        return None

    try:
        drawing_rels = _parse_member(z, _rels_member(drawing_part))
    except KeyError:
        return None

    # drawing → images
    candidates: list[tuple[int, str]] = []
    for r in drawing_rels.findall('pr:Relationship', NS):
        t = r.get('Target') or ''
        if 'image' not in t.lower():
            continue
        if r.get('TargetMode') == 'External':
            # The target lives outside the archive, so there is nothing to extract.
            continue
        member = _resolve_target(drawing_part, t)
        try:
            candidates.append((z.getinfo(member).file_size, member))
        except KeyError:
            # The relationship names a member the archive does not contain;
            # ignore it instead of aborting the whole run.
            continue
    if not candidates:
        return None

    # Pick the largest image (real card images are 100KB+, icons a few KB).
    best = max(candidates)
    if best[0] < MIN_IMAGE_SIZE:
        return None
    return best


def main() -> None:
    """Extract one card image per substance sheet and write the image map.

    Reads SOURCE_XLSX, skips the meta sheets (SKIP_SHEETS / SKIP_PATTERN),
    saves the largest qualifying image of every remaining sheet into IMG_DIR
    and writes OUT_DIR/image-map.json keyed by the saved file name.

    Raises:
        SystemExit: when SOURCE_XLSX does not exist.
    """
    print(f'[읽기] {SOURCE_XLSX}')
    if not SOURCE_XLSX.exists():
        raise SystemExit(f'소스 파일 없음: {SOURCE_XLSX}')

    image_map: dict[str, dict] = {}
    saved = 0
    skipped = 0
    missing = 0

    with zipfile.ZipFile(SOURCE_XLSX) as z:
        # 1) workbook.xml → sheet list
        sheets = _load_sheets(z)
        print(f'[시트] 총 {len(sheets)}개')

        for s in sheets:
            name = s['name'] or ''
            if name in SKIP_SHEETS or SKIP_PATTERN.match(name):
                skipped += 1
                continue

            # A sheet without a workbook relationship cannot be located.
            if not s['target']:
                missing += 1
                continue

            sheet_part = _resolve_target('xl/workbook.xml', s['target'])
            found = _find_card_image(z, sheet_part)
            if found is None:
                missing += 1
                continue
            largest_size, largest_path = found

            # Save.  NOTE: sheets whose names sanitise to the same file name
            # overwrite each other, both on disk and in image_map.
            safe = safe_filename(name)
            ext = os.path.splitext(largest_path)[1].lower() or '.png'
            out_name = f'{safe}{ext}'
            (IMG_DIR / out_name).write_bytes(z.read(largest_path))
            image_map[out_name] = {
                'sheetName': name,
                'nameKr': safe,
                'source': largest_path,
                'sizeBytes': largest_size,
            }
            saved += 1
            # Progress report every 25 saved images.
            if saved % 25 == 0:
                print(f'  {saved}개 저장 완료')

    print(f'\n[결과] 저장 {saved} / 스킵(메타) {skipped} / 이미지없음 {missing}')

    map_path = OUT_DIR / 'image-map.json'
    with open(map_path, 'w', encoding='utf-8') as f:
        json.dump(image_map, f, ensure_ascii=False, indent=2)
    print(f'[완료] 매핑 파일: {map_path}')


if __name__ == '__main__':
    main()
|