/**
 * base.json + ocr.json → frontend/src/data/hnsSubstanceData.json
 *
 * Match key: the Korean name (nameKr), compared after normalization
 *   (whitespace/special characters removed, then lowercased).
 * Merge rule: Excel base fields are kept; OCR results only fill empty fields
 *   (OCR does not take precedence).
 * In practice the physical-property/hazard fields are mostly empty in
 * base.json, so they end up being filled from the OCR values.
 */
import { Buffer } from 'node:buffer';
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

// Directory of this script, derived from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Working directory for intermediate artifacts, located next to this script.
const OUT_DIR = resolve(__dirname, 'out');
// Required input: the base records (main() exits with an error if it is missing).
const BASE_PATH = resolve(OUT_DIR, 'base.json');
// Optional input: OCR results (main() only warns if it is missing).
const OCR_PATH = resolve(OUT_DIR, 'ocr.json');
// Destination of the merged JSON written by main().
const TARGET_PATH = resolve(__dirname, '../../../frontend/src/data/hnsSubstanceData.json');

/**
 * Builds the comparison key used to match substance names.
 *
 * The input is first brought to Unicode NFKC form so that names which look
 * identical but are encoded differently (decomposed Hangul syllables,
 * full-width punctuation) yield the same key. Whitespace and the punctuation
 * characters `, . · / - _ ( ) [ ]` are then removed and the result lowercased.
 *
 * @param s Raw name; `undefined` or an empty string yields `''`.
 * @returns The normalized key, or `''` when there is nothing to compare.
 */
function normalizeName(s: string | undefined): string {
  if (!s) return '';
  return s
    .normalize('NFKC')
    .replace(/\s+/g, '')
    .replace(/[,.·/\-_()[\]]/g, '')
    .toLowerCase();
}

/**
 * Hazard rating block stored under `BaseRecord.nfpa`.
 * NOTE(review): presumably NFPA 704 diamond ratings — confirm with the data owner.
 */
interface NfpaBlock {
  health: number;
  fire: number;
  reactivity: number;
  /** Text notation; `pickNfpa` stores '' when the OCR value is not a string. */
  special: string;
}

/**
 * Text sections stored under `BaseRecord.msds`. Every section is a plain
 * string; `pickMsds` fills a section from OCR only when the base value is blank.
 */
interface MsdsBlock {
  hazard: string;
  firstAid: string;
  fireFighting: string;
  spillResponse: string;
  exposure: string;
  regulation: string;
}

/**
 * One substance record as read from base.json and as written to the merged
 * output. `merge()` returns this same shape with blank string fields
 * optionally filled from OCR.
 */
interface BaseRecord {
  // Identification. `nameKr` and `synonymsKr` are the keys main() uses to
  // look up the OCR entry; these fields are never overwritten by merge().
  id: number;
  abbreviation: string;
  nameKr: string;
  nameEn: string;
  synonymsEn: string;
  /** Synonyms joined with " / " (main() splits on exactly that separator). */
  synonymsKr: string;
  unNumber: string;
  casNumber: string;

  // Filled from OCR by merge() when blank.
  transportMethod: string;
  sebc: string;
  /** Not touched by merge(). */
  usage: string;

  // Filled from OCR by merge() when blank.
  state: string;
  color: string;
  odor: string;
  /** Also used by main() to count records that carry detail data. */
  flashPoint: string;
  autoIgnition: string;
  boilingPoint: string;
  density: string;
  solubility: string;
  vaporPressure: string;
  vaporDensity: string;
  explosionRange: string;

  nfpa: NfpaBlock;

  // Filled from OCR by merge() when blank.
  hazardClass: string;
  ergNumber: string;
  idlh: string;
  aegl2: string;
  erpg2: string;
  responseDistanceFire: string;
  responseDistanceSpillDay: string;
  responseDistanceSpillNight: string;
  marineResponse: string;
  ppeClose: string;
  ppeFar: string;

  msds: MsdsBlock;

  // Not touched by merge(); carried over from base.json as-is.
  ibcHazard: string;
  ibcShipType: string;
  ibcTankType: string;
  ibcDetection: string;
  ibcFireFighting: string;
  ibcMinRequirement: string;

  // Filled from OCR by merge() when blank.
  emsCode: string;
  emsFire: string;
  emsSpill: string;
  emsFirstAid: string;

  // Not touched by merge(); carried over from base.json as-is.
  cargoCodes: Array<{ code: string; name: string; company: string; source: string }>;
  portFrequency: Array<{ port: string; portCode: string; lastImport: string; frequency: string }>;
}

/**
 * One entry of ocr.json. Its shape is not validated when the file is parsed,
 * so every property is typed `unknown` and must be narrowed before use.
 */
interface OcrResult {
  [key: string]: unknown;
}

/**
 * Returns the first argument that is a string with non-whitespace content,
 * trimmed. Non-string values and blank strings are skipped.
 *
 * @param values Candidates in priority order.
 * @returns The trimmed winner, or `''` when no candidate qualifies.
 */
function firstString(...values: Array<unknown>): string {
  for (const candidate of values) {
    if (typeof candidate !== 'string') continue;
    const trimmed = candidate.trim();
    if (trimmed.length > 0) return trimmed;
  }
  return '';
}

/**
 * Converts one raw OCR rating into a number.
 *
 * Only finite numbers and non-blank numeric strings are accepted. `null`,
 * `''`, booleans and arrays are rejected explicitly because `Number()` would
 * coerce them to 0 or 1 and make a missing rating look like a real one.
 */
function toNfpaRating(value: unknown): number | null {
  if (typeof value === 'number') {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === 'string' && value.trim().length > 0) {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}

/**
 * Extracts a complete NFPA block from an OCR entry.
 *
 * @param ocr OCR entry whose `nfpa` property, if present, is expected to be
 *   an object carrying `health`, `fire`, `reactivity` and optionally `special`.
 * @returns The parsed block, or `null` when `nfpa` is absent, is not a plain
 *   object, or any of the three ratings is missing or non-numeric. `special`
 *   falls back to `''` when it is not a string.
 */
function pickNfpa(ocr: OcrResult): NfpaBlock | null {
  const raw = ocr.nfpa;
  if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) return null;

  const fields = raw as Record<string, unknown>;
  const health = toNfpaRating(fields.health);
  const fire = toNfpaRating(fields.fire);
  const reactivity = toNfpaRating(fields.reactivity);
  if (health === null || fire === null || reactivity === null) return null;

  return {
    health,
    fire,
    reactivity,
    special: typeof fields.special === 'string' ? fields.special : '',
  };
}

/**
 * Builds the merged MSDS block: for each section the base text wins, and the
 * OCR text is used only when the base text is blank.
 *
 * @param ocr OCR entry; its `msds` property is used only when it is an object.
 * @param base MSDS block of the base record. A missing block (possible because
 *   base.json is not validated) is treated as all-blank instead of throwing.
 * @returns A new block in which every section is a trimmed string, possibly `''`.
 */
function pickMsds(ocr: OcrResult, base: MsdsBlock): MsdsBlock {
  const rawOcr = ocr.msds;
  const fromOcr = (typeof rawOcr === 'object' && rawOcr !== null ? rawOcr : {}) as Partial<MsdsBlock>;
  const fromBase: Partial<MsdsBlock> = base ?? {};

  return {
    hazard: firstString(fromBase.hazard, fromOcr.hazard),
    firstAid: firstString(fromBase.firstAid, fromOcr.firstAid),
    fireFighting: firstString(fromBase.fireFighting, fromOcr.fireFighting),
    spillResponse: firstString(fromBase.spillResponse, fromOcr.spillResponse),
    exposure: firstString(fromBase.exposure, fromOcr.exposure),
    regulation: firstString(fromBase.regulation, fromOcr.regulation),
  };
}

/**
 * Reports whether a base NFPA block carries real data: at least one positive
 * rating or a non-blank `special` text. An all-zero block cannot be told apart
 * from "not provided", so it is treated as empty.
 */
function hasNfpaData(nfpa: NfpaBlock | undefined): boolean {
  if (!nfpa || typeof nfpa !== 'object') return false;
  return (
    nfpa.health > 0 ||
    nfpa.fire > 0 ||
    nfpa.reactivity > 0 ||
    (typeof nfpa.special === 'string' && nfpa.special.trim().length > 0)
  );
}

/**
 * Merges an OCR entry into a base record without overriding base data.
 *
 * Every listed string field keeps the base value when it is non-blank and is
 * otherwise filled from the OCR entry (values are trimmed by `firstString`).
 * The NFPA block follows the same rule: it is taken from OCR only when the
 * base block carries no data and the OCR block parses completely. Fields not
 * listed here are copied from `base` unchanged.
 *
 * @param base Record from base.json.
 * @param ocr Matching OCR entry, or `undefined` when none was found.
 * @returns `base` itself when `ocr` is undefined, otherwise a new record.
 */
function merge(base: BaseRecord, ocr: OcrResult | undefined): BaseRecord {
  if (!ocr) return base;

  // Base wins when it has ratings; otherwise use OCR if it yields a full block.
  const nfpa = hasNfpaData(base.nfpa) ? base.nfpa : (pickNfpa(ocr) ?? base.nfpa);

  return {
    ...base,
    transportMethod: firstString(base.transportMethod, ocr.transportMethod),
    sebc: firstString(base.sebc, ocr.sebc),
    state: firstString(base.state, ocr.state),
    color: firstString(base.color, ocr.color),
    odor: firstString(base.odor, ocr.odor),
    flashPoint: firstString(base.flashPoint, ocr.flashPoint),
    autoIgnition: firstString(base.autoIgnition, ocr.autoIgnition),
    boilingPoint: firstString(base.boilingPoint, ocr.boilingPoint),
    density: firstString(base.density, ocr.density),
    solubility: firstString(base.solubility, ocr.solubility),
    vaporPressure: firstString(base.vaporPressure, ocr.vaporPressure),
    vaporDensity: firstString(base.vaporDensity, ocr.vaporDensity),
    explosionRange: firstString(base.explosionRange, ocr.explosionRange),
    nfpa,
    hazardClass: firstString(base.hazardClass, ocr.hazardClass),
    ergNumber: firstString(base.ergNumber, ocr.ergNumber),
    idlh: firstString(base.idlh, ocr.idlh),
    aegl2: firstString(base.aegl2, ocr.aegl2),
    erpg2: firstString(base.erpg2, ocr.erpg2),
    responseDistanceFire: firstString(base.responseDistanceFire, ocr.responseDistanceFire),
    responseDistanceSpillDay: firstString(base.responseDistanceSpillDay, ocr.responseDistanceSpillDay),
    responseDistanceSpillNight: firstString(base.responseDistanceSpillNight, ocr.responseDistanceSpillNight),
    marineResponse: firstString(base.marineResponse, ocr.marineResponse),
    ppeClose: firstString(base.ppeClose, ocr.ppeClose),
    ppeFar: firstString(base.ppeFar, ocr.ppeFar),
    msds: pickMsds(ocr, base.msds),
    emsCode: firstString(base.emsCode, ocr.emsCode),
    emsFire: firstString(base.emsFire, ocr.emsFire),
    emsSpill: firstString(base.emsSpill, ocr.emsSpill),
    emsFirstAid: firstString(base.emsFirstAid, ocr.emsFirstAid),
  };
}

/**
 * Script entry point: loads base.json and (optionally) ocr.json, matches each
 * base record to an OCR entry, merges them and writes the result to
 * TARGET_PATH.
 *
 * Matching runs in two passes so that an exact name match always beats a
 * synonym match regardless of record order:
 *   1. normalized `nameKr` against the normalized OCR keys;
 *   2. for records still unmatched, each " / "-separated entry of `synonymsKr`.
 * An OCR entry is consumed by the first record that matches it. Entries left
 * over are written to out/merge-unmatched.json and listed on the console.
 *
 * Exits the process with code 1 when base.json does not exist.
 */
function main(): void {
  if (!existsSync(BASE_PATH)) {
    console.error(`base.json 없음: ${BASE_PATH}`);
    console.error('→ extract-excel.py 를 먼저 실행하세요.');
    process.exit(1);
  }
  // Check once so the warning and the read below cannot disagree.
  const hasOcrFile = existsSync(OCR_PATH);
  if (!hasOcrFile) {
    console.warn(`ocr.json 없음: ${OCR_PATH} — 상세 데이터 없이 base 만 사용`);
  }

  const base: BaseRecord[] = JSON.parse(readFileSync(BASE_PATH, 'utf-8'));
  const ocr: Record<string, OcrResult> = hasOcrFile
    ? JSON.parse(readFileSync(OCR_PATH, 'utf-8'))
    : {};

  console.log(`[입력] base ${base.length}종, ocr ${Object.keys(ocr).length}종`);

  // Index OCR entries by normalized key, remembering the original key so
  // unmatched entries can be reported the way they appear in ocr.json.
  const ocrIndex = new Map<string, OcrResult>();
  const normToOrig = new Map<string, string>();
  for (const [key, value] of Object.entries(ocr)) {
    const normKey = normalizeName(key);
    if (!normKey) continue;
    const previousKey = normToOrig.get(normKey);
    if (previousKey !== undefined) {
      // Two OCR keys collapse to the same normalized key; the later one wins
      // (as before), but the collision is no longer silent.
      console.warn(`[경고] OCR 키 중복 (정규화 후 동일): "${previousKey}" ↔ "${key}" — 뒤 항목 사용`);
    }
    ocrIndex.set(normKey, value);
    normToOrig.set(normKey, key);
  }

  // base index → OCR entry assigned to that record.
  const assigned = new Map<number, OcrResult>();

  // Pass 1: exact (normalized) nameKr matches.
  base.forEach((record, index) => {
    const key = normalizeName(record.nameKr);
    const hit = ocrIndex.get(key);
    if (hit) {
      assigned.set(index, hit);
      ocrIndex.delete(key);
    }
  });

  // Pass 2: synonym matches for records pass 1 did not resolve.
  let matchedBySynonym = 0;
  base.forEach((record, index) => {
    if (assigned.has(index) || !record.synonymsKr) return;
    for (const synonym of record.synonymsKr.split(' / ')) {
      const normSyn = normalizeName(synonym);
      if (!normSyn) continue;
      const hit = ocrIndex.get(normSyn);
      if (hit) {
        assigned.set(index, hit);
        ocrIndex.delete(normSyn);
        matchedBySynonym++;
        return;
      }
    }
  });

  const matched = assigned.size;
  // merge() returns the record untouched when no OCR entry was assigned.
  const merged = base.map((record, index) => merge(record, assigned.get(index)));

  // Whatever is still indexed never matched a base record.
  const unmatched = [...ocrIndex.keys()]
    .map((normKey) => normToOrig.get(normKey) ?? normKey)
    .sort();

  console.log(`[병합] base ↔ ocr 매칭 ${matched}종 (nameKr: ${matched - matchedBySynonym}, 동의어: ${matchedBySynonym})`);
  if (unmatched.length > 0) {
    const unmatchedPath = resolve(OUT_DIR, 'merge-unmatched.json');
    writeFileSync(unmatchedPath, JSON.stringify({ count: unmatched.length, keys: unmatched }, null, 2), 'utf-8');
    console.warn(`[경고] OCR 매칭 실패 ${unmatched.length}개 → ${unmatchedPath}`);
    unmatched.slice(0, 20).forEach((k) => console.warn(` - ${k}`));
    if (unmatched.length > 20) console.warn(` ... +${unmatched.length - 20}`);
  }

  // Serialize once and report the byte size of exactly what is written.
  const serialized = JSON.stringify(merged, null, 2);
  writeFileSync(TARGET_PATH, serialized, 'utf-8');
  const sizeKb = (Buffer.byteLength(serialized, 'utf-8') / 1024).toFixed(0);
  console.log(`[완료] ${TARGET_PATH} (${sizeKb} KB, ${merged.length}종)`);
  console.log(` 상세 정보 보유: ${merged.filter((r) => r.flashPoint).length}종`);
}

main();